Skip to content

Commit 81b3ade

Browse files
q2venkuba-moo
authored andcommitted
Revert "tcp: avoid the lookup process failing to get sk in ehash table"
This reverts commit 3f4ca5f. Commit 3f4ca5f ("tcp: avoid the lookup process failing to get sk in ehash table") reversed the order in how a socket is inserted into ehash to fix an issue that ehash-lookup could fail when reqsk/full sk/twsk are swapped. However, it introduced another lookup failure. The full socket in ehash is allocated from a slab with SLAB_TYPESAFE_BY_RCU and does not have SOCK_RCU_FREE, so the socket could be reused even while it is being referenced on another CPU doing RCU lookup. Let's say a socket is reused and inserted into the same hash bucket during lookup. After the blamed commit, a new socket is inserted at the end of the list. If that happens, we will skip sockets placed after the previous position of the reused socket, resulting in ehash lookup failure. As described in Documentation/RCU/rculist_nulls.rst, we should insert a new socket at the head of the list to avoid such an issue. This issue, the swap-lookup-failure, and another variant reported in [0] can all be handled properly by adding a locked ehash lookup suggested by Eric Dumazet [1]. However, this issue could occur for every packet, thus more likely than the other two races, so let's revert the change for now. Link: https://lore.kernel.org/netdev/[email protected]/ [0] Link: https://lore.kernel.org/netdev/CANn89iK8snOz8TYOhhwfimC7ykYA78GA3Nyv8x06SZYa1nKdyA@mail.gmail.com/ [1] Fixes: 3f4ca5f ("tcp: avoid the lookup process failing to get sk in ehash table") Signed-off-by: Kuniyuki Iwashima <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent e80698b commit 81b3ade

File tree

2 files changed

+6
-19
lines changed

2 files changed

+6
-19
lines changed

net/ipv4/inet_hashtables.c

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -650,20 +650,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
650650
spin_lock(lock);
651651
if (osk) {
652652
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
653-
ret = sk_hashed(osk);
654-
if (ret) {
655-
/* Before deleting the node, we insert a new one to make
656-
* sure that the look-up-sk process would not miss either
657-
* of them and that at least one node would exist in ehash
658-
* table all the time. Otherwise there's a tiny chance
659-
* that lookup process could find nothing in ehash table.
660-
*/
661-
__sk_nulls_add_node_tail_rcu(sk, list);
662-
sk_nulls_del_node_init_rcu(osk);
663-
}
664-
goto unlock;
665-
}
666-
if (found_dup_sk) {
653+
ret = sk_nulls_del_node_init_rcu(osk);
654+
} else if (found_dup_sk) {
667655
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
668656
if (*found_dup_sk)
669657
ret = false;
@@ -672,7 +660,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
672660
if (ret)
673661
__sk_nulls_add_node_rcu(sk, list);
674662

675-
unlock:
676663
spin_unlock(lock);
677664

678665
return ret;

net/ipv4/inet_timewait_sock.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
8888
}
8989
EXPORT_SYMBOL_GPL(inet_twsk_put);
9090

91-
static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
92-
struct hlist_nulls_head *list)
91+
static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
92+
struct hlist_nulls_head *list)
9393
{
94-
hlist_nulls_add_tail_rcu(&tw->tw_node, list);
94+
hlist_nulls_add_head_rcu(&tw->tw_node, list);
9595
}
9696

9797
static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -144,7 +144,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
144144

145145
spin_lock(lock);
146146

147-
inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
147+
inet_twsk_add_node_rcu(tw, &ehead->chain);
148148

149149
/* Step 3: Remove SK from hash chain */
150150
if (__sk_nulls_del_node_init_rcu(sk))

0 commit comments

Comments
 (0)