Skip to content

Commit 681648e

Browse files
sowminiv davem330
authored and committed
rds: tcp: correctly sequence cleanup on netns deletion.
Commit 8edc3af ("rds: tcp: Take explicit refcounts on struct net") introduces a regression in rds-tcp netns cleanup. cleanup_net() (and thus the rds_tcp_dev_event notification) is only called from put_net() when all netns refcounts go to 0, but this cannot happen if the rds_connection itself is holding a c_net ref that it expects to release in rds_tcp_kill_sock. Instead, the rds_tcp_kill_sock callback should make sure to tear down state carefully, ensuring that the socket teardown is only done after all data-structures and workqs that depend on it are quiesced. The original motivation for commit 8edc3af ("rds: tcp: Take explicit refcounts on struct net") was to resolve a race condition reported by syzkaller where workqs for tx/rx/connect were triggered after the namespace was deleted. Those worker threads should have been cancelled/flushed before socket tear-down and indeed, rds_conn_path_destroy() does try to sequence this by doing: /* cancel cp_send_w */ /* cancel cp_recv_w */ /* flush cp_down_w */ /* free data structures */ Here the "flush cp_down_w" will trigger rds_conn_shutdown and thus invoke rds_tcp_conn_path_shutdown() to close the tcp socket, so that we ought to have satisfied the requirement that "socket-close is done after all other dependent state is quiesced". However, rds_conn_shutdown has a bug in that it *always* triggers the reconnect workq (and if the connection is successful, we always restart the tx/rx workqs, so with the right timing, we risk the race conditions reported by syzkaller). Netns deletion is like module teardown: there is no need to restart a reconnect in this case. We can use the c_destroy_in_prog bit to avoid restarting the reconnect. Fixes: 8edc3af ("rds: tcp: Take explicit refcounts on struct net") Signed-off-by: Sowmini Varadhan <[email protected]> Acked-by: Santosh Shilimkar <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 2d746c9 commit 681648e

File tree

3 files changed

+7
-6
lines changed

3 files changed

+7
-6
lines changed

net/rds/connection.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,8 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
366366
* to the conn hash, so we never trigger a reconnect on this
367367
* conn - the reconnect is always triggered by the active peer. */
368368
cancel_delayed_work_sync(&cp->cp_conn_w);
369+
if (conn->c_destroy_in_prog)
370+
return;
369371
rcu_read_lock();
370372
if (!hlist_unhashed(&conn->c_hash_node)) {
371373
rcu_read_unlock();
@@ -445,7 +447,6 @@ void rds_conn_destroy(struct rds_connection *conn)
445447
*/
446448
rds_cong_remove_conn(conn);
447449

448-
put_net(conn->c_net);
449450
kfree(conn->c_path);
450451
kmem_cache_free(rds_conn_slab, conn);
451452

net/rds/rds.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ struct rds_connection {
150150

151151
/* Protocol version */
152152
unsigned int c_version;
153-
struct net *c_net;
153+
possible_net_t c_net;
154154

155155
struct list_head c_map_item;
156156
unsigned long c_map_queued;
@@ -165,13 +165,13 @@ struct rds_connection {
165165
static inline
166166
struct net *rds_conn_net(struct rds_connection *conn)
167167
{
168-
return conn->c_net;
168+
return read_pnet(&conn->c_net);
169169
}
170170

171171
static inline
172172
void rds_conn_net_set(struct rds_connection *conn, struct net *net)
173173
{
174-
conn->c_net = get_net(net);
174+
write_pnet(&conn->c_net, net);
175175
}
176176

177177
#define RDS_FLAG_CONG_BITMAP 0x01

net/rds/tcp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ static void rds_tcp_kill_sock(struct net *net)
506506
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
507507
spin_lock_irq(&rds_tcp_conn_lock);
508508
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
509-
struct net *c_net = tc->t_cpath->cp_conn->c_net;
509+
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
510510

511511
if (net != c_net || !tc->t_sock)
512512
continue;
@@ -563,7 +563,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
563563

564564
spin_lock_irq(&rds_tcp_conn_lock);
565565
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
566-
struct net *c_net = tc->t_cpath->cp_conn->c_net;
566+
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
567567

568568
if (net != c_net || !tc->t_sock)
569569
continue;

0 commit comments

Comments
 (0)