Skip to content

Commit ebeeb1a

Browse files
sowminiv authored and davem330 committed
rds: tcp: use rds_destroy_pending() to synchronize netns/module teardown and rds connection/workq management
An rds_connection can get added during netns deletion between lines 528 and 529 of 506 static void rds_tcp_kill_sock(struct net *net) : /* code to pull out all the rds_connections that should be destroyed */ : 528 spin_unlock_irq(&rds_tcp_conn_lock); 529 list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) 530 rds_conn_destroy(tc->t_cpath->cp_conn); Such an rds_connection would miss out the rds_conn_destroy() loop (that cancels all pending work) and (if it was scheduled after netns deletion) could trigger the use-after-free. A similar race-window exists for the module unload path in rds_tcp_exit -> rds_tcp_destroy_conns Concurrency with netns deletion (rds_tcp_kill_sock()) must be handled by checking check_net() before enqueuing new work or adding new connections. Concurrency with module-unload is handled by maintaining a module specific flag that is set at the start of the module exit function, and must be checked before enqueuing new work or adding new connections. This commit refactors existing RDS_DESTROY_PENDING checks added by commit 3db6e0d ("rds: use RCU to synchronize work-enqueue with connection teardown") and consolidates all the concurrency checks listed above into the function rds_destroy_pending(). Signed-off-by: Sowmini Varadhan <[email protected]> Acked-by: Santosh Shilimkar <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 79a8a64 commit ebeeb1a

File tree

11 files changed

+76
-30
lines changed

11 files changed

+76
-30
lines changed

net/rds/cong.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
223223

224224
rcu_read_lock();
225225
if (!test_and_set_bit(0, &conn->c_map_queued) &&
226-
!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
226+
!rds_destroy_pending(cp->cp_conn)) {
227227
rds_stats_inc(s_cong_update_queued);
228228
/* We cannot inline the call to rds_send_xmit() here
229229
* for two reasons (both pertaining to a TCP transport):

net/rds/connection.c

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -220,8 +220,13 @@ static struct rds_connection *__rds_conn_create(struct net *net,
220220
is_outgoing);
221221
conn->c_path[i].cp_index = i;
222222
}
223-
ret = trans->conn_alloc(conn, gfp);
223+
rcu_read_lock();
224+
if (rds_destroy_pending(conn))
225+
ret = -ENETDOWN;
226+
else
227+
ret = trans->conn_alloc(conn, gfp);
224228
if (ret) {
229+
rcu_read_unlock();
225230
kfree(conn->c_path);
226231
kmem_cache_free(rds_conn_slab, conn);
227232
conn = ERR_PTR(ret);
@@ -283,6 +288,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
283288
}
284289
}
285290
spin_unlock_irqrestore(&rds_conn_lock, flags);
291+
rcu_read_unlock();
286292

287293
out:
288294
return conn;
@@ -382,13 +388,10 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
382388
{
383389
struct rds_message *rm, *rtmp;
384390

385-
set_bit(RDS_DESTROY_PENDING, &cp->cp_flags);
386-
387391
if (!cp->cp_transport_data)
388392
return;
389393

390394
/* make sure lingering queued work won't try to ref the conn */
391-
synchronize_rcu();
392395
cancel_delayed_work_sync(&cp->cp_send_w);
393396
cancel_delayed_work_sync(&cp->cp_recv_w);
394397

@@ -691,7 +694,7 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
691694
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
692695

693696
rcu_read_lock();
694-
if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
697+
if (!destroy && rds_destroy_pending(cp->cp_conn)) {
695698
rcu_read_unlock();
696699
return;
697700
}
@@ -714,7 +717,7 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
714717
void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
715718
{
716719
rcu_read_lock();
717-
if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
720+
if (rds_destroy_pending(cp->cp_conn)) {
718721
rcu_read_unlock();
719722
return;
720723
}

net/rds/ib.c

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@
4848
static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
4949
static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
5050
unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
51+
static atomic_t rds_ib_unloading;
5152

5253
module_param(rds_ib_mr_1m_pool_size, int, 0444);
5354
MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA");
@@ -378,8 +379,23 @@ static void rds_ib_unregister_client(void)
378379
flush_workqueue(rds_wq);
379380
}
380381

382+
static void rds_ib_set_unloading(void)
383+
{
384+
atomic_set(&rds_ib_unloading, 1);
385+
}
386+
387+
static bool rds_ib_is_unloading(struct rds_connection *conn)
388+
{
389+
struct rds_conn_path *cp = &conn->c_path[0];
390+
391+
return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) ||
392+
atomic_read(&rds_ib_unloading) != 0);
393+
}
394+
381395
void rds_ib_exit(void)
382396
{
397+
rds_ib_set_unloading();
398+
synchronize_rcu();
383399
rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
384400
rds_ib_unregister_client();
385401
rds_ib_destroy_nodev_conns();
@@ -413,6 +429,7 @@ struct rds_transport rds_ib_transport = {
413429
.flush_mrs = rds_ib_flush_mrs,
414430
.t_owner = THIS_MODULE,
415431
.t_name = "infiniband",
432+
.t_unloading = rds_ib_is_unloading,
416433
.t_type = RDS_TRANS_IB
417434
};
418435

net/rds/ib_cm.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
117117
&conn->c_laddr, &conn->c_faddr,
118118
RDS_PROTOCOL_MAJOR(conn->c_version),
119119
RDS_PROTOCOL_MINOR(conn->c_version));
120+
set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags);
120121
rds_conn_destroy(conn);
121122
return;
122123
} else {

net/rds/rds.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -518,6 +518,7 @@ struct rds_transport {
518518
void (*sync_mr)(void *trans_private, int direction);
519519
void (*free_mr)(void *trans_private, int invalidate);
520520
void (*flush_mrs)(void);
521+
bool (*t_unloading)(struct rds_connection *conn);
521522
};
522523

523524
struct rds_sock {
@@ -862,6 +863,12 @@ static inline void rds_mr_put(struct rds_mr *mr)
862863
__rds_put_mr_final(mr);
863864
}
864865

866+
static inline bool rds_destroy_pending(struct rds_connection *conn)
867+
{
868+
return !check_net(rds_conn_net(conn)) ||
869+
(conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn));
870+
}
871+
865872
/* stats.c */
866873
DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
867874
#define rds_stats_inc_which(which, member) do { \

net/rds/send.c

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,7 @@ int rds_send_xmit(struct rds_conn_path *cp)
162162
goto out;
163163
}
164164

165-
if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
165+
if (rds_destroy_pending(cp->cp_conn)) {
166166
release_in_xmit(cp);
167167
ret = -ENETUNREACH; /* dont requeue send work */
168168
goto out;
@@ -444,7 +444,7 @@ int rds_send_xmit(struct rds_conn_path *cp)
444444
if (batch_count < send_batch_count)
445445
goto restart;
446446
rcu_read_lock();
447-
if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
447+
if (rds_destroy_pending(cp->cp_conn))
448448
ret = -ENETUNREACH;
449449
else
450450
queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
@@ -1162,7 +1162,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
11621162
else
11631163
cpath = &conn->c_path[0];
11641164

1165-
if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) {
1165+
if (rds_destroy_pending(conn)) {
11661166
ret = -EAGAIN;
11671167
goto out;
11681168
}
@@ -1209,7 +1209,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
12091209
if (ret == -ENOMEM || ret == -EAGAIN) {
12101210
ret = 0;
12111211
rcu_read_lock();
1212-
if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags))
1212+
if (rds_destroy_pending(cpath->cp_conn))
12131213
ret = -ENETUNREACH;
12141214
else
12151215
queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
@@ -1295,7 +1295,7 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
12951295

12961296
/* schedule the send work on rds_wq */
12971297
rcu_read_lock();
1298-
if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
1298+
if (!rds_destroy_pending(cp->cp_conn))
12991299
queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
13001300
rcu_read_unlock();
13011301

net/rds/tcp.c

Lines changed: 30 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ static unsigned int rds_tcp_tc_count;
4949
/* Track rds_tcp_connection structs so they can be cleaned up */
5050
static DEFINE_SPINLOCK(rds_tcp_conn_lock);
5151
static LIST_HEAD(rds_tcp_conn_list);
52+
static atomic_t rds_tcp_unloading = ATOMIC_INIT(0);
5253

5354
static struct kmem_cache *rds_tcp_conn_slab;
5455

@@ -274,14 +275,13 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
274275
static void rds_tcp_conn_free(void *arg)
275276
{
276277
struct rds_tcp_connection *tc = arg;
277-
unsigned long flags;
278278

279279
rdsdebug("freeing tc %p\n", tc);
280280

281-
spin_lock_irqsave(&rds_tcp_conn_lock, flags);
281+
spin_lock_bh(&rds_tcp_conn_lock);
282282
if (!tc->t_tcp_node_detached)
283283
list_del(&tc->t_tcp_node);
284-
spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
284+
spin_unlock_bh(&rds_tcp_conn_lock);
285285

286286
kmem_cache_free(rds_tcp_conn_slab, tc);
287287
}
@@ -296,7 +296,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
296296
tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
297297
if (!tc) {
298298
ret = -ENOMEM;
299-
break;
299+
goto fail;
300300
}
301301
mutex_init(&tc->t_conn_path_lock);
302302
tc->t_sock = NULL;
@@ -306,14 +306,19 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
306306

307307
conn->c_path[i].cp_transport_data = tc;
308308
tc->t_cpath = &conn->c_path[i];
309+
tc->t_tcp_node_detached = true;
309310

310-
spin_lock_irq(&rds_tcp_conn_lock);
311-
tc->t_tcp_node_detached = false;
312-
list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
313-
spin_unlock_irq(&rds_tcp_conn_lock);
314311
rdsdebug("rds_conn_path [%d] tc %p\n", i,
315312
conn->c_path[i].cp_transport_data);
316313
}
314+
spin_lock_bh(&rds_tcp_conn_lock);
315+
for (i = 0; i < RDS_MPATH_WORKERS; i++) {
316+
tc = conn->c_path[i].cp_transport_data;
317+
tc->t_tcp_node_detached = false;
318+
list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
319+
}
320+
spin_unlock_bh(&rds_tcp_conn_lock);
321+
fail:
317322
if (ret) {
318323
for (j = 0; j < i; j++)
319324
rds_tcp_conn_free(conn->c_path[j].cp_transport_data);
@@ -332,6 +337,16 @@ static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
332337
return false;
333338
}
334339

340+
static void rds_tcp_set_unloading(void)
341+
{
342+
atomic_set(&rds_tcp_unloading, 1);
343+
}
344+
345+
static bool rds_tcp_is_unloading(struct rds_connection *conn)
346+
{
347+
return atomic_read(&rds_tcp_unloading) != 0;
348+
}
349+
335350
static void rds_tcp_destroy_conns(void)
336351
{
337352
struct rds_tcp_connection *tc, *_tc;
@@ -370,6 +385,7 @@ struct rds_transport rds_tcp_transport = {
370385
.t_type = RDS_TRANS_TCP,
371386
.t_prefer_loopback = 1,
372387
.t_mp_capable = 1,
388+
.t_unloading = rds_tcp_is_unloading,
373389
};
374390

375391
static unsigned int rds_tcp_netid;
@@ -513,7 +529,7 @@ static void rds_tcp_kill_sock(struct net *net)
513529

514530
rtn->rds_tcp_listen_sock = NULL;
515531
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
516-
spin_lock_irq(&rds_tcp_conn_lock);
532+
spin_lock_bh(&rds_tcp_conn_lock);
517533
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
518534
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
519535

@@ -526,7 +542,7 @@ static void rds_tcp_kill_sock(struct net *net)
526542
tc->t_tcp_node_detached = true;
527543
}
528544
}
529-
spin_unlock_irq(&rds_tcp_conn_lock);
545+
spin_unlock_bh(&rds_tcp_conn_lock);
530546
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
531547
rds_conn_destroy(tc->t_cpath->cp_conn);
532548
}
@@ -574,7 +590,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
574590
{
575591
struct rds_tcp_connection *tc, *_tc;
576592

577-
spin_lock_irq(&rds_tcp_conn_lock);
593+
spin_lock_bh(&rds_tcp_conn_lock);
578594
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
579595
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
580596

@@ -584,7 +600,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
584600
/* reconnect with new parameters */
585601
rds_conn_path_drop(tc->t_cpath, false);
586602
}
587-
spin_unlock_irq(&rds_tcp_conn_lock);
603+
spin_unlock_bh(&rds_tcp_conn_lock);
588604
}
589605

590606
static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
@@ -607,6 +623,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
607623

608624
static void rds_tcp_exit(void)
609625
{
626+
rds_tcp_set_unloading();
627+
synchronize_rcu();
610628
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
611629
unregister_pernet_subsys(&rds_tcp_net_ops);
612630
if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))

net/rds/tcp_connect.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
170170
cp->cp_conn, tc, sock);
171171

172172
if (sock) {
173-
if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
173+
if (rds_destroy_pending(cp->cp_conn))
174174
rds_tcp_set_linger(sock);
175175
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
176176
lock_sock(sock->sk);

net/rds/tcp_recv.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -323,7 +323,7 @@ void rds_tcp_data_ready(struct sock *sk)
323323

324324
if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
325325
rcu_read_lock();
326-
if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
326+
if (!rds_destroy_pending(cp->cp_conn))
327327
queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
328328
rcu_read_unlock();
329329
}

net/rds/tcp_send.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ void rds_tcp_write_space(struct sock *sk)
204204

205205
rcu_read_lock();
206206
if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
207-
!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
207+
!rds_destroy_pending(cp->cp_conn))
208208
queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
209209
rcu_read_unlock();
210210

net/rds/threads.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
8888
cp->cp_reconnect_jiffies = 0;
8989
set_bit(0, &cp->cp_conn->c_map_queued);
9090
rcu_read_lock();
91-
if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
91+
if (!rds_destroy_pending(cp->cp_conn)) {
9292
queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
9393
queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
9494
}
@@ -138,7 +138,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
138138
if (cp->cp_reconnect_jiffies == 0) {
139139
cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
140140
rcu_read_lock();
141-
if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
141+
if (!rds_destroy_pending(cp->cp_conn))
142142
queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
143143
rcu_read_unlock();
144144
return;
@@ -149,7 +149,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
149149
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
150150
conn, &conn->c_laddr, &conn->c_faddr);
151151
rcu_read_lock();
152-
if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
152+
if (!rds_destroy_pending(cp->cp_conn))
153153
queue_delayed_work(rds_wq, &cp->cp_conn_w,
154154
rand % cp->cp_reconnect_jiffies);
155155
rcu_read_unlock();

0 commit comments

Comments (0)