Commit ad52176
Merge branch 'rds-use-RCU-between-work-enqueue-and-connection-teardown'
Sowmini Varadhan says:

====================
rds: use RCU between work-enqueue and connection teardown

This patchset follows up on the root cause mentioned in
https://www.spinics.net/lists/netdev/msg472849.html

Patch 1 implements some code refactoring that was suggested as an
enhancement in http://patchwork.ozlabs.org/patch/843157/. It replaces
the c_destroy_in_prog bit in rds_connection with an atomically managed
flag in rds_conn_path.

Patch 2 builds on Patch 1 and uses RCU to make sure that work is only
enqueued if connection destroy is not already in progress: the
test-flag-and-enqueue is done under rcu_read_lock, while destroy first
sets the flag, uses synchronize_rcu to wait for existing reader threads
to complete, and then starts all the work-cancellation.

Since I have not been able to reproduce the original stack traces
reported by syzbot, and these are fixes for a race condition based
purely on code inspection, I am not marking these as Reported-by at
this time.
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents eb9aa1b + 3db6e0d
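The locking pattern the series introduces can be reduced to a short sketch. Everything below is illustrative, not patch code: my_path, my_wq, MY_DESTROY_PENDING, my_queue_send() and my_destroy() are hypothetical stand-ins for rds_conn_path, rds_wq, RDS_DESTROY_PENDING and the enqueue/teardown sites in the diffs that follow.

	#include <linux/bitops.h>
	#include <linux/rcupdate.h>
	#include <linux/workqueue.h>

	#define MY_DESTROY_PENDING	4	/* stand-in for RDS_DESTROY_PENDING */

	struct my_path {			/* stand-in for rds_conn_path */
		unsigned long		flags;
		struct delayed_work	send_w;
	};

	static struct workqueue_struct *my_wq;	/* stand-in for rds_wq */

	/* Enqueue side: the flag test and the enqueue share one RCU read-side
	 * critical section, so teardown's synchronize_rcu() cannot return
	 * while any CPU sits between the test and queue_delayed_work().
	 */
	static void my_queue_send(struct my_path *p)
	{
		rcu_read_lock();
		if (!test_bit(MY_DESTROY_PENDING, &p->flags))
			queue_delayed_work(my_wq, &p->send_w, 0);
		rcu_read_unlock();
	}

	/* Teardown side: publish the flag, wait out all readers that might
	 * have sampled the old value, then cancel anything they queued.
	 */
	static void my_destroy(struct my_path *p)
	{
		set_bit(MY_DESTROY_PENDING, &p->flags);
		synchronize_rcu();
		cancel_delayed_work_sync(&p->send_w);
	}

Once my_destroy() returns from synchronize_rcu(), every later my_queue_send() call must observe the flag, so the cancel_delayed_work_sync() that follows leaves no pending work behind.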

8 files changed: +86, -24 lines

net/rds/cong.c

Lines changed: 7 additions & 3 deletions
@@ -219,7 +219,11 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
 	spin_lock_irqsave(&rds_cong_lock, flags);
 
 	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
-		if (!test_and_set_bit(0, &conn->c_map_queued)) {
+		struct rds_conn_path *cp = &conn->c_path[0];
+
+		rcu_read_lock();
+		if (!test_and_set_bit(0, &conn->c_map_queued) &&
+		    !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
 			rds_stats_inc(s_cong_update_queued);
 			/* We cannot inline the call to rds_send_xmit() here
 			 * for two reasons (both pertaining to a TCP transport):
@@ -235,9 +239,9 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
 			 * therefore trigger warnings.
 			 * Defer the xmit to rds_send_worker() instead.
 			 */
-			queue_delayed_work(rds_wq,
-					   &conn->c_path[0].cp_send_w, 0);
+			queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
 		}
+		rcu_read_unlock();
 	}
 
 	spin_unlock_irqrestore(&rds_cong_lock, flags);

net/rds/connection.c

Lines changed: 19 additions & 5 deletions
@@ -366,8 +366,6 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
 	 * to the conn hash, so we never trigger a reconnect on this
 	 * conn - the reconnect is always triggered by the active peer. */
 	cancel_delayed_work_sync(&cp->cp_conn_w);
-	if (conn->c_destroy_in_prog)
-		return;
 	rcu_read_lock();
 	if (!hlist_unhashed(&conn->c_hash_node)) {
 		rcu_read_unlock();
@@ -384,10 +382,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
 {
 	struct rds_message *rm, *rtmp;
 
+	set_bit(RDS_DESTROY_PENDING, &cp->cp_flags);
+
 	if (!cp->cp_transport_data)
 		return;
 
 	/* make sure lingering queued work won't try to ref the conn */
+	synchronize_rcu();
 	cancel_delayed_work_sync(&cp->cp_send_w);
 	cancel_delayed_work_sync(&cp->cp_recv_w);
 
@@ -405,6 +406,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
 	if (cp->cp_xmit_rm)
 		rds_message_put(cp->cp_xmit_rm);
 
+	WARN_ON(delayed_work_pending(&cp->cp_send_w));
+	WARN_ON(delayed_work_pending(&cp->cp_recv_w));
+	WARN_ON(delayed_work_pending(&cp->cp_conn_w));
+	WARN_ON(work_pending(&cp->cp_down_w));
+
 	cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
 }
 
@@ -426,7 +432,6 @@ void rds_conn_destroy(struct rds_connection *conn)
 		 "%pI4\n", conn, &conn->c_laddr,
 		 &conn->c_faddr);
 
-	conn->c_destroy_in_prog = 1;
 	/* Ensure conn will not be scheduled for reconnect */
 	spin_lock_irq(&rds_conn_lock);
 	hlist_del_init_rcu(&conn->c_hash_node);
@@ -685,10 +690,13 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
 {
 	atomic_set(&cp->cp_state, RDS_CONN_ERROR);
 
-	if (!destroy && cp->cp_conn->c_destroy_in_prog)
+	rcu_read_lock();
+	if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+		rcu_read_unlock();
 		return;
-
+	}
 	queue_work(rds_wq, &cp->cp_down_w);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_drop);
 
@@ -705,9 +713,15 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
  */
 void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
 {
+	rcu_read_lock();
+	if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+		rcu_read_unlock();
+		return;
+	}
 	if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
 	    !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
 		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);

net/rds/rds.h

Lines changed: 2 additions & 2 deletions
@@ -88,6 +88,7 @@ enum {
 #define RDS_RECONNECT_PENDING	1
 #define RDS_IN_XMIT		2
 #define RDS_RECV_REFILL		3
+#define	RDS_DESTROY_PENDING	4
 
 /* Max number of multipaths per RDS connection. Must be a power of 2 */
 #define	RDS_MPATH_WORKERS	8
@@ -139,8 +140,7 @@ struct rds_connection {
 	__be32			c_faddr;
 	unsigned int		c_loopback:1,
 				c_ping_triggered:1,
-				c_destroy_in_prog:1,
-				c_pad_to_32:29;
+				c_pad_to_32:30;
 	int			c_npaths;
 	struct rds_connection	*c_passive;
 	struct rds_transport	*c_trans;
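For context on the rds.h hunk: a 1-bit C bitfield such as c_destroy_in_prog cannot be tested and set atomically with respect to its neighbors, while a numbered bit in an unsigned long flags word can use the kernel's atomic bitops, which is what motivates moving the state into cp_flags. A hypothetical illustration (cp_flags_demo is not part of the patch):

	#include <linux/bitops.h>
	#include <linux/printk.h>

	/* Hypothetical demo of the atomic bitops now used on cp_flags. */
	static void cp_flags_demo(void)
	{
		unsigned long flags = 0;

		set_bit(4, &flags);			/* bit 4 == RDS_DESTROY_PENDING */
		if (test_bit(4, &flags))		/* atomic single-bit read */
			pr_info("destroy pending\n");
		if (!test_and_set_bit(1, &flags))	/* bit 1 == RDS_RECONNECT_PENDING */
			pr_info("claimed reconnect-pending\n");
	}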

net/rds/send.c

Lines changed: 32 additions & 5 deletions
@@ -162,6 +162,12 @@ int rds_send_xmit(struct rds_conn_path *cp)
 		goto out;
 	}
 
+	if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+		release_in_xmit(cp);
+		ret = -ENETUNREACH; /* dont requeue send work */
+		goto out;
+	}
+
 	/*
 	 * we record the send generation after doing the xmit acquire.
 	 * if someone else manages to jump in and do some work, we'll use
@@ -437,7 +443,12 @@ int rds_send_xmit(struct rds_conn_path *cp)
 	     !list_empty(&cp->cp_send_queue)) && !raced) {
 		if (batch_count < send_batch_count)
 			goto restart;
-		queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+		rcu_read_lock();
+		if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+			ret = -ENETUNREACH;
+		else
+			queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+		rcu_read_unlock();
 	} else if (raced) {
 		rds_stats_inc(s_send_lock_queue_raced);
 	}
@@ -1151,6 +1162,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	else
 		cpath = &conn->c_path[0];
 
+	if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+
 	rds_conn_path_connect_if_down(cpath);
 
 	ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
@@ -1190,9 +1206,17 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	rds_stats_inc(s_send_queued);
 
 	ret = rds_send_xmit(cpath);
-	if (ret == -ENOMEM || ret == -EAGAIN)
-		queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
-
+	if (ret == -ENOMEM || ret == -EAGAIN) {
+		ret = 0;
+		rcu_read_lock();
+		if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags))
+			ret = -ENETUNREACH;
+		else
+			queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
+		rcu_read_unlock();
+	}
+	if (ret)
+		goto out;
 	rds_message_put(rm);
 	return payload_len;
 
@@ -1270,7 +1294,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
 	rds_stats_inc(s_send_pong);
 
 	/* schedule the send work on rds_wq */
-	queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+	rcu_read_lock();
+	if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+		queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+	rcu_read_unlock();
 
 	rds_message_put(rm);
 	return 0;

net/rds/tcp_connect.c

Lines changed: 1 addition & 1 deletion
@@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
 		  cp->cp_conn, tc, sock);
 
 	if (sock) {
-		if (cp->cp_conn->c_destroy_in_prog)
+		if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
 			rds_tcp_set_linger(sock);
 		sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
 		lock_sock(sock->sk);

net/rds/tcp_recv.c

Lines changed: 6 additions & 2 deletions
@@ -321,8 +321,12 @@ void rds_tcp_data_ready(struct sock *sk)
 	ready = tc->t_orig_data_ready;
 	rds_tcp_stats_inc(s_tcp_data_ready_calls);
 
-	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM)
-		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
+		rcu_read_lock();
+		if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+			queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+		rcu_read_unlock();
+	}
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk);

net/rds/tcp_send.c

Lines changed: 4 additions & 1 deletion
@@ -202,8 +202,11 @@ void rds_tcp_write_space(struct sock *sk)
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
 	rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+	rcu_read_lock();
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
+	    !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
 		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+	rcu_read_unlock();
 
 out:
 	read_unlock_bh(&sk->sk_callback_lock);

net/rds/threads.c

Lines changed: 15 additions & 5 deletions
@@ -87,8 +87,12 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
 
 	cp->cp_reconnect_jiffies = 0;
 	set_bit(0, &cp->cp_conn->c_map_queued);
-	queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
-	queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	rcu_read_lock();
+	if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_connect_path_complete);
 
@@ -133,16 +137,22 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 	set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
 	if (cp->cp_reconnect_jiffies == 0) {
 		cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
-		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+		rcu_read_lock();
+		if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+			queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+		rcu_read_unlock();
 		return;
 	}
 
 	get_random_bytes(&rand, sizeof(rand));
 	rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
 		 rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
 		 conn, &conn->c_laddr, &conn->c_faddr);
-	queue_delayed_work(rds_wq, &cp->cp_conn_w,
-			   rand % cp->cp_reconnect_jiffies);
+	rcu_read_lock();
+	if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+		queue_delayed_work(rds_wq, &cp->cp_conn_w,
+				   rand % cp->cp_reconnect_jiffies);
+	rcu_read_unlock();
 
 	cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
 				       rds_sysctl_reconnect_max_jiffies);
