Skip to content

Commit 5acb959

Browse files
RDS: add reconnect retry scheme for stalled connections
RDS IB connections get stalled at times, and letting such connections take their sweet time to reconnect is too slow. On the passive side, we wait 15 seconds for such stalled connections, which is too slow given application IO timeouts. IB connections are established in milliseconds, so we had better drop these stuck connections early and retry. The retry timeout is kept tunable via the reconnect_retry_ms sysctl. The upper bound for retries is tunable via rds_sysctl_reconnect_max_retries. Orabug: 22347191 Tested-by: Michael Nowak <[email protected]> Tested-by: Rafael Alejandro Peralez <[email protected]> Tested-by: Liwen Huang <[email protected]> Tested-by: Hong Liu <[email protected]> Reviewed-by: Mukesh Kacker <[email protected]> Signed-off-by: Santosh Shilimkar <[email protected]>
1 parent 812c027 commit 5acb959

File tree

6 files changed

+80
-23
lines changed

6 files changed

+80
-23
lines changed

net/rds/connection.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,9 @@ static struct rds_connection *__rds_conn_create(struct net *net,
215215
}
216216

217217
conn->c_trans = trans;
218+
conn->c_reconnect_retry = rds_sysctl_reconnect_retry_ms;
219+
conn->c_reconnect_retry_count = 0;
220+
218221
if (conn->c_loopback)
219222
conn->c_wq = rds_local_wq;
220223
else

net/rds/ib_cm.c

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -914,22 +914,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
914914
rds_ib_stats_inc(s_ib_listen_closed_stale);
915915
} else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
916916
unsigned long now = get_seconds();
917+
unsigned long retry = conn->c_reconnect_retry;
917918

918-
/*
919-
* after 15 seconds, give up on existing connection
920-
* attempts and make them try again. At this point
921-
* it's no longer a race but something has gone
922-
* horribly wrong
919+
920+
/* after retry seconds, give up on
921+
* existing connection attempts and try again.
922+
* At this point it's no longer backoff race but
923+
* something has gone horribly wrong.
923924
*/
925+
retry = DIV_ROUND_UP(retry, 1000);
924926
if (now > conn->c_connection_start &&
925-
now - conn->c_connection_start > 15) {
926-
printk(KERN_CRIT "RDS/IB: connection "
927-
"<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
928-
"racing for 15s, forcing reset ",
929-
NIPQUAD(conn->c_laddr),
930-
NIPQUAD(conn->c_faddr),
931-
conn->c_tos);
932-
rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
927+
now - conn->c_connection_start > retry) {
928+
pr_info("RDS/IB: conn <%pI4,%pI4,%d> racing for more than %lus, retry\n",
929+
&conn->c_laddr, &conn->c_faddr,
930+
conn->c_tos, retry);
931+
set_bit(RDS_RECONNECT_TIMEDOUT,
932+
&conn->c_reconn_flags);
933+
rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
933934
rds_ib_stats_inc(s_ib_listen_closed_stale);
934935
} else {
935936
/* Wait and see - our connect may still be succeeding */

net/rds/rdma_transport.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,11 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
305305
"ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
306306
NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
307307
conn->c_tos);
308-
if (!rds_conn_self_loopback_passive(conn))
308+
if (!rds_conn_self_loopback_passive(conn)) {
309+
queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
310+
msecs_to_jiffies(conn->c_reconnect_retry));
309311
rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
312+
}
310313
}
311314
break;
312315

net/rds/rds.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ enum {
140140
#define RDS_RDMA_RESOLVE_TO_MAX_INDEX 5
141141
#define RDS_ADDR_RES_TM_INDEX_MAX 5
142142

143+
/* Bits for c_reconn_flags */
144+
#define RDS_RECONNECT_TIMEDOUT 0
145+
143146
enum rds_conn_drop_src {
144147
/* rds-core */
145148
DR_DEFAULT,
@@ -272,6 +275,9 @@ struct rds_connection {
272275
possible_net_t c_net;
273276

274277
/* Re-connect stall diagnostics */
278+
unsigned long c_reconn_flags;
279+
unsigned long c_reconnect_retry;
280+
unsigned int c_reconnect_retry_count;
275281
unsigned long c_reconnect_start;
276282
unsigned int c_reconnect_drops;
277283
int c_reconnect_warn;
@@ -1039,6 +1045,8 @@ extern unsigned long rds_sysctl_trace_flags;
10391045
extern unsigned int rds_sysctl_trace_level;
10401046
extern unsigned int rds_sysctl_shutdown_trace_start_time;
10411047
extern unsigned int rds_sysctl_shutdown_trace_end_time;
1048+
extern unsigned long rds_sysctl_reconnect_retry_ms;
1049+
extern unsigned int rds_sysctl_reconnect_max_retries;
10421050

10431051
/* threads.c */
10441052
int rds_threads_init(void);

net/rds/sysctl.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@ unsigned int rds_sysctl_ping_enable = 1;
5252
unsigned int rds_sysctl_shutdown_trace_start_time;
5353
unsigned int rds_sysctl_shutdown_trace_end_time;
5454

55+
unsigned long rds_sysctl_reconnect_retry_ms = 1000;
56+
static unsigned long reconnect_retry_ms_min = 100;
57+
static unsigned long reconnect_retry_ms_max = 15000;
58+
59+
unsigned int rds_sysctl_reconnect_max_retries = 60;
60+
static unsigned long reconnect_min_retries = 15;
61+
5562
/*
5663
* We have official values, but must maintain the sysctl interface for existing
5764
* software that expects to find these values here.
@@ -126,6 +133,25 @@ static struct ctl_table rds_sysctl_rds_table[] = {
126133
.maxlen = sizeof(int),
127134
.mode = 0644,
128135
.proc_handler = &proc_dointvec,
136+
137+
},
138+
{
139+
.procname = "reconnect_retry_ms",
140+
.data = &rds_sysctl_reconnect_retry_ms,
141+
.maxlen = sizeof(unsigned long),
142+
.mode = 0644,
143+
.proc_handler = proc_dointvec_minmax,
144+
.extra1 = &reconnect_retry_ms_min,
145+
.extra2 = &reconnect_retry_ms_max,
146+
},
147+
{
148+
.procname = "reconnect_max_retries",
149+
.data = &rds_sysctl_reconnect_max_retries,
150+
.maxlen = sizeof(unsigned int),
151+
.mode = 0644,
152+
.proc_handler = proc_dointvec_minmax,
153+
.extra1 = &reconnect_min_retries,
154+
.extra2 = &rds_sysctl_reconnect_max_retries,
129155
},
130156
{ }
131157
};

net/rds/threads.c

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ void rds_connect_path_complete(struct rds_connection *conn, int curr)
9191
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
9292

9393
conn->c_reconnect_jiffies = 0;
94+
conn->c_reconnect_retry = rds_sysctl_reconnect_retry_ms;
95+
conn->c_reconnect_retry_count = 0;
9496
set_bit(0, &conn->c_map_queued);
9597
queue_delayed_work(conn->c_wq, &conn->c_send_w, 0);
9698
queue_delayed_work(conn->c_wq, &conn->c_recv_w, 0);
@@ -138,7 +140,8 @@ void rds_queue_reconnect(struct rds_connection *conn)
138140
conn->c_reconnect_jiffies);
139141

140142
set_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
141-
if (conn->c_reconnect_jiffies == 0) {
143+
if (conn->c_reconnect_jiffies == 0 ||
144+
test_and_clear_bit(RDS_RECONNECT_TIMEDOUT, &conn->c_reconn_flags)) {
142145
conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
143146
queue_delayed_work(conn->c_wq, &conn->c_conn_w, 0);
144147
return;
@@ -284,15 +287,28 @@ void rds_reconnect_timeout(struct work_struct *work)
284287
struct rds_connection *conn =
285288
container_of(work, struct rds_connection, c_reconn_w.work);
286289

287-
/* if the higher IP has not reconnected, reset back to two-sided
288-
* reconnect.
289-
*/
290+
if (conn->c_reconnect_retry_count > rds_sysctl_reconnect_max_retries) {
291+
pr_info("RDS: connection <%pI4,%pI4,%d> reconnect retries(%d) exceeded, stop retry\n",
292+
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
293+
conn->c_reconnect_retry_count);
294+
return;
295+
}
296+
290297
if (!rds_conn_up(conn)) {
291-
rds_rtd(RDS_RTD_CM,
292-
"conn not up, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
293-
NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
294-
conn->c_tos);
295-
rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
298+
if (rds_conn_up(conn) == RDS_CONN_DISCONNECTING) {
299+
queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
300+
msecs_to_jiffies(100));
301+
} else {
302+
conn->c_reconnect_retry_count++;
303+
rds_rtd(RDS_RTD_CM,
304+
"conn <%pI4,%pI4,%d> not up, retry(%d)\n",
305+
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
306+
conn->c_reconnect_retry_count);
307+
queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
308+
msecs_to_jiffies(conn->c_reconnect_retry));
309+
set_bit(RDS_RECONNECT_TIMEDOUT, &conn->c_reconn_flags);
310+
rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
311+
}
296312
}
297313
}
298314

0 commit comments

Comments
 (0)