Skip to content

Commit 5431260

Browse files
Santosh Shilimkar and Somasundaram Krishnasamy
authored and committed
RDS: add reconnect retry scheme for stalled connections
RDS IB connections get stalled at times, and we currently let such connections take their sweet time to reconnect. On the passive side, we wait 15 seconds for such stalled connections, which is too slow relative to application IO timeouts. IB connections are established in milliseconds, so we are better off dropping these stuck connections early and retrying. The retry timeout is tunable via the reconnect_retry_ms sysctl. The upper bound for retries is tunable via rds_sysctl_reconnect_max_retries. Orabug: 22347191 Signed-off-by: Santosh Shilimkar <[email protected]> Reviewed-by: Mukesh Kacker <[email protected]> Tested-by: Michael Nowak <[email protected]> Tested-by: Rafael Alejandro Peralez <[email protected]> Tested-by: Liwen Huang <[email protected]> Tested-by: Hong Liu <[email protected]> Orabug: 27364391 (cherry picked from commit 5acb959) cherry-pick-repo=linux-uek.git Conflicts: net/rds/ib_cm.c net/rds/threads.c Signed-off-by: Gerd Rausch <[email protected]> Signed-off-by: Somasundaram Krishnasamy <[email protected]>
1 parent 086bee3 commit 5431260

File tree

6 files changed

+80
-23
lines changed

6 files changed

+80
-23
lines changed

net/rds/connection.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,9 @@ static struct rds_connection *__rds_conn_create(struct net *net,
215215
}
216216

217217
conn->c_trans = trans;
218+
conn->c_reconnect_retry = rds_sysctl_reconnect_retry_ms;
219+
conn->c_reconnect_retry_count = 0;
220+
218221
if (conn->c_loopback)
219222
conn->c_wq = rds_local_wq;
220223
else

net/rds/ib_cm.c

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -935,22 +935,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
935935
rds_ib_stats_inc(s_ib_listen_closed_stale);
936936
} else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
937937
unsigned long now = get_seconds();
938+
unsigned long retry = conn->c_reconnect_retry;
938939

939-
/*
940-
* after 15 seconds, give up on existing connection
941-
* attempts and make them try again. At this point
942-
* it's no longer a race but something has gone
943-
* horribly wrong
940+
941+
/* after retry seconds, give up on
942+
* existing connection attempts and try again.
943+
* At this point it's no longer backoff race but
944+
* something has gone horribly wrong.
944945
*/
946+
retry = DIV_ROUND_UP(retry, 1000);
945947
if (now > conn->c_connection_start &&
946-
now - conn->c_connection_start > 15) {
947-
printk(KERN_CRIT "RDS/IB: connection "
948-
"<%pI4,%pI4,%d> "
949-
"racing for 15s, forcing reset ",
950-
&conn->c_laddr,
951-
&conn->c_faddr,
952-
conn->c_tos);
953-
rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
948+
now - conn->c_connection_start > retry) {
949+
pr_info("RDS/IB: conn <%pI4,%pI4,%d> racing for more than %lus, retry\n",
950+
&conn->c_laddr, &conn->c_faddr,
951+
conn->c_tos, retry);
952+
set_bit(RDS_RECONNECT_TIMEDOUT,
953+
&conn->c_reconn_flags);
954+
rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
954955
rds_ib_stats_inc(s_ib_listen_closed_stale);
955956
} else {
956957
/* Wait and see - our connect may still be succeeding */

net/rds/rdma_transport.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,11 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
303303
"ADDR_CHANGE: calling rds_conn_drop <%pI4,%pI4,%d>\n",
304304
&conn->c_laddr, &conn->c_faddr,
305305
conn->c_tos);
306-
if (!rds_conn_self_loopback_passive(conn))
306+
if (!rds_conn_self_loopback_passive(conn)) {
307+
queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
308+
msecs_to_jiffies(conn->c_reconnect_retry));
307309
rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
310+
}
308311
}
309312
break;
310313

net/rds/rds.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,9 @@ enum {
141141
#define RDS_RDMA_RESOLVE_TO_MAX_INDEX 5
142142
#define RDS_ADDR_RES_TM_INDEX_MAX 5
143143

144+
/* Bits for c_reconn_flags */
145+
#define RDS_RECONNECT_TIMEDOUT 0
146+
144147
enum rds_conn_drop_src {
145148
/* rds-core */
146149
DR_DEFAULT,
@@ -273,6 +276,9 @@ struct rds_connection {
273276
possible_net_t c_net;
274277

275278
/* Re-connect stall diagnostics */
279+
unsigned long c_reconn_flags;
280+
unsigned long c_reconnect_retry;
281+
unsigned int c_reconnect_retry_count;
276282
unsigned long c_reconnect_start;
277283
unsigned int c_reconnect_drops;
278284
int c_reconnect_warn;
@@ -1040,6 +1046,8 @@ extern unsigned long rds_sysctl_trace_flags;
10401046
extern unsigned int rds_sysctl_trace_level;
10411047
extern unsigned int rds_sysctl_shutdown_trace_start_time;
10421048
extern unsigned int rds_sysctl_shutdown_trace_end_time;
1049+
extern unsigned long rds_sysctl_reconnect_retry_ms;
1050+
extern unsigned int rds_sysctl_reconnect_max_retries;
10431051

10441052
/* threads.c */
10451053
int rds_threads_init(void);

net/rds/sysctl.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@ unsigned int rds_sysctl_ping_enable = 1;
5252
unsigned int rds_sysctl_shutdown_trace_start_time;
5353
unsigned int rds_sysctl_shutdown_trace_end_time;
5454

55+
unsigned long rds_sysctl_reconnect_retry_ms = 1000;
56+
static unsigned long reconnect_retry_ms_min = 100;
57+
static unsigned long reconnect_retry_ms_max = 15000;
58+
59+
unsigned int rds_sysctl_reconnect_max_retries = 60;
60+
/*
 * Lower bound for the reconnect_max_retries sysctl.
 *
 * Declared as int because the table entry uses proc_dointvec_minmax, which
 * reads .extra1/.extra2 through an int pointer. An unsigned long here would
 * be read as its first 4 bytes only, which is 0 rather than 15 on big-endian
 * 64-bit targets.
 */
static int reconnect_min_retries = 15;
61+
5562
/*
5663
* We have official values, but must maintain the sysctl interface for existing
5764
* software that expects to find these values here.
@@ -126,6 +133,25 @@ static struct ctl_table rds_sysctl_rds_table[] = {
126133
.maxlen = sizeof(int),
127134
.mode = 0644,
128135
.proc_handler = &proc_dointvec,
136+
137+
},
138+
{
139+
.procname = "reconnect_retry_ms",
140+
.data = &rds_sysctl_reconnect_retry_ms,
141+
.maxlen = sizeof(unsigned long),
142+
.mode = 0644,
143+
.proc_handler = proc_dointvec_minmax,
144+
.extra1 = &reconnect_retry_ms_min,
145+
.extra2 = &reconnect_retry_ms_max,
146+
},
147+
{
148+
.procname = "reconnect_max_retries",
149+
.data = &rds_sysctl_reconnect_max_retries,
150+
.maxlen = sizeof(unsigned int),
151+
.mode = 0644,
152+
.proc_handler = proc_dointvec_minmax,
153+
.extra1 = &reconnect_min_retries,
154+
.extra2 = &rds_sysctl_reconnect_max_retries,
129155
},
130156
{ }
131157
};

net/rds/threads.c

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ void rds_connect_path_complete(struct rds_connection *conn, int curr)
9191
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
9292

9393
conn->c_reconnect_jiffies = 0;
94+
conn->c_reconnect_retry = rds_sysctl_reconnect_retry_ms;
95+
conn->c_reconnect_retry_count = 0;
9496
set_bit(0, &conn->c_map_queued);
9597
queue_delayed_work(conn->c_wq, &conn->c_send_w, 0);
9698
queue_delayed_work(conn->c_wq, &conn->c_recv_w, 0);
@@ -138,7 +140,8 @@ void rds_queue_reconnect(struct rds_connection *conn)
138140
conn->c_reconnect_jiffies);
139141

140142
set_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
141-
if (conn->c_reconnect_jiffies == 0) {
143+
if (conn->c_reconnect_jiffies == 0 ||
144+
test_and_clear_bit(RDS_RECONNECT_TIMEDOUT, &conn->c_reconn_flags)) {
142145
conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
143146
queue_delayed_work(conn->c_wq, &conn->c_conn_w, 0);
144147
return;
@@ -284,15 +287,28 @@ void rds_reconnect_timeout(struct work_struct *work)
284287
struct rds_connection *conn =
285288
container_of(work, struct rds_connection, c_reconn_w.work);
286289

287-
/* if the higher IP has not reconnected, reset back to two-sided
288-
* reconnect.
289-
*/
290+
if (conn->c_reconnect_retry_count > rds_sysctl_reconnect_max_retries) {
291+
pr_info("RDS: connection <%pI4,%pI4,%d> reconnect retries(%d) exceeded, stop retry\n",
292+
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
293+
conn->c_reconnect_retry_count);
294+
return;
295+
}
296+
290297
if (!rds_conn_up(conn)) {
291-
rds_rtd(RDS_RTD_CM,
292-
"conn not up, calling rds_conn_drop <%pI4,%pI4,%d>\n",
293-
&conn->c_laddr, &conn->c_faddr,
294-
conn->c_tos);
295-
rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
298+
if (rds_conn_up(conn) == RDS_CONN_DISCONNECTING) {
299+
queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
300+
msecs_to_jiffies(100));
301+
} else {
302+
conn->c_reconnect_retry_count++;
303+
rds_rtd(RDS_RTD_CM,
304+
"conn <%pI4,%pI4,%d> not up, retry(%d)\n",
305+
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
306+
conn->c_reconnect_retry_count);
307+
queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
308+
msecs_to_jiffies(conn->c_reconnect_retry));
309+
set_bit(RDS_RECONNECT_TIMEDOUT, &conn->c_reconn_flags);
310+
rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
311+
}
296312
}
297313
}
298314

0 commit comments

Comments
 (0)