Skip to content

Commit d4ead45

Browse files
Wei Lin Guay, gerd-rausch
authored and committed
net/rds: use one sided reconnection during a race
This commit reverts commit 812c027 ("RDS: restore the exponential back-off scheme") to use one-sided reconnection when a race is detected. When a race is detected, the active side reconnects as fast as possible, whereas the passive side waits for 15s. Orabug: 25521901 Signed-off-by: Wei Lin Guay <[email protected]> Tested-by: Dib Chatterjee <[email protected]> (cherry picked from commit 464c84386ab55a2700d963619a470a55e53a1b66 repo https://linux-git.us.oracle.com/UEK/linux-wguay-public) Conflicts: net/rds/ib_cm.c net/rds/rdma_transport.c net/rds/threads.c Made it checkpatch clean. Signed-off-by: Håkon Bugge <[email protected]> Reviewed-by: Shannon Nelson <[email protected]>
1 parent 0963187 commit d4ead45

File tree

6 files changed

+60
-10
lines changed

6 files changed

+60
-10
lines changed

net/rds/connection.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ struct rds_connection *rds_conn_find(struct net *net, struct in6_addr *laddr,
378378
}
379379
EXPORT_SYMBOL_GPL(rds_conn_find);
380380

381-
void rds_conn_shutdown(struct rds_conn_path *cp)
381+
void rds_conn_shutdown(struct rds_conn_path *cp, int restart)
382382
{
383383
struct rds_connection *conn = cp->cp_conn;
384384

@@ -447,7 +447,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
447447
* conn - the reconnect is always triggered by the active peer. */
448448
cancel_delayed_work_sync(&cp->cp_conn_w);
449449
rcu_read_lock();
450-
if (!hlist_unhashed(&conn->c_hash_node)) {
450+
if (!hlist_unhashed(&conn->c_hash_node) && restart) {
451451
rcu_read_unlock();
452452
rds_queue_reconnect(cp);
453453
} else {

net/rds/ib_cm.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,13 +1091,27 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
10911091
rds_ib_stats_inc(s_ib_listen_closed_stale);
10921092
} else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
10931093
unsigned long now = get_seconds();
1094+
conn->c_reconnect_racing++;
10941095

1096+
/* When a race is detected, one side should fall back
1097+
* to passive and let the active side reconnect.
1098+
* If the connection is in CONNECTING and still receives
1099+
* multiple back-to-back REQ, it means something is
1100+
* horribly wrong. Thus, drop the connection.
1101+
*/
1102+
if (conn->c_reconnect_racing > 5) {
1103+
rds_rtd_ptr(RDS_RTD_CM,
1104+
"RDS/IB: conn <%pI6c,%pI6c,%d> back-to-back REQ, reset\n",
1105+
&conn->c_laddr, &conn->c_faddr,
1106+
conn->c_tos);
1107+
conn->c_reconnect_racing = 0;
1108+
rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
10951109
/* After 15 seconds, give up on existing connection
10961110
* attempts and make them try again. At this point
10971111
* it's no longer a race but something has gone
1098-
* horribly wrong
1112+
* horribly wrong.
10991113
*/
1100-
if (now > conn->c_connection_start &&
1114+
} else if (now > conn->c_connection_start &&
11011115
now - conn->c_connection_start > 15) {
11021116
rds_rtd_ptr(RDS_RTD_CM,
11031117
"RDS/IB: connection <%pI6c,%pI6c,%d> racing for 15s, forcing reset",

net/rds/rdma_transport.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
201201
"ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
202202
conn, &conn->c_laddr,
203203
&conn->c_faddr, conn->c_tos);
204+
conn->c_reconnect_racing = 0;
204205
rds_conn_drop(conn, DR_IB_ROUTE_ERR);
205206
}
206207
break;
@@ -215,6 +216,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
215216
"ADDR_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
216217
conn, &conn->c_laddr,
217218
&conn->c_faddr, conn->c_tos);
219+
conn->c_reconnect_racing = 0;
218220
rds_conn_drop(conn, DR_IB_ADDR_ERR);
219221
}
220222
break;
@@ -227,13 +229,23 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
227229
"CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
228230
conn, &conn->c_laddr,
229231
&conn->c_faddr, conn->c_tos);
232+
conn->c_reconnect_racing = 0;
230233
rds_conn_drop(conn, DR_IB_CONNECT_ERR);
231234
}
232235
break;
233236

234237
case RDMA_CM_EVENT_REJECTED:
235238
err = (int *)event->param.conn.private_data;
236239

240+
if (conn && event->status == RDS_REJ_CONSUMER_DEFINED &&
241+
*err <= 1) {
242+
conn->c_reconnect_racing++;
243+
rds_rtd_ptr(RDS_RTD_ERR,
244+
"conn %p, reconnect racing (%d) rds_conn_drop <%pI6c,%pI6c,%d>\n",
245+
conn, conn->c_reconnect_racing, &conn->c_laddr,
246+
&conn->c_faddr, conn->c_tos);
247+
}
248+
237249
if (conn) {
238250
if (event->status == RDS_REJ_CONSUMER_DEFINED &&
239251
(*err) == 0) {
@@ -276,6 +288,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
276288
"ADDR_CHANGE: calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
277289
&conn->c_laddr, &conn->c_faddr,
278290
conn->c_tos);
291+
conn->c_reconnect_racing = 0;
279292
if (!rds_conn_self_loopback_passive(conn))
280293
rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
281294
}
@@ -285,6 +298,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
285298
rds_rtd_ptr(RDS_RTD_CM,
286299
"DISCONNECT event - dropping connection %pI6c->%pI6c tos %d\n",
287300
&conn->c_laddr, &conn->c_faddr, conn->c_tos);
301+
conn->c_reconnect_racing = 0;
288302
rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
289303
break;
290304

net/rds/rdma_transport.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include <rdma/rdma_cm.h>
66
#include "rds.h"
77

8-
#define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000
8+
#define RDS_RDMA_RESOLVE_TIMEOUT_MS RDS_RECONNECT_RETRY_MS
99

1010
/* Per IB specification 7.7.3, service level is a 4-bit field. */
1111
#define TOS_TO_SL(tos) ((tos) & 0xF)

net/rds/rds.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
#define KERNEL_HAS_ATOMIC64
5050
#endif
5151

52+
#define RDS_RECONNECT_RETRY_MS 15000
53+
5254
#ifdef RDS_DEBUG
5355
#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
5456
#else
@@ -318,7 +320,6 @@ struct rds_connection {
318320
wait_queue_head_t c_hs_waitq; /* handshake waitq */
319321

320322

321-
/* used by RDS_CONN_RESET */
322323
struct list_head c_laddr_node;
323324

324325
u32 c_my_gen_num;
@@ -902,7 +903,7 @@ struct rds_connection *rds_conn_find(struct net *net, struct in6_addr *laddr,
902903
struct in6_addr *faddr,
903904
struct rds_transport *trans, u8 tos,
904905
int dev_if);
905-
void rds_conn_shutdown(struct rds_conn_path *cp);
906+
void rds_conn_shutdown(struct rds_conn_path *cp, int restart);
906907
void rds_conn_destroy(struct rds_connection *conn, int shutdown);
907908
void rds_conn_reset(struct rds_connection *conn);
908909
void rds_conn_drop(struct rds_connection *conn, int reason);

net/rds/threads.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
158158
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
159159
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
160160

161-
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
162-
rand % cp->cp_reconnect_jiffies);
161+
if (rds_addr_cmp(&conn->c_laddr, &conn->c_faddr))
162+
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
163+
else
164+
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
165+
msecs_to_jiffies(100));
163166

164167
cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
165168
rds_sysctl_reconnect_max_jiffies);
@@ -308,6 +311,7 @@ void rds_reconnect_timeout(struct work_struct *work)
308311
"conn <%pI6c,%pI6c,%d> not up, retry(%d)\n",
309312
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
310313
cp->cp_reconnect_retry_count);
314+
cp->cp_reconnect_racing = 0;
311315
rds_conn_path_drop(cp, DR_RECONNECT_TIMEOUT);
312316
}
313317
}
@@ -332,7 +336,24 @@ void rds_shutdown_worker(struct work_struct *work)
332336
conn->c_tos,
333337
conn_drop_reason_str(cp->cp_drop_source));
334338

335-
rds_conn_shutdown(cp);
339+
/* if racing is detected, the lower IP backs off and lets the higher IP
340+
* drive the reconnect (one-sided reconnect)
341+
*/
342+
if ((rds_addr_cmp(&conn->c_faddr, &conn->c_laddr) ||
343+
rds_conn_self_loopback_passive(conn)) &&
344+
cp->cp_reconnect_racing) {
345+
rds_rtd_ptr(RDS_RTD_CM,
346+
"calling rds_conn_shutdown, conn %p:0 <%pI6c,%pI6c,%d>\n",
347+
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
348+
rds_conn_shutdown(cp, 0);
349+
queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
350+
msecs_to_jiffies(RDS_RECONNECT_RETRY_MS));
351+
} else {
352+
rds_rtd_ptr(RDS_RTD_CM,
353+
"calling rds_conn_shutdown, conn %p:1 <%pI6c,%pI6c,%d>\n",
354+
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
355+
rds_conn_shutdown(cp, 1);
356+
}
336357
}
337358

338359
void rds_threads_exit(void)

0 commit comments

Comments
 (0)