Skip to content

Commit 1f2ea7a

Browse files
Wei Lin Guay authored and LinuxMinion committed
net/rds: determine active/passive connection with IP addresses
This patch changes RDS to use randomized backoff only on the first attempt to reconnect. This means both ends try to be active by sending out a REQ to their peer after a random delay of t seconds. If the connection can't be established due to a race, a comparison of the peer IP addresses (e.g. IP_A > IP_B) is used to determine which end takes the active role and which the passive role in connection establishment.

The following description illustrates the connection establishment:

t1randA: 192.168.1.A (active)  --------------> 192.168.1.B (passive)
t1randB: 192.168.1.A (passive) <-------------- 192.168.1.B (active)
t2     : 192.168.1.A (active)  --------------> REJ
t3     : 192.168.1.B (active)  --------------> REJ
t4     : Connection between A,B is not up.
t5     : 192.168.1.A (active)  --------------> 192.168.1.B (passive)

Orabug: 25521901

Signed-off-by: Wei Lin Guay <[email protected]>
Reviewed-by: Håkon Bugge <[email protected]>
Suggested-by: Håkon Bugge <[email protected]>
Tested-by: Dib Chatterjee <[email protected]>
Tested-by: Rosa Isela Lopez Romero <[email protected]>
1 parent ad7312b commit 1f2ea7a

File tree

3 files changed

+47
-6
lines changed

3 files changed

+47
-6
lines changed

net/rds/ib_cm.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,14 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
914914
* see the comment above rds_queue_reconnect()
915915
*/
916916
mutex_lock(&conn->c_cm_lock);
917+
if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN) &&
918+
(conn->c_laddr < conn->c_faddr)) {
919+
rds_rtd(RDS_RTD_CM_EXT_P,
920+
"incoming passive connection is trying to connect %p\n",
921+
conn);
922+
rds_conn_drop(conn, DR_IB_CONN_DROP_RACE);
923+
goto out;
924+
}
917925
if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
918926
/*
919927
* in both of the cases below, the conn is half setup.
@@ -944,10 +952,39 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
944952
NIPQUAD(conn->c_laddr),
945953
NIPQUAD(conn->c_faddr),
946954
conn->c_tos);
955+
rds_rtd(RDS_RTD_CM, "RDS/IB: connection "
956+
" id %p conn %p "
957+
"<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
958+
"racing for 15s, forcing reset\n",
959+
cm_id, conn,
960+
NIPQUAD(conn->c_laddr),
961+
NIPQUAD(conn->c_faddr),
962+
conn->c_tos);
947963
rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
948964
rds_ib_stats_inc(s_ib_listen_closed_stale);
949965
} else {
950966
/* Wait and see - our connect may still be succeeding */
967+
rds_rtd(RDS_RTD_CM, "RDS/IB: connection "
968+
" id %p conn %p "
969+
"<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
970+
" will be rejected\n",
971+
cm_id, conn,
972+
NIPQUAD(conn->c_laddr),
973+
NIPQUAD(conn->c_faddr),
974+
conn->c_tos);
975+
if (test_and_clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags) ||
976+
(conn->c_laddr > conn->c_faddr) ||
977+
rds_conn_self_loopback_passive(conn)) {
978+
rds_rtd(RDS_RTD_CM, "RDS/IB: connection "
979+
" id %p conn %p "
980+
"<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
981+
" will be rejected as passive conn\n",
982+
cm_id, conn,
983+
NIPQUAD(conn->c_laddr),
984+
NIPQUAD(conn->c_faddr),
985+
conn->c_tos);
986+
rds_conn_drop(conn, DR_IB_CONN_DROP_RACE);
987+
}
951988
rds_ib_stats_inc(s_ib_connect_raced);
952989
}
953990
}

net/rds/rds.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ enum {
136136
#define RDS_IN_XMIT 2
137137
#define RDS_RECV_REFILL 3
138138
#define RDS_DESTROY_PENDING 4
139+
#define RDS_INITIAL_RECONNECT 5
139140

140141
#define RDS_RDMA_RESOLVE_TO_MAX_INDEX 5
141142
#define RDS_ADDR_RES_TM_INDEX_MAX 5

net/rds/threads.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
134134
*/
135135
void rds_queue_reconnect(struct rds_conn_path *cp)
136136
{
137+
unsigned long delay = 0;
137138
unsigned long rand;
138139
struct rds_connection *conn = cp->cp_conn;
139140
bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
@@ -149,20 +150,22 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
149150

150151
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
151152
if (cp->cp_reconnect_jiffies == 0) {
153+
set_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags);
152154
cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
153-
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
155+
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, rand % conn->c_reconnect_jiffies);
154156
return;
155157
}
156158

157-
get_random_bytes(&rand, sizeof(rand));
159+
clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags);
160+
if ((conn->c_laddr > conn->c_faddr) ||
161+
rds_conn_self_loopback_passive(conn))
162+
delay = msecs_to_jiffies(15000);
158163
rds_rtd(RDS_RTD_CM_EXT,
159164
"%lu delay %lu ceil conn %p for %pI4 -> %pI4 tos %d\n",
160-
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
165+
delay, cp->cp_reconnect_jiffies,
161166
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
162167

163-
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
164-
rand % cp->cp_reconnect_jiffies);
165-
168+
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, delay);
166169
cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
167170
rds_sysctl_reconnect_max_jiffies);
168171
}

0 commit comments

Comments
 (0)