Skip to content

Commit 37fef55

Browse files
Hakon-Bugge authored and jfvogel committed
rds: ib: Revert "net/rds: Avoid stalled connection due to CM REQ retries"
This reverts commit 637d078.

Commit 637d078 ("net/rds: Avoid stalled connection due to CM REQ retries") introduced a generation scheme allegedly to detect duplicate RDMA CM REQs. However, duplicate CM packets are filtered by the CM layer and should not reach the client (RDS).

Further, this commit introduced a bug when a node being the active connector rebooted. Once rebooted, the peer would classify the rebooted node's connection requests as duplicates. The signature of this bug is:

    rds_ib_cm_handle_connect: 1151: duplicated REQ in UP state. cm_id x conn y reject! ic->i_last_rej_seq 1 cm_req_seq 1

Orabug: 31648138

Tested-by: Aiman Al-Khammash <[email protected]>
Tested-by: Gerald Gibson <[email protected]>
Reviewed-by: Sharath Srinivasan <[email protected]>
cherry-pick-repo=uek/uek6/master
Signed-off-by: Håkon Bugge <[email protected]>

Conflicts in:
    net/rds/ib.h
    net/rds/ib_cm.c
Due to context differences

(cherry picked from commit b6e8f987e83198acca42cb8a79fda3c7d739ba80)
cherry-pick-repo=uek/uek6/master
Signed-off-by: Håkon Bugge <[email protected]>
1 parent 257ea71 commit 37fef55

File tree

2 files changed

+5
-40
lines changed

2 files changed

+5
-40
lines changed

net/rds/ib.h

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,6 @@
5656
#define RDS_IB_CQ_ERR 2
5757

5858
#define RDS_IB_DEFAULT_FREG_PORT_NUM 1
59-
#define RDS_CM_RETRY_SEQ_EN BIT(7)
60-
#define RDS_CM_REQ_SEQ_SZ (RDS_CM_RETRY_SEQ_EN - 1)
6159

6260
#define RDS_RDMA_RESOLVE_ADDR_TIMEOUT_MS(c) ((c)->c_loopback ? 1000 : 4000)
6361

@@ -103,7 +101,7 @@ struct rds_ib_conn_priv_cmn {
103101
u8 ricpc_protocol_minor;
104102
__be16 ricpc_protocol_minor_mask; /* bitmask */
105103
u8 ricpc_tos;
106-
u8 ricpc_cm_seq;
104+
u8 ricpc_reserved1;
107105
__be16 ricpc_frag_sz;
108106
__be64 ricpc_ack_seq;
109107
__be32 ricpc_credit; /* non-zero enables flow ctl */
@@ -127,7 +125,7 @@ struct rds6_ib_connect_private {
127125
#define dp_protocol_minor dp_cmn.ricpc_protocol_minor
128126
#define dp_protocol_minor_mask dp_cmn.ricpc_protocol_minor_mask
129127
#define dp_tos dp_cmn.ricpc_tos
130-
#define dp_cm_seq dp_cmn.ricpc_cm_seq
128+
#define dp_reserved1 dp_cmn.ricpc_reserved1
131129
#define dp_frag_sz dp_cmn.ricpc_frag_sz
132130
#define dp_ack_seq dp_cmn.ricpc_ack_seq
133131
#define dp_credit dp_cmn.ricpc_credit
@@ -292,9 +290,6 @@ struct rds_ib_connection {
292290
unsigned int i_rx_wait_for_handler;
293291
atomic_t i_worker_has_rx;
294292
atomic_t i_cq_quiesce;
295-
u8 i_req_sequence;
296-
u8 i_prev_seq;
297-
u8 i_last_rej_seq;
298293
uint i_irq_local_cpu;
299294
};
300295

@@ -303,7 +298,6 @@ struct rds_ib_connection {
303298
#define IB_GET_POST_CREDITS(v) ((v) >> 16)
304299
#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff)
305300
#define IB_SET_POST_CREDITS(v) ((v) << 16)
306-
#define IB_GET_CM_SEQ_NUM(v) ((v) & RDS_CM_REQ_SEQ_SZ)
307301

308302
struct rds_ib_ipaddr {
309303
struct list_head list;

net/rds/ib_cm.c

Lines changed: 3 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -384,7 +384,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
384384
u32 protocol_version,
385385
u32 max_responder_resources,
386386
u32 max_initiator_depth, u16 frag,
387-
bool isv6, u8 seq)
387+
bool isv6)
388388
{
389389
struct rds_ib_connection *ic = conn->c_transport_data;
390390
struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
@@ -414,7 +414,6 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
414414
cpu_to_be64(rds_ib_piggyb_ack(ic));
415415
dp->ricp_v6.dp_tos = conn->c_tos;
416416
dp->ricp_v6.dp_frag_sz = cpu_to_be16(frag);
417-
dp->ricp_v6.dp_cm_seq = seq;
418417

419418
conn_param->private_data = &dp->ricp_v6;
420419
conn_param->private_data_len = sizeof(dp->ricp_v6);
@@ -431,7 +430,6 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
431430
cpu_to_be64(rds_ib_piggyb_ack(ic));
432431
dp->ricp_v4.dp_tos = conn->c_tos;
433432
dp->ricp_v4.dp_frag_sz = cpu_to_be16(frag);
434-
dp->ricp_v4.dp_cm_seq = seq;
435433

436434
conn_param->private_data = &dp->ricp_v4;
437435
conn_param->private_data_len = sizeof(dp->ricp_v4);
@@ -1177,8 +1175,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
11771175
u32 version;
11781176
int err = 1;
11791177
u16 frag;
1180-
u8 cm_req_seq = 0;
1181-
bool cm_seq_check_enable = false;
11821178

11831179
/* Check whether the remote protocol version matches ours. */
11841180
version = rds_ib_protocol_compatible(event, isv6);
@@ -1208,8 +1204,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
12081204
goto out;
12091205
}
12101206
}
1211-
cm_seq_check_enable = dp->ricp_v6.dp_cm_seq & RDS_CM_RETRY_SEQ_EN;
1212-
cm_req_seq = IB_GET_CM_SEQ_NUM(dp->ricp_v6.dp_cm_seq);
12131207
#else
12141208
err = -EOPNOTSUPP;
12151209
goto out;
@@ -1220,8 +1214,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
12201214
ipv6_addr_set_v4mapped(dp->ricp_v4.dp_daddr, &d_mapped_addr);
12211215
saddr6 = &s_mapped_addr;
12221216
daddr6 = &d_mapped_addr;
1223-
cm_seq_check_enable = dp->ricp_v4.dp_cm_seq & RDS_CM_RETRY_SEQ_EN;
1224-
cm_req_seq = IB_GET_CM_SEQ_NUM(dp->ricp_v4.dp_cm_seq);
12251217
}
12261218

12271219
rds_rtd_ptr(RDS_RTD_CM,
@@ -1273,22 +1265,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
12731265
*/
12741266
mutex_lock(&conn->c_cm_lock);
12751267
ic = conn->c_transport_data;
1276-
1277-
if (ic && cm_seq_check_enable) {
1278-
if (cm_req_seq != ic->i_prev_seq) {
1279-
rds_rtd(RDS_RTD_CM_EXT_P,
1280-
"cm_id %p conn %p updating ic->i_prev_seq %d cm_req_seq %d\n",
1281-
cm_id, conn, ic->i_prev_seq, cm_req_seq);
1282-
ic->i_prev_seq = cm_req_seq;
1283-
} else if (cm_req_seq == ic->i_prev_seq && ic->i_last_rej_seq == cm_req_seq &&
1284-
rds_conn_state(conn) == RDS_CONN_UP) {
1285-
rds_rtd(RDS_RTD_CM_EXT_P,
1286-
"duplicated REQ in UP state. cm_id %p conn %p reject! ic->i_last_rej_seq %d cm_req_seq %d\n",
1287-
cm_id, conn, ic->i_last_rej_seq, cm_req_seq);
1288-
goto out;
1289-
}
1290-
}
1291-
12921268
if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
12931269
/*
12941270
* in both of the cases below, the conn is half setup.
@@ -1339,8 +1315,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
13391315
rds_ib_stats_inc(s_ib_connect_raced);
13401316
}
13411317
}
1342-
if (ic && cm_seq_check_enable)
1343-
ic->i_last_rej_seq = cm_req_seq;
13441318
goto out;
13451319
} else {
13461320
/* Cancel any pending reconnect */
@@ -1395,7 +1369,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
13951369
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
13961370
event->param.conn.responder_resources,
13971371
event->param.conn.initiator_depth,
1398-
frag, isv6, cm_req_seq);
1372+
frag, isv6);
13991373

14001374
/* rdma_accept() calls rdma_reject() internally if it fails */
14011375
if (rds_ib_sysctl_local_ack_timeout &&
@@ -1450,7 +1424,6 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
14501424
union rds_ib_conn_priv dp;
14511425
u16 frag;
14521426
int ret;
1453-
u8 seq;
14541427

14551428
#ifdef CONFIG_RDS_ACL
14561429

@@ -1495,11 +1468,9 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
14951468
goto out;
14961469
}
14971470
frag = rds_ib_set_frag_size(conn, ib_init_frag_size);
1498-
ic->i_req_sequence = IB_GET_CM_SEQ_NUM(ic->i_req_sequence + 1);
1499-
seq = RDS_CM_RETRY_SEQ_EN | ic->i_req_sequence;
15001471
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
15011472
conn->c_proposed_version, UINT_MAX, UINT_MAX,
1502-
frag, isv6, seq);
1473+
frag, isv6);
15031474
ret = rdma_connect(cm_id, &conn_param);
15041475
if (ret) {
15051476
rds_rtd(RDS_RTD_CM, "RDS/IB: rdma_connect failed (%d)\n", ret);

0 commit comments

Comments
 (0)