Commit 2bbf615

Wei Lin Guay authored and LinuxMinion committed
net/rds: remove the RDS specific path record caching
This patch partially reverts commit b128261 ("RDS: SA query optimization"), which introduced RDS-specific path record caching; RDS now relies on the underlying ibacm path record caching instead. ibacm treats all <source, dest, N> entries as the same path record (N is the TOS), so RDS needs to update the SL manually during QP creation. For now, RDS assumes a 1:1 TOS-to-SL mapping.

Orabug: 26124147

Signed-off-by: Wei Lin Guay <[email protected]>
Signed-off-by: Santosh Shilimkar <[email protected]>
Reviewed-by: Avinash Repaka <[email protected]>
Reviewed-by: Håkon Bugge <[email protected]>
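For context, the following is a minimal user-space sketch of the fix-up that the rdma_transport.c hunk below performs once the route is resolved. The struct names (path_rec, cm_route, rds_conn) and the rds_fixup_path_rec() helper are simplified stand-ins invented for illustration, not the kernel types; the actual code writes to cm_id->route.path_rec[0] before calling cm_initiate_connect().

/* Sketch only -- simplified stand-ins, not the kernel structures. */
#include <stdio.h>
#include <stdint.h>

struct path_rec {               /* stand-in for the IB path record */
        uint8_t  sl;            /* service level used when the QP is created */
        uint16_t qos_class;
};

struct cm_route {               /* stand-in for cm_id->route */
        struct path_rec path_rec[1];
};

struct rds_conn {               /* stand-in for struct rds_connection */
        uint8_t c_tos;          /* TOS chosen for this RDS lane */
};

/* ibacm's cache matches on <source, dest, TOS> only, so the cached SL may
 * not reflect this lane's TOS.  Apply the assumed 1:1 TOS -> SL mapping to
 * the first path record before initiating the connection. */
static void rds_fixup_path_rec(struct cm_route *route, const struct rds_conn *conn)
{
        route->path_rec[0].sl = conn->c_tos;
        route->path_rec[0].qos_class = conn->c_tos;
}

int main(void)
{
        struct cm_route route = { .path_rec = { { .sl = 0, .qos_class = 0 } } };
        struct rds_conn conn = { .c_tos = 3 };

        rds_fixup_path_rec(&route, &conn);
        printf("sl=%u qos_class=%u\n",
               (unsigned int)route.path_rec[0].sl,
               (unsigned int)route.path_rec[0].qos_class);
        return 0;
}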
1 parent 5f8fe0e commit 2bbf615

File tree

3 files changed: +10 -82 lines


net/rds/connection.c

Lines changed: 0 additions & 39 deletions
@@ -774,7 +774,6 @@ static char *conn_drop_reasons[] = {
 	[DR_INV_CONN_STATE] = "invalid connection state",
 	[DR_DOWN_TRANSITION_FAIL] = "failure to move to DOWN state",
 	[DR_CONN_DESTROY] = "connection destroy",
-	[DR_ZERO_LANE_DOWN] = "zero lane went down",
 	[DR_CONN_CONNECT_FAIL] = "conn_connect failure",
 	[DR_HB_TIMEOUT] = "hb timeout",
 	[DR_RECONNECT_TIMEOUT] = "reconnect timeout",
@@ -789,7 +788,6 @@ static char *conn_drop_reasons[] = {
 	[DR_IB_RDMA_ACCEPT_FAIL] = "rdma_accept failure",
 	[DR_IB_ACT_SETUP_QP_FAIL] = "active setup_qp failure",
 	[DR_IB_RDMA_CONNECT_FAIL] = "rdma_connect failure",
-	[DR_IB_SET_IB_PATH_FAIL] = "rdma_set_ib_paths failure",
 	[DR_IB_RESOLVE_ROUTE_FAIL] = "resolve_route failure",
 	[DR_IB_RDMA_CM_ID_MISMATCH] = "detected rdma_cm_id mismatch",
 	[DR_IB_ROUTE_ERR] = "ROUTE_ERROR event",
@@ -835,37 +833,6 @@ char *conn_drop_reason_str(enum rds_conn_drop_src reason)
 			       ARRAY_SIZE(conn_drop_reasons), reason);
 }
 
-static void rds_conn_probe_lanes(struct rds_connection *conn)
-{
-	struct hlist_head *head =
-		rds_conn_bucket(conn->c_laddr, conn->c_faddr);
-	struct rds_connection *tmp;
-
-	/* XXX only do this for IB transport? */
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(tmp, head, c_hash_node) {
-		if (tmp->c_faddr == conn->c_faddr &&
-		    tmp->c_laddr == conn->c_laddr &&
-		    tmp->c_tos != 0 &&
-		    tmp->c_trans == conn->c_trans) {
-			if (rds_conn_up(tmp))
-				rds_send_hb(tmp, 0);
-			else if (rds_conn_connecting(tmp) &&
-				 (tmp->c_path[0].cp_route_resolved == 0)) {
-				printk(KERN_INFO "RDS/IB: connection "
-				       "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
-				       "connecting, force reset ",
-				       NIPQUAD(tmp->c_laddr),
-				       NIPQUAD(tmp->c_faddr),
-				       tmp->c_tos);
-
-				rds_conn_drop(tmp, DR_ZERO_LANE_DOWN);
-			}
-		}
-	}
-	rcu_read_unlock();
-}
-
 /*
  * Force a disconnect
  */
@@ -890,9 +857,6 @@ void rds_conn_path_drop(struct rds_conn_path *cp, int reason)
 			       conn->c_tos,
 			       conn_drop_reason_str(cp->cp_drop_source));
 
-			if (conn->c_tos == 0)
-				rds_conn_probe_lanes(conn);
-
 		} else if ((cp->cp_reconnect_warn) &&
 			   (now - cp->cp_reconnect_start > 60)) {
 			printk(KERN_INFO "RDS/%s: re-connect "
@@ -905,9 +869,6 @@ void rds_conn_path_drop(struct rds_conn_path *cp, int reason)
 			       cp->cp_reconnect_drops,
 			       cp->cp_reconnect_err);
 			cp->cp_reconnect_warn = 0;
-
-			if (conn->c_tos == 0)
-				rds_conn_probe_lanes(conn);
 		}
 		cp->cp_reconnect_drops++;
 
net/rds/rdma_transport.c

Lines changed: 10 additions & 41 deletions
@@ -103,7 +103,6 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 	 * while we're executing. */
 	if (conn) {
 		mutex_lock(&conn->c_cm_lock);
-
 		/* If the connection is being shut down, bail out
 		 * right away. We return 0 so cm_id doesn't get
 		 * destroyed prematurely */
@@ -124,44 +123,6 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
 		rdma_set_service_type(cm_id, conn->c_tos);
 
-		if (conn->c_tos && conn->c_reconnect) {
-			struct rds_ib_connection *base_ic =
-				conn->c_base_conn->c_transport_data;
-
-			mutex_lock(&conn->c_base_conn->c_cm_lock);
-			if (rds_conn_transition(conn->c_base_conn, RDS_CONN_UP,
-						RDS_CONN_UP)) {
-				ret = rdma_set_ib_paths(cm_id,
-					base_ic->i_cm_id->route.path_rec,
-					base_ic->i_cm_id->route.num_paths);
-				if (!ret) {
-					struct rds_ib_connection *ic =
-						conn->c_transport_data;
-
-					cm_id->route.path_rec[0].sl =
-						ic->i_sl;
-					cm_id->route.path_rec[0].qos_class =
-						conn->c_tos;
-					ret = trans->cm_initiate_connect(cm_id);
-				}
-			} else {
-				ret = 1;
-			}
-			mutex_unlock(&conn->c_base_conn->c_cm_lock);
-
-			if (ret) {
-				rds_rtd(RDS_RTD_CM,
-					"ADDR_RESOLVED: ret %d, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
-					ret, NIPQUAD(conn->c_laddr),
-					NIPQUAD(conn->c_faddr), conn->c_tos);
-				rds_conn_drop(conn, DR_IB_SET_IB_PATH_FAIL);
-				ret = 0;
-			}
-
-			break;
-		}
-
-
 		/* XXX do we need to clean up if this fails? */
 		ret = rdma_resolve_route(cm_id,
 					 rds_rdma_resolve_to_ms[conn->c_to_index]);
@@ -194,9 +155,17 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 		struct rds_ib_connection *ibic;
 
 		ibic = conn->c_transport_data;
-		if (ibic && ibic->i_cm_id == cm_id)
+		if (ibic && ibic->i_cm_id == cm_id) {
+			/* ibacm caches the path record without considering the tos/sl.
+			 * It is considered a match if the <src,dest> matches the
+			 * cache. In order to create qp with the correct sl/vl, RDS
+			 * needs to update the sl manually. As for now, RDS is assuming
+			 * that it is a 1:1 in tos to sl mapping.
+			 */
+			cm_id->route.path_rec[0].sl = conn->c_tos;
+			cm_id->route.path_rec[0].qos_class = conn->c_tos;
 			ret = trans->cm_initiate_connect(cm_id);
-		else {
+		} else {
 			rds_rtd(RDS_RTD_CM,
 				"ROUTE_RESOLVED: calling rds_conn_drop, conn %p <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
 				conn, NIPQUAD(conn->c_laddr),

net/rds/rds.h

Lines changed: 0 additions & 2 deletions
@@ -154,7 +154,6 @@ enum rds_conn_drop_src {
 	DR_INV_CONN_STATE,
 	DR_DOWN_TRANSITION_FAIL,
 	DR_CONN_DESTROY,
-	DR_ZERO_LANE_DOWN,
 	DR_CONN_CONNECT_FAIL,
 	DR_HB_TIMEOUT,
 	DR_RECONNECT_TIMEOUT,
@@ -173,7 +172,6 @@ enum rds_conn_drop_src {
 	DR_IB_RDMA_CONNECT_FAIL,
 
 	/* event handling */
-	DR_IB_SET_IB_PATH_FAIL,
 	DR_IB_RESOLVE_ROUTE_FAIL,
 	DR_IB_RDMA_CM_ID_MISMATCH,
 	DR_IB_ROUTE_ERR,
