Skip to content

Commit 1bc87d2

Browse files
Wei Lin GuayLinuxMinion
authored and committed
net/rds: prioritize the base connection establishment
As of today, TOS connections can only be established after their base connections are up, because TOS connections rely on their base connections to perform route resolution. However, when all the connections drop and reconnect (e.g., on an ADDR_CHANGE event), TOS connection establishment consumes CPU resources by constantly retrying until the base connections are up. This patch therefore delays all the TOS connections whose associated base connections are not up, giving priority to base connection establishment. Consequently, the base connections can be established faster, and their associated TOS connections subsequently follow. Orabug: 25521901 Signed-off-by: Wei Lin Guay <[email protected]> Reviewed-by: Håkon Bugge <[email protected]> Reviewed-by: Ajaykumar Hotchandani <[email protected]> Tested-by: Dib Chatterjee <[email protected]> Tested-by: Rosa Isela Lopez Romero <[email protected]>
1 parent 1f2ea7a commit 1bc87d2

File tree

4 files changed

+48
-14
lines changed

4 files changed

+48
-14
lines changed

net/rds/connection.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ static void __rds_conn_path_init(struct rds_connection *conn,
159159
INIT_DELAYED_WORK(&cp->cp_hb_w, rds_hb_worker);
160160
INIT_DELAYED_WORK(&cp->cp_reconn_w, rds_reconnect_timeout);
161161
INIT_DELAYED_WORK(&cp->cp_reject_w, rds_reject_worker);
162-
INIT_WORK(&cp->cp_down_w, rds_shutdown_worker);
162+
INIT_DELAYED_WORK(&cp->cp_down_w, rds_shutdown_worker);
163163
mutex_init(&cp->cp_cm_lock);
164164
cp->cp_flags = 0;
165165
}
@@ -421,7 +421,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp, int restart)
421421
rcu_read_lock();
422422
if (!hlist_unhashed(&conn->c_hash_node) && restart) {
423423
rcu_read_unlock();
424-
rds_queue_reconnect(cp);
424+
rds_queue_reconnect(cp, DR_DEFAULT);
425425
} else {
426426
rcu_read_unlock();
427427
}
@@ -442,7 +442,7 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp, int shutdown)
442442
return;
443443

444444
rds_conn_path_drop(cp, DR_CONN_DESTROY);
445-
flush_work(&cp->cp_down_w);
445+
flush_delayed_work(&cp->cp_down_w);
446446

447447
/* now that conn down worker is flushed; there cannot be any
448448
* more posting of reconn timeout work. But cancel any already
@@ -852,6 +852,7 @@ void rds_conn_path_drop(struct rds_conn_path *cp, int reason)
852852
unsigned long now = get_seconds();
853853
struct rds_connection *conn = cp->cp_conn;
854854

855+
unsigned long delay = 0;
855856
cp->cp_drop_source = reason;
856857
if (rds_conn_path_state(cp) == RDS_CONN_UP) {
857858
cp->cp_reconnect_start = now;
@@ -891,13 +892,16 @@ void rds_conn_path_drop(struct rds_conn_path *cp, int reason)
891892

892893
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
893894

895+
if ((conn->c_tos && reason == DR_IB_ADDR_CHANGE) ||
896+
reason == DR_IB_BASE_CONN_DOWN)
897+
delay = msecs_to_jiffies(100);
894898
rds_rtd(RDS_RTD_CM_EXT,
895899
"RDS/%s: queueing shutdown work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
896900
conn->c_trans->t_type == RDS_TRANS_TCP ? "TCP" : "IB",
897901
conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
898902
conn->c_tos);
899903

900-
queue_work(cp->cp_wq, &cp->cp_down_w);
904+
queue_delayed_work(cp->cp_wq, &cp->cp_down_w, delay);
901905
}
902906
EXPORT_SYMBOL_GPL(rds_conn_path_drop);
903907

@@ -918,11 +922,18 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
918922

919923
if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
920924
!test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) {
925+
if (conn->c_tos == 0 ||
926+
(conn->c_tos && rds_conn_state(cp->cp_base_conn) == RDS_CONN_UP)) {
921927
rds_rtd(RDS_RTD_CM_EXT,
922928
"queueing connect work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
923929
conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
924930
conn->c_tos);
925931
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
932+
} else
933+
rds_rtd(RDS_RTD_CM_EXT,
934+
"skip, base conn %p down, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
935+
cp->cp_base_conn, conn, NIPQUAD(conn->c_laddr),
936+
NIPQUAD(conn->c_faddr), conn->c_tos);
926937
}
927938
}
928939
EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);

net/rds/ib_cm.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1144,9 +1144,24 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp)
11441144
struct rds_connection *conn = cp->cp_conn;
11451145
struct rds_ib_connection *ic = conn->c_transport_data;
11461146
struct sockaddr_in src, dest;
1147-
int ret;
1147+
int ret = 0;
11481148

11491149
conn->c_route_resolved = 0;
1150+
1151+
if (conn->c_tos) {
1152+
mutex_lock(&conn->c_base_conn->c_cm_lock);
1153+
if (!rds_conn_transition(conn->c_base_conn, RDS_CONN_UP,
1154+
RDS_CONN_UP)) {
1155+
rds_rtd(RDS_RTD_CM_EXT,
1156+
"RDS/IB: base conn %p (%p) is not up\n",
1157+
conn->c_base_conn, conn);
1158+
ret = DR_IB_BASE_CONN_DOWN;
1159+
}
1160+
mutex_unlock(&conn->c_base_conn->c_cm_lock);
1161+
if (ret)
1162+
goto out;
1163+
}
1164+
11501165
/* XXX I wonder what affect the port space has */
11511166
/* delegate cm event handler to rdma_transport */
11521167
ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,

net/rds/rds.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ struct rds_conn_path {
253253
struct delayed_work cp_reject_w;
254254
struct delayed_work cp_hb_w;
255255
struct delayed_work cp_reconn_w;
256-
struct work_struct cp_down_w;
256+
struct delayed_work cp_down_w;
257257
struct mutex cp_cm_lock; /* protect cp_state & cm */
258258
wait_queue_head_t cp_waitq;
259259

@@ -1139,7 +1139,7 @@ void rds_threads_exit(void);
11391139
extern struct workqueue_struct *rds_wq;
11401140
extern struct workqueue_struct *rds_tos_wq;
11411141
extern struct workqueue_struct *rds_local_wq;
1142-
void rds_queue_reconnect(struct rds_conn_path *cp);
1142+
void rds_queue_reconnect(struct rds_conn_path *cp, int reason);
11431143
void rds_connect_worker(struct work_struct *);
11441144
void rds_shutdown_worker(struct work_struct *);
11451145
void rds_send_worker(struct work_struct *);

net/rds/threads.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,31 +132,38 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
132132
* We should *always* start with a random backoff; otherwise a broken connection
133133
* will always take several iterations to be re-established.
134134
*/
135-
void rds_queue_reconnect(struct rds_conn_path *cp)
135+
void rds_queue_reconnect(struct rds_conn_path *cp, int reason)
136136
{
137137
unsigned long delay = 0;
138138
unsigned long rand;
139139
struct rds_connection *conn = cp->cp_conn;
140140
bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
141141

142142
rds_rtd(RDS_RTD_CM_EXT,
143-
"conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu\n", conn,
143+
"conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu %s\n", conn,
144144
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
145-
cp->cp_reconnect_jiffies);
145+
cp->cp_reconnect_jiffies, conn_drop_reason_str(reason));
146146

147147
/* let peer with smaller addr initiate reconnect, to avoid duels */
148148
if (is_tcp && !IS_CANONICAL(conn->c_laddr, conn->c_faddr))
149149
return;
150150

151+
if (reason == DR_IB_BASE_CONN_DOWN) {
152+
cp->cp_reconnect_jiffies = 0;
153+
delay = msecs_to_jiffies(500);
154+
}
155+
151156
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
152157
if (cp->cp_reconnect_jiffies == 0) {
153158
set_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags);
159+
get_random_bytes(&rand, sizeof(rand));
154160
cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
155-
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, rand % conn->c_reconnect_jiffies);
161+
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
162+
delay + (rand % cp->cp_reconnect_jiffies));
156163
return;
157164
}
158165

159-
clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags);
166+
clear_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags);
160167
if ((conn->c_laddr > conn->c_faddr) ||
161168
rds_conn_self_loopback_passive(conn))
162169
delay = msecs_to_jiffies(15000);
@@ -203,7 +210,8 @@ void rds_connect_worker(struct work_struct *work)
203210
RDS_CONN_DOWN)) {
204211
rds_rtd(RDS_RTD_CM_EXT,
205212
"reconnecting..., conn %p\n", conn);
206-
rds_queue_reconnect(cp);
213+
rds_queue_reconnect(cp, ret == DR_IB_BASE_CONN_DOWN ?
214+
DR_IB_BASE_CONN_DOWN : DR_DEFAULT);
207215
} else {
208216
rds_conn_path_drop(cp, DR_CONN_CONNECT_FAIL);
209217
}
@@ -334,7 +342,7 @@ void rds_shutdown_worker(struct work_struct *work)
334342
{
335343
struct rds_conn_path *cp = container_of(work,
336344
struct rds_conn_path,
337-
cp_down_w);
345+
cp_down_w.work);
338346
unsigned long now = get_seconds();
339347
bool is_tcp = cp->cp_conn->c_trans->t_type == RDS_TRANS_TCP;
340348
struct rds_connection *conn = cp->cp_conn;

0 commit comments

Comments
 (0)