Skip to content

Commit 7a352a5

Browse files
Chien-Hua Yen authored and Mukesh Kacker committed
RDS: Idle QoS connections during remote peer reboot causing application brownout
This fix addresses an issue where idle QoS connections do not get a disconnect event when the remote peer reboots. This delays their reconnect and causes an application brownout when the peer comes back online. The fix is to proactively drop and reconnect them when the base lane is going through a reconnect to the rebooted peer, in effect forcing all the lanes to go through the reconnect at the same time. Orabug: 18443194 Signed-off-by: Bang Nguyen <[email protected]> Signed-off-by: Chien-Hua Yen <[email protected]> (cherry picked from commit f51ccef)
1 parent e3d8841 commit 7a352a5

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

net/rds/connection.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,32 @@ void rds_conn_exit(void)
580580
rds_conn_message_info_retrans);
581581
}
582582

583+
/*
584+
* Drop every QoS lane (c_tos != 0) that shares conn's local address,
585+
* peer address and transport. Idle QoS connections may not receive a
586+
* disconnect event when the remote peer reboots, which delays their
587+
* reconnect and causes an application brownout once the peer is back.
588+
* Proactively dropping them while the base lane reconnects to the
589+
* rebooted peer forces all lanes through the reconnect at the same
590+
* time.
*/
591+
static void rds_conn_shutdown_lanes(struct rds_connection *conn)
592+
{
593+
struct hlist_head *head =
594+
rds_conn_bucket(conn->c_laddr, conn->c_faddr); /* list head looked up from conn's (laddr, faddr) pair */
595+
struct rds_connection *tmp;
596+
597+
rcu_read_lock(); /* the list below is walked under the RCU read lock */
598+
hlist_for_each_entry_rcu(tmp, head, c_hash_node) {
599+
if (tmp->c_faddr == conn->c_faddr &&
600+
tmp->c_laddr == conn->c_laddr &&
601+
tmp->c_tos != 0 && /* non-zero TOS only; the visible caller in rds_conn_drop() passes a c_tos == 0 conn */
602+
tmp->c_trans == conn->c_trans) {
603+
rds_conn_drop(tmp); /* NOTE(review): called inside the RCU read section - confirm rds_conn_drop() never sleeps */
604+
}
605+
}
606+
rcu_read_unlock();
607+
}
608+
583609
/*
584610
* Force a disconnect
585611
*/
@@ -608,6 +634,10 @@ void rds_conn_drop(struct rds_connection *conn)
608634
conn->c_reconnect_drops,
609635
conn->c_reconnect_err);
610636
conn->c_reconnect_warn = 0;
637+
638+
/* see comment for rds_conn_shutdown_lanes() */
639+
if (conn->c_tos == 0)
640+
rds_conn_shutdown_lanes(conn);
611641
}
612642
conn->c_reconnect_drops++;
613643

net/rds/ib.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ static int rds_ib_move_ip(char *from_dev,
772772

773773
work->conn = (struct rds_ib_connection *)ic->conn;
774774
INIT_DELAYED_WORK(&work->work, rds_ib_conn_drop);
775-
queue_delayed_work(rds_wq, &work->work,
775+
queue_delayed_work(rds_aux_wq, &work->work,
776776
msecs_to_jiffies(1000 * rds_ib_active_bonding_reconnect_delay));
777777
} else
778778
rds_conn_drop(ic->conn);

0 commit comments

Comments
 (0)