Skip to content

Commit f51ccef

Browse files
Chien-Hua Yen authored and
Jerry Snitselaar committed
RDS: Idle QoS connections during remote peer reboot causing application brownout
This fix addresses an issue where idle QoS connections do not get a disconnect event when the remote peer reboots. This causes a delayed reconnect, and hence an application brownout, when the peer comes back online. The fix is to proactively drop and reconnect them when the base lane is going through the reconnect to the rebooted peer, in effect forcing all the lanes to go through the reconnect at the same time. Orabug: 18443194 Signed-off-by: Bang Nguyen <[email protected]> Signed-off-by: Chien-Hua Yen <[email protected]>
1 parent 725e628 commit f51ccef

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

net/rds/connection.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,33 @@ void rds_conn_exit(void)
577577
rds_conn_message_info_retrans);
578578
}
579579

580+
/*
581+
* Drop every QoS lane (c_tos != 0) in conn's hash bucket that has the same
582+
* local address, peer address and transport as conn. Idle QoS lanes do not
583+
* get a disconnect event when the remote peer reboots, which delays their
584+
* reconnect and causes an application brownout once the peer is back online.
585+
* Dropping them while the base lane (c_tos == 0) reconnects to the rebooted
586+
* peer forces all the lanes to go through the reconnect at the same time.
587+
*/
588+
static void rds_conn_shutdown_lanes(struct rds_connection *conn)
589+
{
590+
struct hlist_head *head =
591+
rds_conn_bucket(conn->c_laddr, conn->c_faddr);
592+
struct rds_connection *tmp;
593+
struct hlist_node *pos;
594+
595+
rcu_read_lock();
596+
hlist_for_each_entry_rcu(tmp, pos, head, c_hash_node) {
597+
if (tmp->c_faddr == conn->c_faddr &&
598+
tmp->c_laddr == conn->c_laddr &&
599+
tmp->c_tos != 0 &&
600+
tmp->c_trans == conn->c_trans) {
601+
rds_conn_drop(tmp);
602+
}
603+
}
604+
rcu_read_unlock();
605+
}
606+
580607
/*
581608
* Force a disconnect
582609
*/
@@ -605,6 +632,10 @@ void rds_conn_drop(struct rds_connection *conn)
605632
conn->c_reconnect_drops,
606633
conn->c_reconnect_err);
607634
conn->c_reconnect_warn = 0;
635+
636+
/* see comment for rds_conn_shutdown_lanes() */
637+
if (conn->c_tos == 0)
638+
rds_conn_shutdown_lanes(conn);
608639
}
609640
conn->c_reconnect_drops++;
610641

net/rds/ib.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -754,7 +754,7 @@ static int rds_ib_move_ip(char *from_dev,
754754

755755
work->conn = ic->conn;
756756
INIT_DELAYED_WORK(&work->work, rds_ib_conn_drop);
757-
queue_delayed_work(rds_wq, &work->work,
757+
queue_delayed_work(rds_aux_wq, &work->work,
758758
msecs_to_jiffies(1000 * rds_ib_active_bonding_reconnect_delay));
759759
} else
760760
rds_conn_drop(ic->conn);

0 commit comments

Comments
 (0)