Skip to content

Commit 1a0a04c

Browse files
Ursula Braun authored and davem330 committed
net/smc: check for healthy link group resp. connections
If a problem is detected for at least one connection of a link group, the whole link group and all of its connections are terminated. This patch adds a check for a healthy link group when trying to reserve a work request, and checks for healthy connections before starting a tx worker.

Signed-off-by: Ursula Braun <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 732720f commit 1a0a04c

File tree

4 files changed

+29
-12
lines changed

4 files changed

+29
-12
lines changed

net/smc/smc_cdc.c

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -65,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
6565
struct smc_cdc_tx_pend **pend)
6666
{
6767
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
68+
int rc;
6869

69-
return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
70-
(struct smc_wr_tx_pend_priv **)pend);
70+
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
71+
(struct smc_wr_tx_pend_priv **)pend);
72+
if (!conn->alert_token_local)
73+
/* abnormal termination */
74+
rc = -EPIPE;
75+
return rc;
7176
}
7277

7378
static inline void smc_cdc_add_pending_send(struct smc_connection *conn,

net/smc/smc_diag.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
8686
if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
8787
goto errout;
8888

89-
if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
89+
if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
90+
smc->conn.alert_token_local) {
9091
struct smc_connection *conn = &smc->conn;
9192
struct smc_diag_conninfo cinfo = {
9293
.token = conn->alert_token_local,
@@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
124125
goto errout;
125126
}
126127

127-
if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
128+
if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
129+
!list_empty(&smc->conn.lgr->list)) {
128130
struct smc_diag_lgrinfo linfo = {
129131
.role = smc->conn.lgr->role,
130132
.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,

net/smc/smc_tx.c

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
408408
goto out_unlock;
409409
}
410410
rc = 0;
411-
schedule_delayed_work(&conn->tx_work,
412-
SMC_TX_WORK_DELAY);
411+
if (conn->alert_token_local) /* connection healthy */
412+
schedule_delayed_work(&conn->tx_work,
413+
SMC_TX_WORK_DELAY);
413414
}
414415
goto out_unlock;
415416
}
@@ -440,10 +441,17 @@ static void smc_tx_work(struct work_struct *work)
440441
int rc;
441442

442443
lock_sock(&smc->sk);
444+
if (smc->sk.sk_err ||
445+
!conn->alert_token_local ||
446+
conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
447+
goto out;
448+
443449
rc = smc_tx_sndbuf_nonempty(conn);
444450
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
445451
!atomic_read(&conn->bytes_to_rcv))
446452
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
453+
454+
out:
447455
release_sock(&smc->sk);
448456
}
449457

@@ -464,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
464472
((to_confirm > conn->rmbe_update_limit) &&
465473
((to_confirm > (conn->rmbe_size / 2)) ||
466474
conn->local_rx_ctrl.prod_flags.write_blocked))) {
467-
if (smc_cdc_get_slot_and_msg_send(conn) < 0) {
475+
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
476+
conn->alert_token_local) { /* connection healthy */
468477
schedule_delayed_work(&conn->tx_work,
469478
SMC_TX_WORK_DELAY);
470479
return;

net/smc/smc_wr.c

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -174,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
174174
struct smc_wr_tx_pend_priv **wr_pend_priv)
175175
{
176176
struct smc_wr_tx_pend *wr_pend;
177+
u32 idx = link->wr_tx_cnt;
177178
struct ib_send_wr *wr_ib;
178179
u64 wr_id;
179-
u32 idx;
180180
int rc;
181181

182182
*wr_buf = NULL;
@@ -186,16 +186,17 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
186186
if (rc)
187187
return rc;
188188
} else {
189+
struct smc_link_group *lgr;
190+
191+
lgr = container_of(link, struct smc_link_group,
192+
lnk[SMC_SINGLE_LINK]);
189193
rc = wait_event_timeout(
190194
link->wr_tx_wait,
195+
list_empty(&lgr->list) || /* lgr terminated */
191196
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
192197
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
193198
if (!rc) {
194199
/* timeout - terminate connections */
195-
struct smc_link_group *lgr;
196-
197-
lgr = container_of(link, struct smc_link_group,
198-
lnk[SMC_SINGLE_LINK]);
199200
smc_lgr_terminate(lgr);
200201
return -EPIPE;
201202
}

0 commit comments

Comments
 (0)