Skip to content

Commit 3ba8c62

Browse files
committed
Merge branch 'smc-race-fixes'
Wen Gu says: ==================== net/smc: Fixes for race in smc link group termination We encountered some crashes recently and they are caused by the race between the access and free of link/link group in abnormal smc link group termination. The crashes can be reproduced in frequent abnormal link group termination, like setting RNICs up/down. This set of patches tries to fix this by extending the life cycle of link/link group to ensure that they won't be referred to after cleared or freed. v1 -> v2: - Improve some comments. - Move codes of waking up lgrs_deleted wait queue from smc_lgr_free() to __smc_lgr_free(). - Move codes of waking up links_deleted wait queue from smcr_link_clear() to __smcr_link_clear(). - Move codes of smc_ibdev_cnt_dec() and put_device() from smcr_link_clear() to __smcr_link_clear() - Move smc_lgr_put() to the end of __smcr_link_clear(). - Call smc_lgr_put() after 'out' tag in smcr_link_init() when link initialization fails. - Modify the location where smc connection holds the lgr or link. before: * hold lgr in smc_lgr_register_conn(). * hold link in smcr_lgr_conn_assign_link(). after: * hold both lgr and link in smc_conn_create(). Modify the location to symmetrical with the place where smc connections put the lgr or link, which is smc_conn_free(). - Initialize conn->freed as zero in smc_conn_create(). ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents de0e444 + 61f434b commit 3ba8c62

File tree

3 files changed

+53
-11
lines changed

3 files changed

+53
-11
lines changed

net/smc/smc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ struct smc_connection {
221221
*/
222222
u64 peer_token; /* SMC-D token of peer */
223223
u8 killed : 1; /* abnormal termination */
224+
u8 freed : 1; /* normal termiation */
224225
u8 out_of_sync : 1; /* out of sync with peer */
225226
};
226227

net/smc/smc_core.c

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,6 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
218218
__smc_lgr_unregister_conn(conn);
219219
}
220220
write_unlock_bh(&lgr->conns_lock);
221-
conn->lgr = NULL;
222221
}
223222

224223
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
@@ -752,6 +751,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
752751
lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
753752
lnk->link_id = smcr_next_link_id(lgr);
754753
lnk->lgr = lgr;
754+
smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
755755
lnk->link_idx = link_idx;
756756
smc_ibdev_cnt_inc(lnk);
757757
smcr_copy_dev_info_to_link(lnk);
@@ -806,6 +806,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
806806
lnk->state = SMC_LNK_UNUSED;
807807
if (!atomic_dec_return(&smcibdev->lnk_cnt))
808808
wake_up(&smcibdev->lnks_deleted);
809+
smc_lgr_put(lgr); /* lgr_hold above */
809810
return rc;
810811
}
811812

@@ -844,6 +845,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
844845
lgr->terminating = 0;
845846
lgr->freeing = 0;
846847
lgr->vlan_id = ini->vlan_id;
848+
refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
847849
mutex_init(&lgr->sndbufs_lock);
848850
mutex_init(&lgr->rmbs_lock);
849851
rwlock_init(&lgr->conns_lock);
@@ -1130,8 +1132,19 @@ void smc_conn_free(struct smc_connection *conn)
11301132
{
11311133
struct smc_link_group *lgr = conn->lgr;
11321134

1133-
if (!lgr)
1135+
if (!lgr || conn->freed)
1136+
/* Connection has never been registered in a
1137+
* link group, or has already been freed.
1138+
*/
11341139
return;
1140+
1141+
conn->freed = 1;
1142+
if (!conn->alert_token_local)
1143+
/* Connection has already unregistered from
1144+
* link group.
1145+
*/
1146+
goto lgr_put;
1147+
11351148
if (lgr->is_smcd) {
11361149
if (!list_empty(&lgr->list))
11371150
smc_ism_unset_conn(conn);
@@ -1148,6 +1161,8 @@ void smc_conn_free(struct smc_connection *conn)
11481161

11491162
if (!lgr->conns_num)
11501163
smc_lgr_schedule_free_work(lgr);
1164+
lgr_put:
1165+
smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
11511166
}
11521167

11531168
/* unregister a link from a buf_desc */
@@ -1206,9 +1221,10 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
12061221
/* must be called under lgr->llc_conf_mutex lock */
12071222
void smcr_link_clear(struct smc_link *lnk, bool log)
12081223
{
1224+
struct smc_link_group *lgr = lnk->lgr;
12091225
struct smc_ib_device *smcibdev;
12101226

1211-
if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
1227+
if (!lgr || lnk->state == SMC_LNK_UNUSED)
12121228
return;
12131229
lnk->peer_qpn = 0;
12141230
smc_llc_link_clear(lnk, log);
@@ -1226,6 +1242,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
12261242
lnk->state = SMC_LNK_UNUSED;
12271243
if (!atomic_dec_return(&smcibdev->lnk_cnt))
12281244
wake_up(&smcibdev->lnks_deleted);
1245+
smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
12291246
}
12301247

12311248
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
@@ -1290,6 +1307,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
12901307
__smc_lgr_free_bufs(lgr, true);
12911308
}
12921309

1310+
/* won't be freed until no one accesses to lgr anymore */
1311+
static void __smc_lgr_free(struct smc_link_group *lgr)
1312+
{
1313+
smc_lgr_free_bufs(lgr);
1314+
if (lgr->is_smcd) {
1315+
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
1316+
wake_up(&lgr->smcd->lgrs_deleted);
1317+
} else {
1318+
smc_wr_free_lgr_mem(lgr);
1319+
if (!atomic_dec_return(&lgr_cnt))
1320+
wake_up(&lgrs_deleted);
1321+
}
1322+
kfree(lgr);
1323+
}
1324+
12931325
/* remove a link group */
12941326
static void smc_lgr_free(struct smc_link_group *lgr)
12951327
{
@@ -1305,19 +1337,23 @@ static void smc_lgr_free(struct smc_link_group *lgr)
13051337
smc_llc_lgr_clear(lgr);
13061338
}
13071339

1308-
smc_lgr_free_bufs(lgr);
13091340
destroy_workqueue(lgr->tx_wq);
13101341
if (lgr->is_smcd) {
13111342
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
13121343
put_device(&lgr->smcd->dev);
1313-
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
1314-
wake_up(&lgr->smcd->lgrs_deleted);
1315-
} else {
1316-
smc_wr_free_lgr_mem(lgr);
1317-
if (!atomic_dec_return(&lgr_cnt))
1318-
wake_up(&lgrs_deleted);
13191344
}
1320-
kfree(lgr);
1345+
smc_lgr_put(lgr); /* theoretically last lgr_put */
1346+
}
1347+
1348+
void smc_lgr_hold(struct smc_link_group *lgr)
1349+
{
1350+
refcount_inc(&lgr->refcnt);
1351+
}
1352+
1353+
void smc_lgr_put(struct smc_link_group *lgr)
1354+
{
1355+
if (refcount_dec_and_test(&lgr->refcnt))
1356+
__smc_lgr_free(lgr);
13211357
}
13221358

13231359
static void smc_sk_wake_ups(struct smc_sock *smc)
@@ -1856,6 +1892,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
18561892
goto out;
18571893
}
18581894
}
1895+
smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
1896+
conn->freed = 0;
18591897
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
18601898
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
18611899
conn->urg_state = SMC_URG_READ;

net/smc/smc_core.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ struct smc_link_group {
249249
u8 terminating : 1;/* lgr is terminating */
250250
u8 freeing : 1; /* lgr is being freed */
251251

252+
refcount_t refcnt; /* lgr reference count */
252253
bool is_smcd; /* SMC-R or SMC-D */
253254
u8 smc_version;
254255
u8 negotiated_eid[SMC_MAX_EID_LEN];
@@ -487,6 +488,8 @@ struct smc_clc_msg_accept_confirm;
487488

488489
void smc_lgr_cleanup_early(struct smc_link_group *lgr);
489490
void smc_lgr_terminate_sched(struct smc_link_group *lgr);
491+
void smc_lgr_hold(struct smc_link_group *lgr);
492+
void smc_lgr_put(struct smc_link_group *lgr);
490493
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
491494
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
492495
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,

0 commit comments

Comments
 (0)