Skip to content

Commit 1e51592

Browse files
sheftyrleon
authored and committed
IB/cm: Do not hold reference on cm_id unless needed
Typically, when the CM sends a MAD it bumps a reference count on the associated cm_id. There are some exceptions, such as when the MAD is a direct response to a receive MAD. For example, the CM may generate an MRA in response to a duplicate REQ. But, in general, if a MAD may be sent as a result of the user invoking an API call (e.g. ib_send_cm_rep(), ib_send_cm_rtu(), etc.), a reference is taken on the cm_id. This reference is necessary if the MAD requires a response. The reference allows routing a response MAD back to the cm_id, or, if no response is received, allows updating the cm_id state to reflect the failure. For MADs which do not generate a response from the target, however, there's no need to hold a reference on the cm_id. Such MADs will not be retried by the MAD layer and their completions do not change the state of the cm_id. There are 2 internal calls used to allocate MADs which take a reference on the cm_id: cm_alloc_msg() and cm_alloc_priv_msg(). The latter calls the former. It turns out that all other places where cm_alloc_msg() is called are for MADs that do not generate a response from the target: sending an RTU, DREP, REJ, MRA, or SIDR REP. In all of these cases, there's no need to hold a reference on the cm_id. The benefit of dropping unneeded references is that it allows destruction of the cm_id to proceed immediately. Currently, the cm_destroy_id() call blocks as long as there's a reference held on the cm_id. Worse, is that cm_destroy_id() will send MADs, which it then needs to complete. Sending the MADs is beneficial, as they notify the peer that a connection is being destroyed. However, since the MADs hold a reference on the cm_id, they block destruction and cannot be retried. Move cm_id referencing from cm_alloc_msg() to cm_alloc_priv_msg(). The latter should hold a reference on the cm_id in all cases but one, which will be handled in a separate patch. 
cm_alloc_priv_msg() is used when sending a REQ, REP, DREQ, and SIDR REQ, all of which require a response. Also, merge common code into cm_alloc_priv_msg() and combine the freeing of all messages which do not need a response. Signed-off-by: Sean Hefty <[email protected]> Signed-off-by: Or Har-Toov <[email protected]> Signed-off-by: Vlad Dumitrescu <[email protected]> Link: https://patch.msgid.link/1f0f96acace72790ecf89087fc765dead960189e.1731495873.git.leon@kernel.org Signed-off-by: Leon Romanovsky <[email protected]>
1 parent 0492458 commit 1e51592

File tree

1 file changed

+24
-42
lines changed
  • drivers/infiniband/core

1 file changed

+24
-42
lines changed

drivers/infiniband/core/cm.c

Lines changed: 24 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -309,12 +309,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
309309
goto out;
310310
}
311311

312-
/* Timeout set by caller if response is expected. */
313312
m->ah = ah;
314-
m->retries = cm_id_priv->max_cm_retries;
315-
316-
refcount_inc(&cm_id_priv->refcount);
317-
m->context[0] = cm_id_priv;
318313

319314
out:
320315
spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
@@ -323,16 +318,13 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
323318

324319
static void cm_free_msg(struct ib_mad_send_buf *msg)
325320
{
326-
struct cm_id_private *cm_id_priv = msg->context[0];
327-
328321
if (msg->ah)
329322
rdma_destroy_ah(msg->ah, 0);
330-
cm_deref_id(cm_id_priv);
331323
ib_free_send_mad(msg);
332324
}
333325

334326
static struct ib_mad_send_buf *
335-
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
327+
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
336328
{
337329
struct ib_mad_send_buf *msg;
338330

@@ -341,7 +333,15 @@ cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
341333
msg = cm_alloc_msg(cm_id_priv);
342334
if (IS_ERR(msg))
343335
return msg;
336+
344337
cm_id_priv->msg = msg;
338+
refcount_inc(&cm_id_priv->refcount);
339+
msg->context[0] = cm_id_priv;
340+
msg->context[1] = (void *) (unsigned long) state;
341+
342+
msg->retries = cm_id_priv->max_cm_retries;
343+
msg->timeout_ms = cm_id_priv->timeout_ms;
344+
345345
return msg;
346346
}
347347

@@ -413,13 +413,6 @@ static int cm_alloc_response_msg(struct cm_port *port,
413413
return 0;
414414
}
415415

416-
static void cm_free_response_msg(struct ib_mad_send_buf *msg)
417-
{
418-
if (msg->ah)
419-
rdma_destroy_ah(msg->ah, 0);
420-
ib_free_send_mad(msg);
421-
}
422-
423416
static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
424417
{
425418
void *data;
@@ -1567,7 +1560,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
15671560
if (param->alternate_path)
15681561
cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
15691562

1570-
msg = cm_alloc_priv_msg(cm_id_priv);
1563+
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REQ_SENT);
15711564
if (IS_ERR(msg)) {
15721565
ret = PTR_ERR(msg);
15731566
goto out_unlock;
@@ -1576,8 +1569,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
15761569
req_msg = (struct cm_req_msg *)msg->mad;
15771570
cm_format_req(req_msg, cm_id_priv, param);
15781571
cm_id_priv->tid = req_msg->hdr.tid;
1579-
msg->timeout_ms = cm_id_priv->timeout_ms;
1580-
msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT;
15811572

15821573
cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
15831574
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
@@ -1634,7 +1625,7 @@ static int cm_issue_rej(struct cm_port *port,
16341625
IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
16351626
ret = ib_post_send_mad(msg, NULL);
16361627
if (ret)
1637-
cm_free_response_msg(msg);
1628+
cm_free_msg(msg);
16381629

16391630
return ret;
16401631
}
@@ -1990,7 +1981,7 @@ static void cm_dup_req_handler(struct cm_work *work,
19901981
return;
19911982

19921983
unlock: spin_unlock_irq(&cm_id_priv->lock);
1993-
free: cm_free_response_msg(msg);
1984+
free: cm_free_msg(msg);
19941985
}
19951986

19961987
static struct cm_id_private *cm_match_req(struct cm_work *work,
@@ -2304,16 +2295,14 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
23042295
goto out;
23052296
}
23062297

2307-
msg = cm_alloc_priv_msg(cm_id_priv);
2298+
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REP_SENT);
23082299
if (IS_ERR(msg)) {
23092300
ret = PTR_ERR(msg);
23102301
goto out;
23112302
}
23122303

23132304
rep_msg = (struct cm_rep_msg *) msg->mad;
23142305
cm_format_rep(rep_msg, cm_id_priv, param);
2315-
msg->timeout_ms = cm_id_priv->timeout_ms;
2316-
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
23172306

23182307
trace_icm_send_rep(cm_id);
23192308
ret = ib_post_send_mad(msg, NULL);
@@ -2479,7 +2468,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
24792468
goto deref;
24802469

24812470
unlock: spin_unlock_irq(&cm_id_priv->lock);
2482-
free: cm_free_response_msg(msg);
2471+
free: cm_free_msg(msg);
24832472
deref: cm_deref_id(cm_id_priv);
24842473
}
24852474

@@ -2683,16 +2672,14 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
26832672
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
26842673
ib_cancel_mad(cm_id_priv->msg);
26852674

2686-
msg = cm_alloc_priv_msg(cm_id_priv);
2675+
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_DREQ_SENT);
26872676
if (IS_ERR(msg)) {
26882677
cm_enter_timewait(cm_id_priv);
26892678
return PTR_ERR(msg);
26902679
}
26912680

26922681
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
26932682
private_data, private_data_len);
2694-
msg->timeout_ms = cm_id_priv->timeout_ms;
2695-
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
26962683

26972684
trace_icm_send_dreq(&cm_id_priv->id);
26982685
ret = ib_post_send_mad(msg, NULL);
@@ -2819,7 +2806,7 @@ static int cm_issue_drep(struct cm_port *port,
28192806
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
28202807
ret = ib_post_send_mad(msg, NULL);
28212808
if (ret)
2822-
cm_free_response_msg(msg);
2809+
cm_free_msg(msg);
28232810

28242811
return ret;
28252812
}
@@ -2878,7 +2865,7 @@ static int cm_dreq_handler(struct cm_work *work)
28782865

28792866
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
28802867
ib_post_send_mad(msg, NULL))
2881-
cm_free_response_msg(msg);
2868+
cm_free_msg(msg);
28822869
goto deref;
28832870
case IB_CM_DREQ_RCVD:
28842871
atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
@@ -3386,7 +3373,7 @@ static int cm_lap_handler(struct cm_work *work)
33863373

33873374
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
33883375
ib_post_send_mad(msg, NULL))
3389-
cm_free_response_msg(msg);
3376+
cm_free_msg(msg);
33903377
goto deref;
33913378
case IB_CM_LAP_RCVD:
33923379
atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
@@ -3525,16 +3512,14 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
35253512
goto out_unlock;
35263513
}
35273514

3528-
msg = cm_alloc_priv_msg(cm_id_priv);
3515+
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_SIDR_REQ_SENT);
35293516
if (IS_ERR(msg)) {
35303517
ret = PTR_ERR(msg);
35313518
goto out_unlock;
35323519
}
35333520

35343521
cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv,
35353522
param);
3536-
msg->timeout_ms = cm_id_priv->timeout_ms;
3537-
msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT;
35383523

35393524
trace_icm_send_sidr_req(&cm_id_priv->id);
35403525
ret = ib_post_send_mad(msg, NULL);
@@ -3780,17 +3765,17 @@ static int cm_sidr_rep_handler(struct cm_work *work)
37803765

37813766
static void cm_process_send_error(struct cm_id_private *cm_id_priv,
37823767
struct ib_mad_send_buf *msg,
3783-
enum ib_cm_state state,
37843768
enum ib_wc_status wc_status)
37853769
{
3770+
enum ib_cm_state state = (unsigned long) msg->context[1];
37863771
struct ib_cm_event cm_event = {};
37873772
int ret;
37883773

3789-
/* Discard old sends or ones without a response. */
3774+
/* Discard old sends. */
37903775
spin_lock_irq(&cm_id_priv->lock);
37913776
if (msg != cm_id_priv->msg) {
37923777
spin_unlock_irq(&cm_id_priv->lock);
3793-
cm_free_msg(msg);
3778+
cm_free_priv_msg(msg);
37943779
return;
37953780
}
37963781
cm_free_priv_msg(msg);
@@ -3839,8 +3824,6 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
38393824
{
38403825
struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
38413826
struct cm_id_private *cm_id_priv;
3842-
enum ib_cm_state state =
3843-
(enum ib_cm_state)(unsigned long)msg->context[1];
38443827
struct cm_port *port;
38453828
u16 attr_index;
38463829

@@ -3861,10 +3844,9 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
38613844
&port->counters[CM_XMIT_RETRIES][attr_index]);
38623845

38633846
if (cm_id_priv)
3864-
cm_process_send_error(cm_id_priv, msg, state,
3865-
mad_send_wc->status);
3847+
cm_process_send_error(cm_id_priv, msg, mad_send_wc->status);
38663848
else
3867-
cm_free_response_msg(msg);
3849+
cm_free_msg(msg);
38683850
}
38693851

38703852
static void cm_work_handler(struct work_struct *_work)

0 commit comments

Comments
 (0)