Skip to content

Commit c761611

Browse files
rolandd authored and dledford committed
IB/cm: Fix sleeping in atomic when RoCE is used
A couple of places in the CM do

    spin_lock_irq(&cm_id_priv->lock);
    ...
    if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))

However when the underlying transport is RoCE, this leads to a sleeping function being called with the lock held - the callchain is

    cm_alloc_response_msg() ->
      ib_create_ah_from_wc() ->
        ib_init_ah_from_wc() ->
          rdma_addr_find_l2_eth_by_grh() ->
            rdma_resolve_ip()

and rdma_resolve_ip() starts out by doing

    req = kzalloc(sizeof *req, GFP_KERNEL);

not to mention rdma_addr_find_l2_eth_by_grh() doing

    wait_for_completion(&ctx.comp);

to wait for the task that rdma_resolve_ip() queues up.

Fix this by moving the AH creation out of the lock.

Signed-off-by: Roland Dreier <[email protected]>
Reviewed-by: Sean Hefty <[email protected]>
Signed-off-by: Doug Ledford <[email protected]>
1 parent f43dbeb commit c761611

File tree

1 file changed

+44
-19
lines changed
  • drivers/infiniband/core

1 file changed

+44
-19
lines changed

drivers/infiniband/core/cm.c

Lines changed: 44 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -373,39 +373,60 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
373373
return ret;
374374
}
375375

376-
static int cm_alloc_response_msg(struct cm_port *port,
377-
struct ib_mad_recv_wc *mad_recv_wc,
378-
struct ib_mad_send_buf **msg)
376+
static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
377+
struct ib_mad_recv_wc *mad_recv_wc)
378+
{
379+
return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
380+
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
381+
GFP_ATOMIC,
382+
IB_MGMT_BASE_VERSION);
383+
}
384+
385+
static int cm_create_response_msg_ah(struct cm_port *port,
386+
struct ib_mad_recv_wc *mad_recv_wc,
387+
struct ib_mad_send_buf *msg)
379388
{
380-
struct ib_mad_send_buf *m;
381389
struct ib_ah *ah;
382390

383391
ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
384392
mad_recv_wc->recv_buf.grh, port->port_num);
385393
if (IS_ERR(ah))
386394
return PTR_ERR(ah);
387395

388-
m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
389-
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
390-
GFP_ATOMIC,
391-
IB_MGMT_BASE_VERSION);
392-
if (IS_ERR(m)) {
393-
rdma_destroy_ah(ah);
394-
return PTR_ERR(m);
395-
}
396-
m->ah = ah;
397-
*msg = m;
396+
msg->ah = ah;
398397
return 0;
399398
}
400399

401400
static void cm_free_msg(struct ib_mad_send_buf *msg)
402401
{
403-
rdma_destroy_ah(msg->ah);
402+
if (msg->ah)
403+
rdma_destroy_ah(msg->ah);
404404
if (msg->context[0])
405405
cm_deref_id(msg->context[0]);
406406
ib_free_send_mad(msg);
407407
}
408408

409+
static int cm_alloc_response_msg(struct cm_port *port,
410+
struct ib_mad_recv_wc *mad_recv_wc,
411+
struct ib_mad_send_buf **msg)
412+
{
413+
struct ib_mad_send_buf *m;
414+
int ret;
415+
416+
m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
417+
if (IS_ERR(m))
418+
return PTR_ERR(m);
419+
420+
ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
421+
if (ret) {
422+
cm_free_msg(m);
423+
return ret;
424+
}
425+
426+
*msg = m;
427+
return 0;
428+
}
429+
409430
static void * cm_copy_private_data(const void *private_data,
410431
u8 private_data_len)
411432
{
@@ -2497,15 +2518,17 @@ static int cm_dreq_handler(struct cm_work *work)
24972518
case IB_CM_TIMEWAIT:
24982519
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
24992520
counter[CM_DREQ_COUNTER]);
2500-
if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2521+
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2522+
if (IS_ERR(msg))
25012523
goto unlock;
25022524

25032525
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
25042526
cm_id_priv->private_data,
25052527
cm_id_priv->private_data_len);
25062528
spin_unlock_irq(&cm_id_priv->lock);
25072529

2508-
if (ib_post_send_mad(msg, NULL))
2530+
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2531+
ib_post_send_mad(msg, NULL))
25092532
cm_free_msg(msg);
25102533
goto deref;
25112534
case IB_CM_DREQ_RCVD:
@@ -3083,7 +3106,8 @@ static int cm_lap_handler(struct cm_work *work)
30833106
case IB_CM_MRA_LAP_SENT:
30843107
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
30853108
counter[CM_LAP_COUNTER]);
3086-
if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
3109+
msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
3110+
if (IS_ERR(msg))
30873111
goto unlock;
30883112

30893113
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
@@ -3093,7 +3117,8 @@ static int cm_lap_handler(struct cm_work *work)
30933117
cm_id_priv->private_data_len);
30943118
spin_unlock_irq(&cm_id_priv->lock);
30953119

3096-
if (ib_post_send_mad(msg, NULL))
3120+
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
3121+
ib_post_send_mad(msg, NULL))
30973122
cm_free_msg(msg);
30983123
goto deref;
30993124
case IB_CM_LAP_RCVD:

0 commit comments

Comments
 (0)