
Commit 083bfdb

RDMA/cm: Make it clear that there is no concurrency in cm_sidr_req_handler()
ib_create_cm_id() immediately places the id in the xarray, so it is visible to network traffic.

The state is initially set to IB_CM_IDLE and all the MAD handlers will test this state under lock and refuse to advance from IDLE, so adding to the xarray is harmless. Further, the set to IB_CM_SIDR_REQ_RCVD also excludes all MAD handlers.

However, the local_id isn't even used for SIDR mode, and there will be no input MADs related to the newly created ID. So, make the whole flow simpler so it can be understood:

- Do not put the SIDR cm_id in the xarray. This directly shows that there is no concurrency.

- Delete the confusing work_count and pending_list manipulations. This mechanism is only used by MAD handlers and timewait, neither of which apply to SIDR.

- Add a few comments and rename 'cur_cm_id_priv' to 'listen_cm_id_priv'.

- Move other loose sets up to immediately after cm_id creation so that the cm_id is fully configured right away. This fixes an oversight where the service_id would not be returned back on an IB_SIDR_UNSUPPORTED reject.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Leon Romanovsky <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
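For context, the guard described above, each MAD handler checking the id state under the per-id lock and refusing to act on an id that is still IB_CM_IDLE, has roughly the shape sketched below. This is an illustrative sketch only; the handler name is hypothetical and the expected state is just an example, it is not code touched by this commit.

	/*
	 * Illustrative sketch (not part of this change): the rough shape of the
	 * state test that incoming-MAD handlers perform.  An id that is still
	 * IB_CM_IDLE fails the test and is left alone, which is why an id that
	 * is merely visible in the xarray cannot be advanced by network traffic.
	 */
	static int cm_example_mad_handler(struct cm_id_private *cm_id_priv)
	{
		unsigned long flags;

		spin_lock_irqsave(&cm_id_priv->lock, flags);
		if (cm_id_priv->id.state != IB_CM_REQ_RCVD) {	/* e.g. still IB_CM_IDLE */
			spin_unlock_irqrestore(&cm_id_priv->lock, flags);
			return -EINVAL;	/* refuse to advance the state machine */
		}
		/* ... advance the state machine while still holding the lock ... */
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		return 0;
	}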
1 parent 153a2e4 commit 083bfdb

File tree

1 file changed: +37 −27 lines

  • drivers/infiniband/core/cm.c


drivers/infiniband/core/cm.c

Lines changed: 37 additions & 27 deletions
@@ -3535,20 +3535,27 @@ static void cm_format_sidr_req_event(struct cm_work *work,
 
 static int cm_sidr_req_handler(struct cm_work *work)
 {
-	struct ib_cm_id *cm_id;
-	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
 	struct cm_sidr_req_msg *sidr_req_msg;
 	struct ib_wc *wc;
 	int ret;
 
-	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
-	if (IS_ERR(cm_id))
-		return PTR_ERR(cm_id);
-	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	cm_id_priv =
+		cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+	if (IS_ERR(cm_id_priv))
+		return PTR_ERR(cm_id_priv);
 
 	/* Record SGID/SLID and request ID for lookup. */
 	sidr_req_msg = (struct cm_sidr_req_msg *)
 				work->mad_recv_wc->recv_buf.mad;
+
+	cm_id_priv->id.remote_id =
+		cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
+	cm_id_priv->id.service_id =
+		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
+	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+	cm_id_priv->tid = sidr_req_msg->hdr.tid;
+
 	wc = work->mad_recv_wc->wc;
 	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
 	cm_id_priv->av.dgid.global.interface_id = 0;
@@ -3558,41 +3565,44 @@ static int cm_sidr_req_handler(struct cm_work *work)
 	if (ret)
 		goto out;
 
-	cm_id_priv->id.remote_id =
-		cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
-	cm_id_priv->tid = sidr_req_msg->hdr.tid;
-	atomic_inc(&cm_id_priv->work_count);
-
 	spin_lock_irq(&cm.lock);
-	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
-	if (cur_cm_id_priv) {
+	listen_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
+	if (listen_cm_id_priv) {
 		spin_unlock_irq(&cm.lock);
 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
 				counter[CM_SIDR_REQ_COUNTER]);
 		goto out; /* Duplicate message. */
 	}
 	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
-	cur_cm_id_priv = cm_find_listen(
-		cm_id->device,
-		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)));
-	if (!cur_cm_id_priv) {
+	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+					   cm_id_priv->id.service_id);
+	if (!listen_cm_id_priv) {
 		spin_unlock_irq(&cm.lock);
 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
 		goto out; /* No match. */
 	}
-	refcount_inc(&cur_cm_id_priv->refcount);
-	refcount_inc(&cm_id_priv->refcount);
+	refcount_inc(&listen_cm_id_priv->refcount);
 	spin_unlock_irq(&cm.lock);
 
-	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
-	cm_id_priv->id.context = cur_cm_id_priv->id.context;
-	cm_id_priv->id.service_id =
-		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
-	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+	cm_id_priv->id.context = listen_cm_id_priv->id.context;
 
-	cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
-	cm_process_work(cm_id_priv, work);
-	cm_deref_id(cur_cm_id_priv);
+	/*
+	 * A SIDR ID does not need to be in the xarray since it does not receive
+	 * mads, is not placed in the remote_id or remote_qpn rbtree, and does
+	 * not enter timewait.
+	 */
+
+	cm_format_sidr_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+	cm_free_work(work);
+	/*
+	 * A pointer to the listen_cm_id is held in the event, so this deref
+	 * must be after the event is delivered above.
+	 */
+	cm_deref_id(listen_cm_id_priv);
+	if (ret)
+		cm_destroy_id(&cm_id_priv->id, ret);
 	return 0;
 out:
 	ib_destroy_cm_id(&cm_id_priv->id);
