
Commit 67b3c8d

RDMA/cm: Make sure the cm_id is in the IB_CM_IDLE state in destroy
The first switch statement in cm_destroy_id() tries to move the ID to either IB_CM_IDLE or IB_CM_TIMEWAIT. Both states block concurrent MAD handlers from progressing.

Previous patches removed the unreliable lock/unlock sequences in this flow; this patch removes the extra locking steps and adds the missing parts to guarantee that destroy reaches IB_CM_IDLE. There is no point in leaving the ID in the IB_CM_TIMEWAIT state when the memory is about to be kfreed.

Rework things to hold the lock across all the state transitions and directly assert when done that the ID ended up in IB_CM_IDLE as expected.

This was accompanied by a careful audit of all the state transitions here, which generally did end up in IDLE on their success and non-racy paths.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Leon Romanovsky <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 6a8824a commit 67b3c8d
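For orientation, the shape of the rework in the diff below can be distilled into a small standalone sketch: take the lock once, let every case either move the ID toward IDLE or jump back to retest while still holding the lock, and assert the final state before the ID is freed. This is only an illustration under simplified assumptions; destroy_id, id_priv, the trimmed-down state enum, and the pthread mutex are hypothetical stand-ins, not the kernel's cm structures or locking primitives.

/*
 * Minimal userspace sketch (simplified, hypothetical types) of the
 * destroy pattern described above: one lock held across every state
 * transition, a retest loop for transitions that land in an
 * intermediate state, and a final assertion that the ID reached IDLE.
 */
#include <assert.h>
#include <pthread.h>

enum cm_state { IDLE, TIMEWAIT, DREQ_SENT, ESTABLISHED };

struct id_priv {
	pthread_mutex_t lock;
	enum cm_state state;
};

static void destroy_id(struct id_priv *p)
{
	pthread_mutex_lock(&p->lock);
retest:
	switch (p->state) {
	case ESTABLISHED:
		/* "send DREQ" while still locked, then re-run the switch */
		p->state = DREQ_SENT;
		goto retest;
	case DREQ_SENT:
		/* cancel the outstanding work, enter timewait, re-run */
		p->state = TIMEWAIT;
		goto retest;
	case TIMEWAIT:
		/* nothing left to wait for once the ID is being freed */
		p->state = IDLE;
		break;
	case IDLE:
		break;
	}
	/* every path above must end in IDLE before the ID is freed */
	assert(p->state == IDLE);
	pthread_mutex_unlock(&p->lock);
}

int main(void)
{
	struct id_priv p = { PTHREAD_MUTEX_INITIALIZER, ESTABLISHED };

	destroy_id(&p);	/* walks ESTABLISHED -> DREQ_SENT -> TIMEWAIT -> IDLE */
	return 0;
}

Holding the lock across the whole walk is what lets a single final assertion stand in for the scattered per-case unlocks that the patch removes.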


drivers/infiniband/core/cm.c

Lines changed: 21 additions & 20 deletions
@@ -1026,34 +1026,34 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
         struct cm_work *work;
 
         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-retest:
         spin_lock_irq(&cm_id_priv->lock);
+retest:
         switch (cm_id->state) {
         case IB_CM_LISTEN:
-                spin_unlock_irq(&cm_id_priv->lock);
-
-                spin_lock_irq(&cm.lock);
+                spin_lock(&cm.lock);
                 if (--cm_id_priv->listen_sharecount > 0) {
                         /* The id is still shared. */
                         WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
+                        spin_unlock(&cm.lock);
+                        spin_unlock_irq(&cm_id_priv->lock);
                         cm_deref_id(cm_id_priv);
-                        spin_unlock_irq(&cm.lock);
                         return;
                 }
+                cm_id->state = IB_CM_IDLE;
                 rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
                 RB_CLEAR_NODE(&cm_id_priv->service_node);
-                spin_unlock_irq(&cm.lock);
+                spin_unlock(&cm.lock);
                 break;
         case IB_CM_SIDR_REQ_SENT:
                 cm_id->state = IB_CM_IDLE;
                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-                spin_unlock_irq(&cm_id_priv->lock);
                 break;
         case IB_CM_SIDR_REQ_RCVD:
                 cm_send_sidr_rep_locked(cm_id_priv,
                                         &(struct ib_cm_sidr_rep_param){
                                                 .status = IB_SIDR_REJECT });
-                spin_unlock_irq(&cm_id_priv->lock);
+                /* cm_send_sidr_rep_locked will not move to IDLE if it fails */
+                cm_id->state = IB_CM_IDLE;
                 break;
         case IB_CM_REQ_SENT:
         case IB_CM_MRA_REQ_RCVD:
@@ -1062,18 +1062,15 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
                            &cm_id_priv->id.device->node_guid,
                            sizeof(cm_id_priv->id.device->node_guid),
                            NULL, 0);
-                spin_unlock_irq(&cm_id_priv->lock);
                 break;
         case IB_CM_REQ_RCVD:
                 if (err == -ENOMEM) {
                         /* Do not reject to allow future retries. */
                         cm_reset_to_idle(cm_id_priv);
-                        spin_unlock_irq(&cm_id_priv->lock);
                 } else {
                         cm_send_rej_locked(cm_id_priv,
                                            IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
                                            NULL, 0);
-                        spin_unlock_irq(&cm_id_priv->lock);
                 }
                 break;
         case IB_CM_REP_SENT:
@@ -1085,31 +1082,35 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
         case IB_CM_MRA_REP_SENT:
                 cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
                                    0, NULL, 0);
-                spin_unlock_irq(&cm_id_priv->lock);
                 break;
         case IB_CM_ESTABLISHED:
                 if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
-                        spin_unlock_irq(&cm_id_priv->lock);
+                        cm_id->state = IB_CM_IDLE;
                         break;
                 }
                 cm_send_dreq_locked(cm_id_priv, NULL, 0);
-                spin_unlock_irq(&cm_id_priv->lock);
                 goto retest;
         case IB_CM_DREQ_SENT:
                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                 cm_enter_timewait(cm_id_priv);
-                spin_unlock_irq(&cm_id_priv->lock);
-                break;
+                goto retest;
         case IB_CM_DREQ_RCVD:
                 cm_send_drep_locked(cm_id_priv, NULL, 0);
-                spin_unlock_irq(&cm_id_priv->lock);
+                WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
+                goto retest;
+        case IB_CM_TIMEWAIT:
+                /*
+                 * The cm_acquire_id in cm_timewait_handler will stop working
+                 * once we do cm_free_id() below, so just move to idle here for
+                 * consistency.
+                 */
+                cm_id->state = IB_CM_IDLE;
                 break;
-        default:
-                spin_unlock_irq(&cm_id_priv->lock);
+        case IB_CM_IDLE:
                 break;
         }
+        WARN_ON(cm_id->state != IB_CM_IDLE);
 
-        spin_lock_irq(&cm_id_priv->lock);
         spin_lock(&cm.lock);
         /* Required for cleanup paths related cm_req_handler() */
         if (cm_id_priv->timewait_info) {
