
Commit 3722a11

jgunthorpe authored and jfvogel committed
RDMA/cm: Make sure the cm_id is in the IB_CM_IDLE state in destroy
The first switch statement in cm_destroy_id() tries to move the ID to either IB_CM_IDLE or IB_CM_TIMEWAIT. Both states will block concurrent MAD handlers from progressing.

Previous patches removed the unreliable lock/unlock sequences in this flow; this patch removes the extra locking steps and adds the missing parts to guarantee that destroy reaches IB_CM_IDLE. There is no point in leaving the ID in the IB_CM_TIMEWAIT state when the memory is about to be kfreed.

Rework things to hold the lock across all the state transitions and directly assert, when done, that the ID ended up in IB_CM_IDLE as expected.

This was accompanied by a careful audit of all the state transitions here, which generally did end up in IDLE on their success and non-racy paths.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Leon Romanovsky <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
Orabug: 31483278
(cherry picked from commit 67b3c8d)
cherry-pick-repo=linux/kernel/git/torvalds/linux.git
unmodified-from-upstream: 67b3c8d
Signed-off-by: Ka-Cheong Poon <[email protected]>
Tested-by: June Wang <[email protected]>
Reviewed-by: Sharath Srinivasan <[email protected]>
1 parent 9be1959 commit 3722a11

1 file changed, 21 insertions(+), 20 deletions(-): drivers/infiniband/core/cm.c

drivers/infiniband/core/cm.c

Lines changed: 21 additions & 20 deletions
@@ -1023,34 +1023,34 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 	struct cm_work *work;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-retest:
 	spin_lock_irq(&cm_id_priv->lock);
+retest:
 	switch (cm_id->state) {
 	case IB_CM_LISTEN:
-		spin_unlock_irq(&cm_id_priv->lock);
-
-		spin_lock_irq(&cm.lock);
+		spin_lock(&cm.lock);
 		if (--cm_id_priv->listen_sharecount > 0) {
 			/* The id is still shared. */
 			WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
+			spin_unlock(&cm.lock);
+			spin_unlock_irq(&cm_id_priv->lock);
 			cm_deref_id(cm_id_priv);
-			spin_unlock_irq(&cm.lock);
 			return;
 		}
+		cm_id->state = IB_CM_IDLE;
 		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
 		RB_CLEAR_NODE(&cm_id_priv->service_node);
-		spin_unlock_irq(&cm.lock);
+		spin_unlock(&cm.lock);
 		break;
 	case IB_CM_SIDR_REQ_SENT:
 		cm_id->state = IB_CM_IDLE;
 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-		spin_unlock_irq(&cm_id_priv->lock);
 		break;
 	case IB_CM_SIDR_REQ_RCVD:
 		cm_send_sidr_rep_locked(cm_id_priv,
 					&(struct ib_cm_sidr_rep_param){
 						.status = IB_SIDR_REJECT });
-		spin_unlock_irq(&cm_id_priv->lock);
+		/* cm_send_sidr_rep_locked will not move to IDLE if it fails */
+		cm_id->state = IB_CM_IDLE;
 		break;
 	case IB_CM_REQ_SENT:
 	case IB_CM_MRA_REQ_RCVD:
@@ -1059,18 +1059,15 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 			       &cm_id_priv->id.device->node_guid,
 			       sizeof(cm_id_priv->id.device->node_guid),
 			       NULL, 0);
-		spin_unlock_irq(&cm_id_priv->lock);
 		break;
 	case IB_CM_REQ_RCVD:
 		if (err == -ENOMEM) {
 			/* Do not reject to allow future retries. */
 			cm_reset_to_idle(cm_id_priv);
-			spin_unlock_irq(&cm_id_priv->lock);
 		} else {
 			cm_send_rej_locked(cm_id_priv,
 					   IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
 					   NULL, 0);
-			spin_unlock_irq(&cm_id_priv->lock);
 		}
 		break;
 	case IB_CM_REP_SENT:
@@ -1082,31 +1079,35 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 	case IB_CM_MRA_REP_SENT:
 		cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
 				   0, NULL, 0);
-		spin_unlock_irq(&cm_id_priv->lock);
 		break;
 	case IB_CM_ESTABLISHED:
 		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
-			spin_unlock_irq(&cm_id_priv->lock);
+			cm_id->state = IB_CM_IDLE;
 			break;
 		}
 		cm_send_dreq_locked(cm_id_priv, NULL, 0);
-		spin_unlock_irq(&cm_id_priv->lock);
 		goto retest;
 	case IB_CM_DREQ_SENT:
 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		cm_enter_timewait(cm_id_priv);
-		spin_unlock_irq(&cm_id_priv->lock);
-		break;
+		goto retest;
 	case IB_CM_DREQ_RCVD:
 		cm_send_drep_locked(cm_id_priv, NULL, 0);
-		spin_unlock_irq(&cm_id_priv->lock);
+		WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
+		goto retest;
+	case IB_CM_TIMEWAIT:
+		/*
+		 * The cm_acquire_id in cm_timewait_handler will stop working
+		 * once we do cm_free_id() below, so just move to idle here for
+		 * consistency.
+		 */
+		cm_id->state = IB_CM_IDLE;
 		break;
-	default:
-		spin_unlock_irq(&cm_id_priv->lock);
+	case IB_CM_IDLE:
 		break;
 	}
+	WARN_ON(cm_id->state != IB_CM_IDLE);
 
-	spin_lock_irq(&cm_id_priv->lock);
 	spin_lock(&cm.lock);
 	/* Required for cleanup paths related cm_req_handler() */
 	if (cm_id_priv->timewait_info) {
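
To make the reworked control flow easier to follow, below is a small user-space model of the pattern this commit describes: hold the lock across every state transition, jump back to retest whenever a helper leaves the ID in an intermediate state, and assert that the ID is IDLE before teardown. The state names, helpers, and the pthread mutex are simplified stand-ins for illustration only, not the kernel's cm.c code.

/*
 * Simplified user-space model of the cm_destroy_id() locking pattern.
 * All names here are stand-ins; this is not the kernel implementation.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

enum cm_state { CM_IDLE, CM_LISTEN, CM_ESTABLISHED, CM_DREQ_SENT, CM_TIMEWAIT };

struct cm_id {
        pthread_mutex_t lock;   /* models cm_id_priv->lock */
        enum cm_state state;
};

/*
 * Helpers are called with id->lock held and move the state forward without
 * dropping the lock, mirroring the *_locked() helpers in cm.c.
 */
static void send_dreq_locked(struct cm_id *id)      { id->state = CM_DREQ_SENT; }
static void enter_timewait_locked(struct cm_id *id) { id->state = CM_TIMEWAIT; }

static void destroy_id(struct cm_id *id)
{
        pthread_mutex_lock(&id->lock);          /* held across all transitions */
retest:
        switch (id->state) {
        case CM_LISTEN:
                id->state = CM_IDLE;
                break;
        case CM_ESTABLISHED:
                send_dreq_locked(id);
                goto retest;                    /* re-evaluate the new state */
        case CM_DREQ_SENT:
                enter_timewait_locked(id);
                goto retest;
        case CM_TIMEWAIT:
                id->state = CM_IDLE;            /* nothing may observe TIMEWAIT after free */
                break;
        case CM_IDLE:
                break;
        }
        assert(id->state == CM_IDLE);           /* models WARN_ON(cm_id->state != IB_CM_IDLE) */
        pthread_mutex_unlock(&id->lock);
        /* ...followed by timewait_info cleanup and kfree in the real code */
}

int main(void)
{
        struct cm_id id = { .lock = PTHREAD_MUTEX_INITIALIZER, .state = CM_ESTABLISHED };

        destroy_id(&id);
        printf("final state: %d (0 == CM_IDLE)\n", id.state);
        return 0;
}

Built with cc -pthread, this walks ESTABLISHED -> DREQ_SENT -> TIMEWAIT -> IDLE under a single lock hold, which is the invariant the final WARN_ON in the patch enforces.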
