Skip to content

Commit 1c77483

Browse files
yishaihjgunthorpe
authored andcommitted
IB: Improve uverbs_cleanup_ucontext algorithm
Improve uverbs_cleanup_ucontext algorithm to work properly when the topology graph of the objects cannot be determined at compile time. This is the case with objects created via the devx interface in mlx5. Typically uverbs objects must be created in a strict topologically sorted order, so that LIFO ordering will generally cause them to be freed properly. There are only a few cases (eg memory windows) where objects can point to things out of the strict LIFO order. Instead of using an explicit ordering scheme where the HW destroy is not allowed to fail, go over the list multiple times and allow the destroy function to fail. If progress halts then a final, desperate, cleanup is done before leaking the memory. This indicates a driver bug. Signed-off-by: Yishai Hadas <[email protected]> Signed-off-by: Leon Romanovsky <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent e620ebf commit 1c77483

File tree

12 files changed

+190
-111
lines changed

12 files changed

+190
-111
lines changed

drivers/infiniband/core/rdma_core.c

Lines changed: 65 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -360,9 +360,10 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
360360

361361
/*
362362
* We can only fail gracefully if the user requested to destroy the
363-
* object. In the rest of the cases, just remove whatever you can.
363+
* object or when a retry may be called upon an error.
364+
* In the rest of the cases, just remove whatever you can.
364365
*/
365-
if (why == RDMA_REMOVE_DESTROY && ret)
366+
if (ib_is_destroy_retryable(ret, why, uobj))
366367
return ret;
367368

368369
ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
@@ -393,7 +394,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
393394
container_of(uobj, struct ib_uobject_file, uobj);
394395
int ret = fd_type->context_closed(uobj_file, why);
395396

396-
if (why == RDMA_REMOVE_DESTROY && ret)
397+
if (ib_is_destroy_retryable(ret, why, uobj))
397398
return ret;
398399

399400
if (why == RDMA_REMOVE_DURING_CLEANUP) {
@@ -422,7 +423,7 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
422423
struct ib_ucontext *ucontext = uobj->context;
423424

424425
ret = uobj->type->type_class->remove_commit(uobj, why);
425-
if (ret && why == RDMA_REMOVE_DESTROY) {
426+
if (ib_is_destroy_retryable(ret, why, uobj)) {
426427
/* We couldn't remove the object, so just unlock the uobject */
427428
atomic_set(&uobj->usecnt, 0);
428429
uobj->type->type_class->lookup_put(uobj, true);
@@ -645,61 +646,77 @@ void uverbs_close_fd(struct file *f)
645646
kref_put(uverbs_file_ref, ib_uverbs_release_file);
646647
}
647648

648-
void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
649+
static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext,
650+
enum rdma_remove_reason reason)
649651
{
650-
enum rdma_remove_reason reason = device_removed ?
651-
RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
652-
unsigned int cur_order = 0;
652+
struct ib_uobject *obj, *next_obj;
653+
int ret = -EINVAL;
654+
int err = 0;
653655

656+
/*
657+
* This shouldn't run while executing other commands on this
658+
* context. Thus, the only thing we should take care of is
659+
* releasing a FD while traversing this list. The FD could be
660+
* closed and released from the _release fop of this FD.
661+
* In order to mitigate this, we add a lock.
662+
* We take and release the lock per traversal in order to let
663+
* other threads (which might still use the FDs) chance to run.
664+
*/
665+
mutex_lock(&ucontext->uobjects_lock);
654666
ucontext->cleanup_reason = reason;
667+
list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, list) {
668+
/*
669+
* if we hit this WARN_ON, that means we are
670+
* racing with a lookup_get.
671+
*/
672+
WARN_ON(uverbs_try_lock_object(obj, true));
673+
err = obj->type->type_class->remove_commit(obj, reason);
674+
675+
if (ib_is_destroy_retryable(err, reason, obj)) {
676+
pr_debug("ib_uverbs: failed to remove uobject id %d err %d\n",
677+
obj->id, err);
678+
atomic_set(&obj->usecnt, 0);
679+
continue;
680+
}
681+
682+
if (err)
683+
pr_err("ib_uverbs: unable to remove uobject id %d err %d\n",
684+
obj->id, err);
685+
686+
list_del(&obj->list);
687+
/* put the ref we took when we created the object */
688+
uverbs_uobject_put(obj);
689+
ret = 0;
690+
}
691+
mutex_unlock(&ucontext->uobjects_lock);
692+
return ret;
693+
}
694+
695+
void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
696+
{
697+
enum rdma_remove_reason reason = device_removed ?
698+
RDMA_REMOVE_DRIVER_REMOVE :
699+
RDMA_REMOVE_CLOSE;
655700
/*
656701
* Waits for all remove_commit and alloc_commit to finish. Logically, We
657702
* want to hold this forever as the context is going to be destroyed,
658703
* but we'll release it since it causes a "held lock freed" BUG message.
659704
*/
660705
down_write(&ucontext->cleanup_rwsem);
706+
ucontext->cleanup_retryable = true;
707+
while (!list_empty(&ucontext->uobjects))
708+
if (__uverbs_cleanup_ucontext(ucontext, reason)) {
709+
/*
710+
* No entry was cleaned-up successfully during this
711+
* iteration
712+
*/
713+
break;
714+
}
661715

662-
while (!list_empty(&ucontext->uobjects)) {
663-
struct ib_uobject *obj, *next_obj;
664-
unsigned int next_order = UINT_MAX;
716+
ucontext->cleanup_retryable = false;
717+
if (!list_empty(&ucontext->uobjects))
718+
__uverbs_cleanup_ucontext(ucontext, reason);
665719

666-
/*
667-
* This shouldn't run while executing other commands on this
668-
* context. Thus, the only thing we should take care of is
669-
* releasing a FD while traversing this list. The FD could be
670-
* closed and released from the _release fop of this FD.
671-
* In order to mitigate this, we add a lock.
672-
* We take and release the lock per order traversal in order
673-
* to let other threads (which might still use the FDs) chance
674-
* to run.
675-
*/
676-
mutex_lock(&ucontext->uobjects_lock);
677-
list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
678-
list) {
679-
if (obj->type->destroy_order == cur_order) {
680-
int ret;
681-
682-
/*
683-
* if we hit this WARN_ON, that means we are
684-
* racing with a lookup_get.
685-
*/
686-
WARN_ON(uverbs_try_lock_object(obj, true));
687-
ret = obj->type->type_class->remove_commit(obj,
688-
reason);
689-
list_del(&obj->list);
690-
if (ret)
691-
pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
692-
obj->id, cur_order);
693-
/* put the ref we took when we created the object */
694-
uverbs_uobject_put(obj);
695-
} else {
696-
next_order = min(next_order,
697-
obj->type->destroy_order);
698-
}
699-
}
700-
mutex_unlock(&ucontext->uobjects_lock);
701-
cur_order = next_order;
702-
}
703720
up_write(&ucontext->cleanup_rwsem);
704721
}
705722

drivers/infiniband/core/uverbs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
230230
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
231231
void ib_uverbs_event_handler(struct ib_event_handler *handler,
232232
struct ib_event *event);
233-
int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
233+
int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
234234
enum rdma_remove_reason why);
235235

236236
int uverbs_dealloc_mw(struct ib_mw *mw);

drivers/infiniband/core/uverbs_cmd.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
116116
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
117117
rcu_read_unlock();
118118
ucontext->closing = 0;
119+
ucontext->cleanup_retryable = false;
119120

120121
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
121122
ucontext->umem_tree = RB_ROOT_CACHED;
@@ -611,22 +612,26 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
611612
return ret ?: in_len;
612613
}
613614

614-
int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
615+
int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
615616
struct ib_xrcd *xrcd,
616617
enum rdma_remove_reason why)
617618
{
618619
struct inode *inode;
619620
int ret;
621+
struct ib_uverbs_device *dev = uobject->context->ufile->device;
620622

621623
inode = xrcd->inode;
622624
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
623625
return 0;
624626

625627
ret = ib_dealloc_xrcd(xrcd);
626628

627-
if (why == RDMA_REMOVE_DESTROY && ret)
629+
if (ib_is_destroy_retryable(ret, why, uobject)) {
628630
atomic_inc(&xrcd->usecnt);
629-
else if (inode)
631+
return ret;
632+
}
633+
634+
if (inode)
630635
xrcd_table_delete(dev, inode);
631636

632637
return ret;

drivers/infiniband/core/uverbs_std_types.c

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
7777
container_of(uobject, struct ib_uqp_object, uevent.uobject);
7878
int ret;
7979

80+
/*
81+
* If this is a user triggered destroy then do not allow destruction
82+
* until the user cleans up all the mcast bindings. Unlike in other
83+
* places we forcibly clean up the mcast attachments for !DESTROY
84+
* because the mcast attaches are not ubojects and will not be
85+
* destroyed by anything else during cleanup processing.
86+
*/
8087
if (why == RDMA_REMOVE_DESTROY) {
8188
if (!list_empty(&uqp->mcast_list))
8289
return -EBUSY;
@@ -85,7 +92,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
8592
}
8693

8794
ret = ib_destroy_qp(qp);
88-
if (ret && why == RDMA_REMOVE_DESTROY)
95+
if (ib_is_destroy_retryable(ret, why, uobject))
8996
return ret;
9097

9198
if (uqp->uxrcd)
@@ -103,8 +110,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
103110
int ret;
104111

105112
ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
106-
if (!ret || why != RDMA_REMOVE_DESTROY)
107-
kfree(ind_tbl);
113+
if (ib_is_destroy_retryable(ret, why, uobject))
114+
return ret;
115+
116+
kfree(ind_tbl);
108117
return ret;
109118
}
110119

@@ -117,8 +126,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
117126
int ret;
118127

119128
ret = ib_destroy_wq(wq);
120-
if (!ret || why != RDMA_REMOVE_DESTROY)
121-
ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
129+
if (ib_is_destroy_retryable(ret, why, uobject))
130+
return ret;
131+
132+
ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
122133
return ret;
123134
}
124135

@@ -132,8 +143,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
132143
int ret;
133144

134145
ret = ib_destroy_srq(srq);
135-
136-
if (ret && why == RDMA_REMOVE_DESTROY)
146+
if (ib_is_destroy_retryable(ret, why, uobject))
137147
return ret;
138148

139149
if (srq_type == IB_SRQT_XRC) {
@@ -155,12 +165,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject,
155165
container_of(uobject, struct ib_uxrcd_object, uobject);
156166
int ret;
157167

168+
ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
169+
if (ret)
170+
return ret;
171+
158172
mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
159-
if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
160-
ret = -EBUSY;
161-
else
162-
ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
163-
xrcd, why);
173+
ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why);
164174
mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
165175

166176
return ret;
@@ -170,9 +180,11 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
170180
enum rdma_remove_reason why)
171181
{
172182
struct ib_pd *pd = uobject->object;
183+
int ret;
173184

174-
if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
175-
return -EBUSY;
185+
ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
186+
if (ret)
187+
return ret;
176188

177189
ib_dealloc_pd((struct ib_pd *)uobject->object);
178190
return 0;
@@ -249,44 +261,42 @@ void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
249261
}
250262

251263
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
252-
&UVERBS_TYPE_ALLOC_FD(0,
253-
sizeof(struct ib_uverbs_completion_event_file),
264+
&UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
254265
uverbs_hot_unplug_completion_event_file,
255266
&uverbs_event_fops,
256267
"[infinibandevent]", O_RDONLY));
257268

258269
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
259-
&UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
270+
&UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object),
260271
uverbs_free_qp));
261272

262273
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
263-
&UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
274+
&UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
264275

265276
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
266-
&UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
277+
&UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
267278
uverbs_free_srq));
268279

269280
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
270-
&UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
281+
&UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
271282

272283
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
273284
&UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
274-
0, uverbs_free_flow));
285+
uverbs_free_flow));
275286

276287
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
277-
&UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
288+
&UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object),
278289
uverbs_free_wq));
279290

280291
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
281-
&UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
292+
&UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
282293

283294
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
284-
&UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
295+
&UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
285296
uverbs_free_xrcd));
286297

287298
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
288-
/* 2 is used in order to free the PD after MRs */
289-
&UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
299+
&UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
290300

291301
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
292302

drivers/infiniband/core/uverbs_std_types_counters.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,11 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
3838
enum rdma_remove_reason why)
3939
{
4040
struct ib_counters *counters = uobject->object;
41+
int ret;
4142

42-
if (why == RDMA_REMOVE_DESTROY &&
43-
atomic_read(&counters->usecnt))
44-
return -EBUSY;
43+
ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
44+
if (ret)
45+
return ret;
4546

4647
return counters->device->destroy_counters(counters);
4748
}
@@ -150,7 +151,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
150151
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
151152

152153
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
153-
&UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
154+
&UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
154155
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
155156
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
156157
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));

0 commit comments

Comments
 (0)