Skip to content

Commit e951747

Browse files
committed
IB/uverbs: Rework the locking for cleaning up the ucontext
The locking here has always been a bit crazy and spread out, upon some careful analysis we can simplify things. Create a single function uverbs_destroy_ufile_hw() that internally handles all locking. This pulls together pieces of this process that were sprinkled all over the places into one place, and covers them with one lock. This eliminates several duplicate/confusing locks and makes the control flow in ib_uverbs_close() and ib_uverbs_free_hw_resources() extremely simple. Unfortunately we have to keep an extra mutex, ucontext_lock. This lock is logically part of the rwsem and provides the 'down write, fail if write locked, wait if read locked' semantic we require. Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 8706427 commit e951747

File tree

6 files changed

+127
-108
lines changed

6 files changed

+127
-108
lines changed

drivers/infiniband/core/rdma_core.c

Lines changed: 104 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
#include <linux/file.h>
3434
#include <linux/anon_inodes.h>
35+
#include <linux/sched/mm.h>
3536
#include <rdma/ib_verbs.h>
3637
#include <rdma/uverbs_types.h>
3738
#include <linux/rcupdate.h>
@@ -284,11 +285,8 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
284285
}
285286

286287
ret = uverbs_try_lock_object(uobj, exclusive);
287-
if (ret) {
288-
WARN(uobj->ufile->cleanup_reason,
289-
"ib_uverbs: Trying to lookup_get while cleanup context\n");
288+
if (ret)
290289
goto free;
291-
}
292290

293291
return uobj;
294292
free:
@@ -694,6 +692,71 @@ void uverbs_close_fd(struct file *f)
694692
uverbs_uobject_put(uobj);
695693
}
696694

695+
static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
696+
{
697+
struct ib_device *ib_dev = ibcontext->device;
698+
struct task_struct *owning_process = NULL;
699+
struct mm_struct *owning_mm = NULL;
700+
701+
owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
702+
if (!owning_process)
703+
return;
704+
705+
owning_mm = get_task_mm(owning_process);
706+
if (!owning_mm) {
707+
pr_info("no mm, disassociate ucontext is pending task termination\n");
708+
while (1) {
709+
put_task_struct(owning_process);
710+
usleep_range(1000, 2000);
711+
owning_process = get_pid_task(ibcontext->tgid,
712+
PIDTYPE_PID);
713+
if (!owning_process ||
714+
owning_process->state == TASK_DEAD) {
715+
pr_info("disassociate ucontext done, task was terminated\n");
716+
/* in case task was dead need to release the
717+
* task struct.
718+
*/
719+
if (owning_process)
720+
put_task_struct(owning_process);
721+
return;
722+
}
723+
}
724+
}
725+
726+
down_write(&owning_mm->mmap_sem);
727+
ib_dev->disassociate_ucontext(ibcontext);
728+
up_write(&owning_mm->mmap_sem);
729+
mmput(owning_mm);
730+
put_task_struct(owning_process);
731+
}
732+
733+
/*
734+
* Drop the ucontext off the ufile and completely disconnect it from the
735+
* ib_device
736+
*/
737+
static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
738+
enum rdma_remove_reason reason)
739+
{
740+
struct ib_ucontext *ucontext = ufile->ucontext;
741+
int ret;
742+
743+
if (reason == RDMA_REMOVE_DRIVER_REMOVE)
744+
ufile_disassociate_ucontext(ucontext);
745+
746+
put_pid(ucontext->tgid);
747+
ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
748+
RDMACG_RESOURCE_HCA_HANDLE);
749+
750+
/*
751+
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
752+
* the error return.
753+
*/
754+
ret = ucontext->device->dealloc_ucontext(ufile->ucontext);
755+
WARN_ON(ret);
756+
757+
ufile->ucontext = NULL;
758+
}
759+
697760
static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
698761
enum rdma_remove_reason reason)
699762
{
@@ -710,7 +773,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
710773
* We take and release the lock per traversal in order to let
711774
* other threads (which might still use the FDs) chance to run.
712775
*/
713-
ufile->cleanup_reason = reason;
714776
list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
715777
/*
716778
* if we hit this WARN_ON, that means we are
@@ -738,18 +800,43 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
738800
return ret;
739801
}
740802

741-
void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed)
803+
/*
804+
* Destroy the uncontext and every uobject associated with it. If called with
805+
* reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
806+
* been completed and ufile->ucontext is NULL.
807+
*
808+
* This is internally locked and can be called in parallel from multiple
809+
* contexts.
810+
*/
811+
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
812+
enum rdma_remove_reason reason)
742813
{
743-
enum rdma_remove_reason reason = device_removed ?
744-
RDMA_REMOVE_DRIVER_REMOVE :
745-
RDMA_REMOVE_CLOSE;
814+
if (reason == RDMA_REMOVE_CLOSE) {
815+
/*
816+
* During destruction we might trigger something that
817+
* synchronously calls release on any file descriptor. For
818+
* this reason all paths that come from file_operations
819+
* release must use try_lock. They can progress knowing that
820+
* there is an ongoing uverbs_destroy_ufile_hw that will clean
821+
* up the driver resources.
822+
*/
823+
if (!mutex_trylock(&ufile->ucontext_lock))
824+
return;
825+
826+
} else {
827+
mutex_lock(&ufile->ucontext_lock);
828+
}
829+
830+
down_write(&ufile->hw_destroy_rwsem);
746831

747832
/*
748-
* Waits for all remove_commit and alloc_commit to finish. Logically, We
749-
* want to hold this forever as the context is going to be destroyed,
750-
* but we'll release it since it causes a "held lock freed" BUG message.
833+
* If a ucontext was never created then we can't have any uobjects to
834+
* cleanup, nothing to do.
751835
*/
752-
down_write(&ufile->hw_destroy_rwsem);
836+
if (!ufile->ucontext)
837+
goto done;
838+
839+
ufile->ucontext->closing = true;
753840
ufile->ucontext->cleanup_retryable = true;
754841
while (!list_empty(&ufile->uobjects))
755842
if (__uverbs_cleanup_ufile(ufile, reason)) {
@@ -764,7 +851,11 @@ void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed)
764851
if (!list_empty(&ufile->uobjects))
765852
__uverbs_cleanup_ufile(ufile, reason);
766853

854+
ufile_destroy_ucontext(ufile, reason);
855+
856+
done:
767857
up_write(&ufile->hw_destroy_rwsem);
858+
mutex_unlock(&ufile->ucontext_lock);
768859
}
769860

770861
const struct uverbs_obj_type_class uverbs_fd_class = {

drivers/infiniband/core/rdma_core.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
4949
const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
5050
uint16_t method);
5151

52-
void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed);
52+
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
53+
enum rdma_remove_reason reason);
5354

5455
/*
5556
* uverbs_uobject_get is called in order to increase the reference count on

drivers/infiniband/core/uverbs.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,9 @@ struct ib_uverbs_completion_event_file {
136136

137137
struct ib_uverbs_file {
138138
struct kref ref;
139-
struct mutex mutex;
140-
struct mutex cleanup_mutex; /* protect cleanup */
141139
struct ib_uverbs_device *device;
140+
/* Protects writing to ucontext */
141+
struct mutex ucontext_lock;
142142
struct ib_ucontext *ucontext;
143143
struct ib_event_handler event_handler;
144144
struct ib_uverbs_async_event_file *async_file;
@@ -155,8 +155,6 @@ struct ib_uverbs_file {
155155
spinlock_t uobjects_lock;
156156
struct list_head uobjects;
157157

158-
enum rdma_remove_reason cleanup_reason;
159-
160158
struct idr idr;
161159
/* spinlock protects write access to idr */
162160
spinlock_t idr_lock;

drivers/infiniband/core/uverbs_cmd.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
8484
if (copy_from_user(&cmd, buf, sizeof cmd))
8585
return -EFAULT;
8686

87-
mutex_lock(&file->mutex);
87+
mutex_lock(&file->ucontext_lock);
8888

8989
if (file->ucontext) {
9090
ret = -EINVAL;
@@ -150,7 +150,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
150150

151151
fd_install(resp.async_fd, filp);
152152

153-
mutex_unlock(&file->mutex);
153+
mutex_unlock(&file->ucontext_lock);
154154

155155
return in_len;
156156

@@ -169,7 +169,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
169169
ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
170170

171171
err:
172-
mutex_unlock(&file->mutex);
172+
mutex_unlock(&file->ucontext_lock);
173173
return ret;
174174
}
175175

drivers/infiniband/core/uverbs_main.c

Lines changed: 11 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@
4141
#include <linux/fs.h>
4242
#include <linux/poll.h>
4343
#include <linux/sched.h>
44-
#include <linux/sched/mm.h>
45-
#include <linux/sched/task.h>
4644
#include <linux/file.h>
4745
#include <linux/cdev.h>
4846
#include <linux/anon_inodes.h>
@@ -227,21 +225,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
227225
}
228226
}
229227

230-
static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file,
231-
bool device_removed)
232-
{
233-
struct ib_ucontext *context = file->ucontext;
234-
235-
context->closing = 1;
236-
uverbs_cleanup_ufile(file, device_removed);
237-
put_pid(context->tgid);
238-
239-
ib_rdmacg_uncharge(&context->cg_obj, context->device,
240-
RDMACG_RESOURCE_HCA_HANDLE);
241-
242-
return context->device->dealloc_ucontext(context);
243-
}
244-
245228
static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
246229
{
247230
complete(&dev->comp);
@@ -886,8 +869,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
886869
spin_lock_init(&file->idr_lock);
887870
idr_init(&file->idr);
888871
kref_init(&file->ref);
889-
mutex_init(&file->mutex);
890-
mutex_init(&file->cleanup_mutex);
872+
mutex_init(&file->ucontext_lock);
891873

892874
spin_lock_init(&file->uobjects_lock);
893875
INIT_LIST_HEAD(&file->uobjects);
@@ -917,12 +899,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
917899
{
918900
struct ib_uverbs_file *file = filp->private_data;
919901

920-
mutex_lock(&file->cleanup_mutex);
921-
if (file->ucontext) {
922-
ib_uverbs_cleanup_ufile(file, false);
923-
file->ucontext = NULL;
924-
}
925-
mutex_unlock(&file->cleanup_mutex);
902+
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
926903
idr_destroy(&file->idr);
927904

928905
mutex_lock(&file->device->lists_mutex);
@@ -1109,44 +1086,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
11091086
return;
11101087
}
11111088

1112-
static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
1113-
{
1114-
struct ib_device *ib_dev = ibcontext->device;
1115-
struct task_struct *owning_process = NULL;
1116-
struct mm_struct *owning_mm = NULL;
1117-
1118-
owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
1119-
if (!owning_process)
1120-
return;
1121-
1122-
owning_mm = get_task_mm(owning_process);
1123-
if (!owning_mm) {
1124-
pr_info("no mm, disassociate ucontext is pending task termination\n");
1125-
while (1) {
1126-
put_task_struct(owning_process);
1127-
usleep_range(1000, 2000);
1128-
owning_process = get_pid_task(ibcontext->tgid,
1129-
PIDTYPE_PID);
1130-
if (!owning_process ||
1131-
owning_process->state == TASK_DEAD) {
1132-
pr_info("disassociate ucontext done, task was terminated\n");
1133-
/* in case task was dead need to release the
1134-
* task struct.
1135-
*/
1136-
if (owning_process)
1137-
put_task_struct(owning_process);
1138-
return;
1139-
}
1140-
}
1141-
}
1142-
1143-
down_write(&owning_mm->mmap_sem);
1144-
ib_dev->disassociate_ucontext(ibcontext);
1145-
up_write(&owning_mm->mmap_sem);
1146-
mmput(owning_mm);
1147-
put_task_struct(owning_process);
1148-
}
1149-
11501089
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
11511090
struct ib_device *ib_dev)
11521091
{
@@ -1162,39 +1101,24 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
11621101

11631102
mutex_lock(&uverbs_dev->lists_mutex);
11641103
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
1165-
struct ib_ucontext *ucontext;
11661104
file = list_first_entry(&uverbs_dev->uverbs_file_list,
11671105
struct ib_uverbs_file, list);
11681106
file->is_closed = 1;
11691107
list_del(&file->list);
11701108
kref_get(&file->ref);
1171-
mutex_unlock(&uverbs_dev->lists_mutex);
1172-
1173-
1174-
mutex_lock(&file->cleanup_mutex);
1175-
ucontext = file->ucontext;
1176-
file->ucontext = NULL;
1177-
mutex_unlock(&file->cleanup_mutex);
11781109

1179-
/* At this point ib_uverbs_close cannot be running
1180-
* ib_uverbs_cleanup_ufile
1110+
/* We must release the mutex before going ahead and calling
1111+
* uverbs_cleanup_ufile, as it might end up indirectly calling
1112+
* uverbs_close, for example due to freeing the resources (e.g
1113+
* mmput).
11811114
*/
1182-
if (ucontext) {
1183-
/* We must release the mutex before going ahead and
1184-
* calling disassociate_ucontext. disassociate_ucontext
1185-
* might end up indirectly calling uverbs_close,
1186-
* for example due to freeing the resources
1187-
* (e.g mmput).
1188-
*/
1189-
ib_uverbs_event_handler(&file->event_handler, &event);
1190-
ib_uverbs_disassociate_ucontext(ucontext);
1191-
mutex_lock(&file->cleanup_mutex);
1192-
ib_uverbs_cleanup_ufile(file, true);
1193-
mutex_unlock(&file->cleanup_mutex);
1194-
}
1115+
mutex_unlock(&uverbs_dev->lists_mutex);
11951116

1196-
mutex_lock(&uverbs_dev->lists_mutex);
1117+
ib_uverbs_event_handler(&file->event_handler, &event);
1118+
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
11971119
kref_put(&file->ref, ib_uverbs_release_file);
1120+
1121+
mutex_lock(&uverbs_dev->lists_mutex);
11981122
}
11991123

12001124
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {

include/rdma/ib_verbs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,11 @@ struct ib_rdmacg_object {
14791479
struct ib_ucontext {
14801480
struct ib_device *device;
14811481
struct ib_uverbs_file *ufile;
1482+
/*
1483+
* 'closing' can be read by the driver only during a destroy callback,
1484+
* it is set when we are closing the file descriptor and indicates
1485+
* that mm_sem may be locked.
1486+
*/
14821487
int closing;
14831488

14841489
bool cleanup_retryable;

0 commit comments

Comments
 (0)