Skip to content

Commit 0e4e1de

Browse files
committed
rbd: avoid a deadlock on header_rwsem when flushing notifies
rbd_unregister_watch() flushes notifies and therefore cannot be called under header_rwsem because a header update notify takes header_rwsem to synchronize with "rbd map". If mapping an image fails after the watch is established and a header update notify sneaks in, we deadlock when erroring out from rbd_dev_image_probe().

Move watch registration and unregistration out of the critical section. The only reason they were put there was to make header_rwsem management slightly more obvious.

Fixes: 811c668 ("rbd: fix rbd map vs notify races")
Signed-off-by: Ilya Dryomov <[email protected]>
Reviewed-by: Jason Dillaman <[email protected]>
1 parent 8f3d9f3 commit 0e4e1de

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

drivers/block/rbd.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4527,6 +4527,10 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev)
45274527
cancel_work_sync(&rbd_dev->unlock_work);
45284528
}
45294529

4530+
/*
4531+
* header_rwsem must not be held to avoid a deadlock with
4532+
* rbd_dev_refresh() when flushing notifies.
4533+
*/
45304534
static void rbd_unregister_watch(struct rbd_device *rbd_dev)
45314535
{
45324536
cancel_tasks_sync(rbd_dev);
@@ -6907,6 +6911,9 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
69076911
* device. If this image is the one being mapped (i.e., not a
69086912
* parent), initiate a watch on its header object before using that
69096913
* object to get detailed information about the rbd image.
6914+
*
6915+
* On success, returns with header_rwsem held for write if called
6916+
* with @depth == 0.
69106917
*/
69116918
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
69126919
{
@@ -6936,6 +6943,9 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
69366943
}
69376944
}
69386945

6946+
if (!depth)
6947+
down_write(&rbd_dev->header_rwsem);
6948+
69396949
ret = rbd_dev_header_info(rbd_dev);
69406950
if (ret) {
69416951
if (ret == -ENOENT && !need_watch)
@@ -6987,6 +6997,8 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
69876997
err_out_probe:
69886998
rbd_dev_unprobe(rbd_dev);
69896999
err_out_watch:
7000+
if (!depth)
7001+
up_write(&rbd_dev->header_rwsem);
69907002
if (need_watch)
69917003
rbd_unregister_watch(rbd_dev);
69927004
err_out_format:
@@ -7050,12 +7062,9 @@ static ssize_t do_rbd_add(struct bus_type *bus,
70507062
goto err_out_rbd_dev;
70517063
}
70527064

7053-
down_write(&rbd_dev->header_rwsem);
70547065
rc = rbd_dev_image_probe(rbd_dev, 0);
7055-
if (rc < 0) {
7056-
up_write(&rbd_dev->header_rwsem);
7066+
if (rc < 0)
70577067
goto err_out_rbd_dev;
7058-
}
70597068

70607069
if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
70617070
rbd_warn(rbd_dev, "alloc_size adjusted to %u",

0 commit comments

Comments
 (0)