Skip to content

Commit 5b56b6e

Browse files
Christoph Hellwig authored and axboe committed
block: refactor blkdev_get
Move more of the code that runs only on the outer open, and not on the open of the underlying whole device when opening a partition, into blkdev_get, which leads to a much easier-to-follow structure. This allows simplifying the disk and module refcounting so that one reference is held for each open, similar to what we do with normal file operations.

Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Tejun Heo <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
1 parent ec5d451 commit 5b56b6e

File tree

1 file changed

+86
-99
lines changed

1 file changed

+86
-99
lines changed

fs/block_dev.c

Lines changed: 86 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -1407,46 +1407,12 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
14071407
* mutex_lock(part->bd_mutex)
14081408
* mutex_lock_nested(whole->bd_mutex, 1)
14091409
*/
1410-
1411-
static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
1412-
int for_part)
1410+
static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
1411+
int partno, fmode_t mode)
14131412
{
1414-
struct block_device *whole = NULL, *claiming = NULL;
1415-
struct gendisk *disk;
14161413
int ret;
1417-
int partno;
1418-
bool first_open = false, unblock_events = true, need_restart;
1419-
1420-
restart:
1421-
need_restart = false;
1422-
ret = -ENXIO;
1423-
disk = bdev_get_gendisk(bdev, &partno);
1424-
if (!disk)
1425-
goto out;
1426-
1427-
if (partno) {
1428-
whole = bdget_disk(disk, 0);
1429-
if (!whole) {
1430-
ret = -ENOMEM;
1431-
goto out_put_disk;
1432-
}
1433-
}
14341414

1435-
if (!for_part && (mode & FMODE_EXCL)) {
1436-
WARN_ON_ONCE(!holder);
1437-
if (whole)
1438-
claiming = whole;
1439-
else
1440-
claiming = bdev;
1441-
ret = bd_prepare_to_claim(bdev, claiming, holder);
1442-
if (ret)
1443-
goto out_put_whole;
1444-
}
1445-
1446-
disk_block_events(disk);
1447-
mutex_lock_nested(&bdev->bd_mutex, for_part);
14481415
if (!bdev->bd_openers) {
1449-
first_open = true;
14501416
bdev->bd_disk = disk;
14511417
bdev->bd_contains = bdev;
14521418
bdev->bd_partno = partno;
@@ -1458,15 +1424,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
14581424
goto out_clear;
14591425

14601426
ret = 0;
1461-
if (disk->fops->open) {
1427+
if (disk->fops->open)
14621428
ret = disk->fops->open(bdev, mode);
1463-
/*
1464-
* If we lost a race with 'disk' being deleted,
1465-
* try again. See md.c
1466-
*/
1467-
if (ret == -ERESTARTSYS)
1468-
need_restart = true;
1469-
}
14701429

14711430
if (!ret) {
14721431
bd_set_nr_sectors(bdev, get_capacity(disk));
@@ -1486,14 +1445,23 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
14861445
if (ret)
14871446
goto out_clear;
14881447
} else {
1489-
BUG_ON(for_part);
1490-
ret = __blkdev_get(whole, mode, NULL, 1);
1491-
if (ret)
1448+
struct block_device *whole = bdget_disk(disk, 0);
1449+
1450+
mutex_lock_nested(&whole->bd_mutex, 1);
1451+
ret = __blkdev_get(whole, disk, 0, mode);
1452+
if (ret) {
1453+
mutex_unlock(&whole->bd_mutex);
1454+
bdput(whole);
14921455
goto out_clear;
1493-
bdev->bd_contains = bdgrab(whole);
1456+
}
1457+
whole->bd_part_count++;
1458+
mutex_unlock(&whole->bd_mutex);
1459+
1460+
bdev->bd_contains = whole;
14941461
bdev->bd_part = disk_get_part(disk, partno);
14951462
if (!(disk->flags & GENHD_FL_UP) ||
14961463
!bdev->bd_part || !bdev->bd_part->nr_sects) {
1464+
__blkdev_put(whole, mode, 1);
14971465
ret = -ENXIO;
14981466
goto out_clear;
14991467
}
@@ -1513,58 +1481,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
15131481
(!ret || ret == -ENOMEDIUM))
15141482
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
15151483
if (ret)
1516-
goto out_unlock_bdev;
1484+
return ret;
15171485
}
15181486
}
15191487
bdev->bd_openers++;
1520-
if (for_part)
1521-
bdev->bd_part_count++;
1522-
if (claiming)
1523-
bd_finish_claiming(bdev, claiming, holder);
1524-
1525-
/*
1526-
* Block event polling for write claims if requested. Any write holder
1527-
* makes the write_holder state stick until all are released. This is
1528-
* good enough and tracking individual writeable reference is too
1529-
* fragile given the way @mode is used in blkdev_get/put().
1530-
*/
1531-
if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1532-
(disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1533-
bdev->bd_write_holder = true;
1534-
unblock_events = false;
1535-
}
1536-
mutex_unlock(&bdev->bd_mutex);
1537-
1538-
if (unblock_events)
1539-
disk_unblock_events(disk);
1540-
1541-
/* only one opener holds refs to the module and disk */
1542-
if (!first_open)
1543-
put_disk_and_module(disk);
1544-
if (whole)
1545-
bdput(whole);
15461488
return 0;
15471489

15481490
out_clear:
15491491
disk_put_part(bdev->bd_part);
15501492
bdev->bd_disk = NULL;
15511493
bdev->bd_part = NULL;
1552-
if (bdev != bdev->bd_contains)
1553-
__blkdev_put(bdev->bd_contains, mode, 1);
15541494
bdev->bd_contains = NULL;
1555-
out_unlock_bdev:
1556-
if (claiming)
1557-
bd_abort_claiming(bdev, claiming, holder);
1558-
mutex_unlock(&bdev->bd_mutex);
1559-
disk_unblock_events(disk);
1560-
out_put_whole:
1561-
if (whole)
1562-
bdput(whole);
1563-
out_put_disk:
1564-
put_disk_and_module(disk);
1565-
if (need_restart)
1566-
goto restart;
1567-
out:
15681495
return ret;
15691496
}
15701497

@@ -1589,7 +1516,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
15891516
*/
15901517
static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
15911518
{
1592-
int ret, perm = 0;
1519+
struct block_device *claiming;
1520+
bool unblock_events = true;
1521+
struct gendisk *disk;
1522+
int perm = 0;
1523+
int partno;
1524+
int ret;
15931525

15941526
if (mode & FMODE_READ)
15951527
perm |= MAY_READ;
@@ -1599,13 +1531,67 @@ static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
15991531
if (ret)
16001532
goto bdput;
16011533

1602-
ret =__blkdev_get(bdev, mode, holder, 0);
1603-
if (ret)
1534+
/*
1535+
* If we lost a race with 'disk' being deleted, try again. See md.c.
1536+
*/
1537+
retry:
1538+
ret = -ENXIO;
1539+
disk = bdev_get_gendisk(bdev, &partno);
1540+
if (!disk)
16041541
goto bdput;
1605-
return 0;
16061542

1543+
if (mode & FMODE_EXCL) {
1544+
WARN_ON_ONCE(!holder);
1545+
1546+
ret = -ENOMEM;
1547+
claiming = bdget_disk(disk, 0);
1548+
if (!claiming)
1549+
goto put_disk;
1550+
ret = bd_prepare_to_claim(bdev, claiming, holder);
1551+
if (ret)
1552+
goto put_claiming;
1553+
}
1554+
1555+
disk_block_events(disk);
1556+
1557+
mutex_lock(&bdev->bd_mutex);
1558+
ret =__blkdev_get(bdev, disk, partno, mode);
1559+
if (!(mode & FMODE_EXCL)) {
1560+
; /* nothing to do here */
1561+
} else if (ret) {
1562+
bd_abort_claiming(bdev, claiming, holder);
1563+
} else {
1564+
bd_finish_claiming(bdev, claiming, holder);
1565+
1566+
/*
1567+
* Block event polling for write claims if requested. Any write
1568+
* holder makes the write_holder state stick until all are
1569+
* released. This is good enough and tracking individual
1570+
* writeable reference is too fragile given the way @mode is
1571+
* used in blkdev_get/put().
1572+
*/
1573+
if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1574+
(disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1575+
bdev->bd_write_holder = true;
1576+
unblock_events = false;
1577+
}
1578+
}
1579+
mutex_unlock(&bdev->bd_mutex);
1580+
1581+
if (unblock_events)
1582+
disk_unblock_events(disk);
1583+
1584+
put_claiming:
1585+
if (mode & FMODE_EXCL)
1586+
bdput(claiming);
1587+
put_disk:
1588+
if (ret)
1589+
put_disk_and_module(disk);
1590+
if (ret == -ERESTARTSYS)
1591+
goto retry;
16071592
bdput:
1608-
bdput(bdev);
1593+
if (ret)
1594+
bdput(bdev);
16091595
return ret;
16101596
}
16111597

@@ -1753,8 +1739,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
17531739
if (bdev_is_partition(bdev))
17541740
victim = bdev->bd_contains;
17551741
bdev->bd_contains = NULL;
1756-
1757-
put_disk_and_module(disk);
17581742
} else {
17591743
if (!bdev_is_partition(bdev) && disk->fops->release)
17601744
disk->fops->release(disk, mode);
@@ -1767,6 +1751,8 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
17671751

17681752
void blkdev_put(struct block_device *bdev, fmode_t mode)
17691753
{
1754+
struct gendisk *disk = bdev->bd_disk;
1755+
17701756
mutex_lock(&bdev->bd_mutex);
17711757

17721758
if (mode & FMODE_EXCL) {
@@ -1795,7 +1781,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
17951781
* unblock evpoll if it was a write holder.
17961782
*/
17971783
if (bdev_free && bdev->bd_write_holder) {
1798-
disk_unblock_events(bdev->bd_disk);
1784+
disk_unblock_events(disk);
17991785
bdev->bd_write_holder = false;
18001786
}
18011787
}
@@ -1805,11 +1791,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
18051791
* event. This is to ensure detection of media removal commanded
18061792
* from userland - e.g. eject(1).
18071793
*/
1808-
disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
1794+
disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
18091795

18101796
mutex_unlock(&bdev->bd_mutex);
18111797

18121798
__blkdev_put(bdev, mode, 0);
1799+
put_disk_and_module(disk);
18131800
}
18141801
EXPORT_SYMBOL(blkdev_put);
18151802

0 commit comments

Comments
 (0)