Skip to content

Commit 393f646

Browse files
naota authored and kdave committed
btrfs: zoned: finish least available block group on data bg allocation
When we run out of active zones and no block group has sufficient space left, we need to finish one block group to make room to activate a new block group. However, we cannot do this for metadata block groups because we can cause a deadlock by waiting for a running transaction commit. So, do that only for a data block group.

Furthermore, the block group to be finished has two requirements. First, the block group must not have reserved bytes left. Having reserved bytes means we have an allocated region but did not yet send bios for it. If that region is allocated by the thread calling btrfs_zone_finish(), it results in a deadlock. Second, the block group to be finished must not be a SYSTEM block group. Finishing a SYSTEM block group easily breaks further chunk allocation by nullifying the SYSTEM free space.

In certain cases, we cannot find any zone finish candidate, or btrfs_zone_finish() may fail. In that case, we fall back to splitting the allocation bytes and filling the last spaces left in the block groups.

CC: [email protected] # 5.16+
Fixes: afba2bc ("btrfs: zoned: implement active zone tracking")
Reviewed-by: Johannes Thumshirn <[email protected]>
Signed-off-by: Naohiro Aota <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent bb9950d commit 393f646

File tree

3 files changed

+87
-10
lines changed

3 files changed

+87
-10
lines changed

fs/btrfs/extent-tree.c

Lines changed: 40 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3965,23 +3965,53 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
39653965
}
39663966
}
39673967

3968+
static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
3969+
struct find_free_extent_ctl *ffe_ctl)
3970+
{
3971+
/* If we can activate new zone, just allocate a chunk and use it */
3972+
if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
3973+
return 0;
3974+
3975+
/*
3976+
* We already reached the max active zones. Try to finish one block
3977+
* group to make room for a new block group. This is only possible
3978+
* for a data block group because btrfs_zone_finish() may need to wait
3979+
* for a running transaction which can cause a deadlock for metadata
3980+
* allocation.
3981+
*/
3982+
if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
3983+
int ret = btrfs_zone_finish_one_bg(fs_info);
3984+
3985+
if (ret == 1)
3986+
return 0;
3987+
else if (ret < 0)
3988+
return ret;
3989+
}
3990+
3991+
/*
3992+
* If we have enough free space left in an already active block group
3993+
* and we can't activate any other zone now, do not allow allocating a
3994+
* new chunk and let find_free_extent() retry with a smaller size.
3995+
*/
3996+
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
3997+
return -ENOSPC;
3998+
3999+
/*
4000+
* We cannot activate a new block group and not enough space is left in any
4001+
* block groups. So, allocating a new block group may not help. But,
4002+
* there is nothing to do anyway, so let's go with it.
4003+
*/
4004+
return 0;
4005+
}
4006+
39684007
static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
39694008
struct find_free_extent_ctl *ffe_ctl)
39704009
{
39714010
switch (ffe_ctl->policy) {
39724011
case BTRFS_EXTENT_ALLOC_CLUSTERED:
39734012
return 0;
39744013
case BTRFS_EXTENT_ALLOC_ZONED:
3975-
/*
3976-
* If we have enough free space left in an already
3977-
* active block group and we can't activate any other
3978-
* zone now, do not allow allocating a new chunk and
3979-
* let find_free_extent() retry with a smaller size.
3980-
*/
3981-
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
3982-
!btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
3983-
return -ENOSPC;
3984-
return 0;
4014+
return can_allocate_chunk_zoned(fs_info, ffe_ctl);
39854015
default:
39864016
BUG();
39874017
}

fs/btrfs/zoned.c

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2180,3 +2180,43 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
21802180
spin_unlock(&block_group->lock);
21812181
btrfs_put_block_group(block_group);
21822182
}
2183+
2184+
int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
2185+
{
2186+
struct btrfs_block_group *block_group;
2187+
struct btrfs_block_group *min_bg = NULL;
2188+
u64 min_avail = U64_MAX;
2189+
int ret;
2190+
2191+
spin_lock(&fs_info->zone_active_bgs_lock);
2192+
list_for_each_entry(block_group, &fs_info->zone_active_bgs,
2193+
active_bg_list) {
2194+
u64 avail;
2195+
2196+
spin_lock(&block_group->lock);
2197+
if (block_group->reserved ||
2198+
(block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
2199+
spin_unlock(&block_group->lock);
2200+
continue;
2201+
}
2202+
2203+
avail = block_group->zone_capacity - block_group->alloc_offset;
2204+
if (min_avail > avail) {
2205+
if (min_bg)
2206+
btrfs_put_block_group(min_bg);
2207+
min_bg = block_group;
2208+
min_avail = avail;
2209+
btrfs_get_block_group(min_bg);
2210+
}
2211+
spin_unlock(&block_group->lock);
2212+
}
2213+
spin_unlock(&fs_info->zone_active_bgs_lock);
2214+
2215+
if (!min_bg)
2216+
return 0;
2217+
2218+
ret = btrfs_zone_finish(min_bg);
2219+
btrfs_put_block_group(min_bg);
2220+
2221+
return ret < 0 ? ret : 1;
2222+
}

fs/btrfs/zoned.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
8080
bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
8181
void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
8282
u64 length);
83+
int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
8384
#else /* CONFIG_BLK_DEV_ZONED */
8485
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
8586
struct blk_zone *zone)
@@ -249,6 +250,12 @@ static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
249250

250251
static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
251252
u64 logical, u64 length) { }
253+
254+
static inline int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
255+
{
256+
return 1;
257+
}
258+
252259
#endif
253260

254261
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)

0 commit comments

Comments
 (0)