Skip to content

Commit 75cb379

Browse files
jeffmahoney authored and kdave committed
btrfs: defer adding raid type kobject until after chunk relocation
Any time the first block group of a new type is created, we add a new kobject to sysfs to hold the attributes for that type. Kobject-internal allocations always use GFP_KERNEL, making them prone to fs-reclaim races. While it appears as if this can occur any time a block group is created, the only times the first block group of a new type can be created in memory is at mount and when we create the first new block group during raid conversion. This patch adds a new list to track pending kobject additions and then handles them after we do chunk relocation. Between relocating the target chunk (or forcing allocation of a new chunk in the case of data) and removing the old chunk, we're in a safe place for fs-reclaim to occur. We're holding the volume mutex, which is already held across page faults, and the delete_unused_bgs_mutex, which will only stall the cleaner thread. Signed-off-by: Jeff Mahoney <[email protected]> Reviewed-by: David Sterba <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent dc2d300 commit 75cb379

File tree

5 files changed

+62
-20
lines changed

5 files changed

+62
-20
lines changed

fs/btrfs/ctree.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,8 +385,9 @@ struct btrfs_dev_replace {
385385

386386
/* For raid type sysfs entries */
387387
struct raid_kobject {
388-
int raid_type;
388+
u64 flags;
389389
struct kobject kobj;
390+
struct list_head list;
390391
};
391392

392393
struct btrfs_space_info {
@@ -940,6 +941,8 @@ struct btrfs_fs_info {
940941
u32 thread_pool_size;
941942

942943
struct kobject *space_info_kobj;
944+
struct list_head pending_raid_kobjs;
945+
spinlock_t pending_raid_kobjs_lock; /* uncontended */
943946

944947
u64 total_pinned;
945948

@@ -2700,6 +2703,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
27002703
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
27012704
struct btrfs_fs_info *fs_info, u64 bytes_used,
27022705
u64 type, u64 chunk_offset, u64 size);
2706+
void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
27032707
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
27042708
struct btrfs_fs_info *fs_info,
27052709
const u64 chunk_offset);

fs/btrfs/disk-io.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2431,6 +2431,8 @@ int open_ctree(struct super_block *sb,
24312431
INIT_LIST_HEAD(&fs_info->delayed_iputs);
24322432
INIT_LIST_HEAD(&fs_info->delalloc_roots);
24332433
INIT_LIST_HEAD(&fs_info->caching_block_groups);
2434+
INIT_LIST_HEAD(&fs_info->pending_raid_kobjs);
2435+
spin_lock_init(&fs_info->pending_raid_kobjs_lock);
24342436
spin_lock_init(&fs_info->delalloc_root_lock);
24352437
spin_lock_init(&fs_info->trans_lock);
24362438
spin_lock_init(&fs_info->fs_roots_radix_lock);

fs/btrfs/extent-tree.c

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9918,9 +9918,39 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
99189918
return 0;
99199919
}
99209920

9921+
/* link_block_group will queue up kobjects to add when we're reclaim-safe */
9922+
void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
9923+
{
9924+
struct btrfs_space_info *space_info;
9925+
struct raid_kobject *rkobj;
9926+
LIST_HEAD(list);
9927+
int index;
9928+
int ret = 0;
9929+
9930+
spin_lock(&fs_info->pending_raid_kobjs_lock);
9931+
list_splice_init(&fs_info->pending_raid_kobjs, &list);
9932+
spin_unlock(&fs_info->pending_raid_kobjs_lock);
9933+
9934+
list_for_each_entry(rkobj, &list, list) {
9935+
space_info = __find_space_info(fs_info, rkobj->flags);
9936+
index = btrfs_bg_flags_to_raid_index(rkobj->flags);
9937+
9938+
ret = kobject_add(&rkobj->kobj, &space_info->kobj,
9939+
"%s", get_raid_name(index));
9940+
if (ret) {
9941+
kobject_put(&rkobj->kobj);
9942+
break;
9943+
}
9944+
}
9945+
if (ret)
9946+
btrfs_warn(fs_info,
9947+
"failed to add kobject for block cache, ignoring");
9948+
}
9949+
99219950
static void link_block_group(struct btrfs_block_group_cache *cache)
99229951
{
99239952
struct btrfs_space_info *space_info = cache->space_info;
9953+
struct btrfs_fs_info *fs_info = cache->fs_info;
99249954
int index = btrfs_bg_flags_to_raid_index(cache->flags);
99259955
bool first = false;
99269956

@@ -9931,27 +9961,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
99319961
up_write(&space_info->groups_sem);
99329962

99339963
if (first) {
9934-
struct raid_kobject *rkobj;
9935-
int ret;
9936-
9937-
rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
9938-
if (!rkobj)
9939-
goto out_err;
9940-
rkobj->raid_type = index;
9941-
kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
9942-
ret = kobject_add(&rkobj->kobj, &space_info->kobj,
9943-
"%s", get_raid_name(index));
9944-
if (ret) {
9945-
kobject_put(&rkobj->kobj);
9946-
goto out_err;
9964+
struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
9965+
if (!rkobj) {
9966+
btrfs_warn(cache->fs_info,
9967+
"couldn't alloc memory for raid level kobject");
9968+
return;
99479969
}
9970+
rkobj->flags = cache->flags;
9971+
kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
9972+
9973+
spin_lock(&fs_info->pending_raid_kobjs_lock);
9974+
list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
9975+
spin_unlock(&fs_info->pending_raid_kobjs_lock);
99489976
space_info->block_group_kobjs[index] = &rkobj->kobj;
99499977
}
9950-
9951-
return;
9952-
out_err:
9953-
btrfs_warn(cache->fs_info,
9954-
"failed to add kobject for block cache, ignoring");
99559978
}
99569979

99579980
static struct btrfs_block_group_cache *
@@ -10167,6 +10190,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
1016710190
inc_block_group_ro(cache, 1);
1016810191
}
1016910192

10193+
btrfs_add_raid_kobjects(info);
1017010194
init_global_block_rsv(info);
1017110195
ret = 0;
1017210196
error:

fs/btrfs/sysfs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
272272
{
273273
struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
274274
struct btrfs_block_group_cache *block_group;
275-
int index = to_raid_kobj(kobj)->raid_type;
275+
int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
276276
u64 val = 0;
277277

278278
down_read(&sinfo->groups_sem);

fs/btrfs/volumes.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3003,6 +3003,16 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
30033003
if (ret)
30043004
return ret;
30053005

3006+
/*
3007+
* We add the kobjects here (and after forcing data chunk creation)
3008+
* since relocation is the only place we'll create chunks of a new
3009+
* type at runtime. The only place where we'll remove the last
3010+
* chunk of a type is the call immediately below this one. Even
3011+
* so, we're protected against races with the cleaner thread since
3012+
* we're covered by the delete_unused_bgs_mutex.
3013+
*/
3014+
btrfs_add_raid_kobjects(fs_info);
3015+
30063016
trans = btrfs_start_trans_remove_block_group(root->fs_info,
30073017
chunk_offset);
30083018
if (IS_ERR(trans)) {
@@ -3130,6 +3140,8 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
31303140
if (ret < 0)
31313141
return ret;
31323142

3143+
btrfs_add_raid_kobjects(fs_info);
3144+
31333145
return 1;
31343146
}
31353147
}

0 commit comments

Comments
 (0)