Skip to content

Commit 5e388e9

Browse files
lorddoskias authored and kdave committed
btrfs: Fix race condition between delayed refs and blockgroup removal
When the delayed refs for a head have all been run, cleanup_ref_head is eventually called which (in case of deletion) obtains a reference to the relevant btrfs_space_info struct by querying the block group (bg) for the range. This is problematic because when the last extent of a bg is deleted, a race window emerges between the removal of that bg and the subsequent invocation of cleanup_ref_head. This can result in cache being NULL and either a NULL pointer dereference or an assertion failure.

  task: ffff8d04d31ed080 task.stack: ffff9e5dc10cc000
  RIP: 0010:assfail.constprop.78+0x18/0x1a [btrfs]
  RSP: 0018:ffff9e5dc10cfbe8 EFLAGS: 00010292
  RAX: 0000000000000044 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: ffff8d04ffc1f868 RSI: ffff8d04ffc178c8 RDI: ffff8d04ffc178c8
  RBP: ffff8d04d29e5ea0 R08: 00000000000001f0 R09: 0000000000000001
  R10: ffff9e5dc0507d58 R11: 0000000000000001 R12: ffff8d04d29e5ea0
  R13: ffff8d04d29e5f08 R14: ffff8d04efe29b40 R15: ffff8d04efe203e0
  FS: 00007fbf58ead500(0000) GS:ffff8d04ffc00000(0000) knlGS:0000000000000000
  CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fe6c6975648 CR3: 0000000013b2a000 CR4: 00000000000006f0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   __btrfs_run_delayed_refs+0x10e7/0x12c0 [btrfs]
   btrfs_run_delayed_refs+0x68/0x250 [btrfs]
   btrfs_should_end_transaction+0x42/0x60 [btrfs]
   btrfs_truncate_inode_items+0xaac/0xfc0 [btrfs]
   btrfs_evict_inode+0x4c6/0x5c0 [btrfs]
   evict+0xc6/0x190
   do_unlinkat+0x19c/0x300
   do_syscall_64+0x74/0x140
   entry_SYSCALL_64_after_hwframe+0x3d/0xa2
  RIP: 0033:0x7fbf589c57a7

To fix this, introduce a new flag "is_system" in the head_ref struct, which is populated at insertion time. This makes it possible to decouple the lookup of the space_info from the lookup of the possibly deleted bg.
Fixes: d7eae34 ("Btrfs: rework delayed ref total_bytes_pinned accounting")
CC: [email protected] # 4.14+
Suggested-by: Omar Sandoval <[email protected]>
Signed-off-by: Nikolay Borisov <[email protected]>
Reviewed-by: Omar Sandoval <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 92d3217 commit 5e388e9

File tree

3 files changed

+26
-10
lines changed

3 files changed

+26
-10
lines changed

fs/btrfs/delayed-ref.c

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
540540
struct btrfs_delayed_ref_head *head_ref,
541541
struct btrfs_qgroup_extent_record *qrecord,
542542
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
543-
int action, int is_data, int *qrecord_inserted_ret,
543+
int action, int is_data, int is_system,
544+
int *qrecord_inserted_ret,
544545
int *old_ref_mod, int *new_ref_mod)
546+
545547
{
546548
struct btrfs_delayed_ref_head *existing;
547549
struct btrfs_delayed_ref_root *delayed_refs;
@@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
585587
head_ref->ref_mod = count_mod;
586588
head_ref->must_insert_reserved = must_insert_reserved;
587589
head_ref->is_data = is_data;
590+
head_ref->is_system = is_system;
588591
head_ref->ref_tree = RB_ROOT;
589592
INIT_LIST_HEAD(&head_ref->ref_add_list);
590593
RB_CLEAR_NODE(&head_ref->href_node);
@@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
772775
struct btrfs_delayed_ref_root *delayed_refs;
773776
struct btrfs_qgroup_extent_record *record = NULL;
774777
int qrecord_inserted;
778+
int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
775779

776780
BUG_ON(extent_op && extent_op->is_data);
777781
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
800804
*/
801805
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
802806
bytenr, num_bytes, 0, 0, action, 0,
803-
&qrecord_inserted, old_ref_mod,
804-
new_ref_mod);
807+
is_system, &qrecord_inserted,
808+
old_ref_mod, new_ref_mod);
805809

806810
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
807811
num_bytes, parent, ref_root, level, action);
@@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
868872
*/
869873
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
870874
bytenr, num_bytes, ref_root, reserved,
871-
action, 1, &qrecord_inserted,
875+
action, 1, 0, &qrecord_inserted,
872876
old_ref_mod, new_ref_mod);
873877

874878
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
898902
delayed_refs = &trans->transaction->delayed_refs;
899903
spin_lock(&delayed_refs->lock);
900904

905+
/*
906+
* extent_ops just modify the flags of an extent and they don't result
907+
* in ref count changes, hence it's safe to pass false/0 for is_system
908+
* argument
909+
*/
901910
add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
902911
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
903-
extent_op->is_data, NULL, NULL, NULL);
912+
extent_op->is_data, 0, NULL, NULL, NULL);
904913

905914
spin_unlock(&delayed_refs->lock);
906915
return 0;

fs/btrfs/delayed-ref.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head {
127127
*/
128128
unsigned int must_insert_reserved:1;
129129
unsigned int is_data:1;
130+
unsigned int is_system:1;
130131
unsigned int processing:1;
131132
};
132133

fs/btrfs/extent-tree.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
26012601
trace_run_delayed_ref_head(fs_info, head, 0);
26022602

26032603
if (head->total_ref_mod < 0) {
2604-
struct btrfs_block_group_cache *cache;
2604+
struct btrfs_space_info *space_info;
2605+
u64 flags;
26052606

2606-
cache = btrfs_lookup_block_group(fs_info, head->bytenr);
2607-
ASSERT(cache);
2608-
percpu_counter_add(&cache->space_info->total_bytes_pinned,
2607+
if (head->is_data)
2608+
flags = BTRFS_BLOCK_GROUP_DATA;
2609+
else if (head->is_system)
2610+
flags = BTRFS_BLOCK_GROUP_SYSTEM;
2611+
else
2612+
flags = BTRFS_BLOCK_GROUP_METADATA;
2613+
space_info = __find_space_info(fs_info, flags);
2614+
ASSERT(space_info);
2615+
percpu_counter_add(&space_info->total_bytes_pinned,
26092616
-head->num_bytes);
2610-
btrfs_put_block_group(cache);
26112617

26122618
if (head->is_data) {
26132619
spin_lock(&delayed_refs->lock);

0 commit comments

Comments
 (0)