
Commit fb235dc

Qu Wenruo authored and kdave committed
btrfs: qgroup: Move half of the qgroup accounting time out of commit trans
Just as Filipe pointed out, the most time consuming parts of qgroup are btrfs_qgroup_account_extents() and btrfs_qgroup_prepare_account_extents(), which both call btrfs_find_all_roots() to get the old_roots and new_roots ulists.

What makes things worse is that we're calling the expensive btrfs_find_all_roots() at transaction committing time, with TRANS_STATE_COMMIT_DOING set, which blocks all incoming transactions.

Such behavior is necessary for the @new_roots search, as the current btrfs_find_all_roots() can't do it correctly, so we call it just before switching commit roots. However, for the @old_roots search it's not necessary: that search is based on the commit_root, so it will always be correct and can be moved out of transaction commit.

This patch moves the @old_roots search out of commit_transaction(), so in theory we can halve the qgroup time consumption at commit_transaction(). But please note that this won't speed up qgroup overall; the total time consumption stays the same, it just reduces the performance stall.

Cc: Filipe Manana <[email protected]>
Signed-off-by: Qu Wenruo <[email protected]>
Reviewed-by: Filipe Manana <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 15b3451 commit fb235dc

File tree

3 files changed: +75 -11 lines changed
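The shape of the change is easiest to see in the caller pattern it introduces; the following is condensed from the btrfs_qgroup_trace_extent() hunk in fs/btrfs/qgroup.c below. The extent record is inserted under the delayed-refs spinlock, and the backref walk for old_roots, which may sleep, now happens afterwards outside the lock instead of at transaction commit time:

        spin_lock(&delayed_refs->lock);
        ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
        spin_unlock(&delayed_refs->lock);
        if (ret > 0) {
                /* An identical record already exists, nothing new to trace. */
                kfree(record);
                return 0;
        }
        /*
         * Outside the spinlock: resolve old_roots against the commit root
         * now, instead of during commit_transaction().
         */
        return btrfs_qgroup_trace_extent_post(fs_info, record);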

fs/btrfs/delayed-ref.c

Lines changed: 16 additions & 4 deletions
@@ -550,13 +550,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                      struct btrfs_delayed_ref_node *ref,
                      struct btrfs_qgroup_extent_record *qrecord,
                      u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
-                     int action, int is_data)
+                     int action, int is_data, int *qrecord_inserted_ret)
 {
         struct btrfs_delayed_ref_head *existing;
         struct btrfs_delayed_ref_head *head_ref = NULL;
         struct btrfs_delayed_ref_root *delayed_refs;
         int count_mod = 1;
         int must_insert_reserved = 0;
+        int qrecord_inserted = 0;
 
         /* If reserved is provided, it must be a data extent. */
         BUG_ON(!is_data && reserved);
@@ -623,6 +624,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                 if(btrfs_qgroup_trace_extent_nolock(fs_info,
                                         delayed_refs, qrecord))
                         kfree(qrecord);
+                else
+                        qrecord_inserted = 1;
         }
 
         spin_lock_init(&head_ref->lock);
@@ -650,6 +653,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                 atomic_inc(&delayed_refs->num_entries);
                 trans->delayed_ref_updates++;
         }
+        if (qrecord_inserted_ret)
+                *qrecord_inserted_ret = qrecord_inserted;
         return head_ref;
 }
 
@@ -779,6 +784,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
         struct btrfs_delayed_ref_head *head_ref;
         struct btrfs_delayed_ref_root *delayed_refs;
         struct btrfs_qgroup_extent_record *record = NULL;
+        int qrecord_inserted;
 
         BUG_ON(extent_op && extent_op->is_data);
         ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -806,12 +812,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
          * the spin lock
          */
         head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
-                                        bytenr, num_bytes, 0, 0, action, 0);
+                                        bytenr, num_bytes, 0, 0, action, 0,
+                                        &qrecord_inserted);
 
         add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                              num_bytes, parent, ref_root, level, action);
         spin_unlock(&delayed_refs->lock);
 
+        if (qrecord_inserted)
+                return btrfs_qgroup_trace_extent_post(fs_info, record);
         return 0;
 
 free_head_ref:
@@ -835,6 +844,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         struct btrfs_delayed_ref_head *head_ref;
         struct btrfs_delayed_ref_root *delayed_refs;
         struct btrfs_qgroup_extent_record *record = NULL;
+        int qrecord_inserted;
 
         ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
         if (!ref)
@@ -868,13 +878,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
          */
         head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
                                         bytenr, num_bytes, ref_root, reserved,
-                                        action, 1);
+                                        action, 1, &qrecord_inserted);
 
         add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                              num_bytes, parent, ref_root, owner, offset,
                              action);
         spin_unlock(&delayed_refs->lock);
 
+        if (qrecord_inserted)
+                return btrfs_qgroup_trace_extent_post(fs_info, record);
         return 0;
 }
 
@@ -897,7 +909,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 
         add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
                              num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
-                             extent_op->is_data);
+                             extent_op->is_data, NULL);
 
         spin_unlock(&delayed_refs->lock);
         return 0;

fs/btrfs/qgroup.c

Lines changed: 29 additions & 4 deletions
@@ -1464,8 +1464,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
         while (node) {
                 record = rb_entry(node, struct btrfs_qgroup_extent_record,
                                   node);
-                ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
-                                           &record->old_roots);
+                if (WARN_ON(!record->old_roots))
+                        ret = btrfs_find_all_roots(NULL, fs_info,
+                                        record->bytenr, 0, &record->old_roots);
                 if (ret < 0)
                         break;
                 if (qgroup_to_skip)
@@ -1504,6 +1505,28 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
         return 0;
 }
 
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+                                   struct btrfs_qgroup_extent_record *qrecord)
+{
+        struct ulist *old_root;
+        u64 bytenr = qrecord->bytenr;
+        int ret;
+
+        ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
+        if (ret < 0)
+                return ret;
+
+        /*
+         * Here we don't need to get the lock of
+         * trans->transaction->delayed_refs, since inserted qrecord won't
+         * be deleted, only qrecord->node may be modified (new qrecord insert)
+         *
+         * So modifying qrecord->old_roots is safe here
+         */
+        qrecord->old_roots = old_root;
+        return 0;
+}
+
 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
                 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
                 gfp_t gfp_flag)
@@ -1529,9 +1552,11 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
         spin_lock(&delayed_refs->lock);
         ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
         spin_unlock(&delayed_refs->lock);
-        if (ret > 0)
+        if (ret > 0) {
                 kfree(record);
-        return 0;
+                return 0;
+        }
+        return btrfs_qgroup_trace_extent_post(fs_info, record);
 }
 
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
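For context, here is a rough sketch of the commit-time consumer, btrfs_qgroup_account_extents(), which is named in the commit message but not touched by this patch; its exact shape below is assumed and heavily simplified, and declarations and error handling are omitted. With record->old_roots pre-filled by btrfs_qgroup_trace_extent_post(), only the new_roots walk still runs inside the TRANS_STATE_COMMIT_DOING window:

        /* Assumed, simplified shape of the commit-time loop (not in this diff). */
        while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
                record = rb_entry(node, struct btrfs_qgroup_extent_record, node);
                /*
                 * record->old_roots was already resolved outside of commit by
                 * btrfs_qgroup_trace_extent_post(); only new_roots still needs
                 * the expensive backref walk while new transactions are blocked.
                 */
                ret = btrfs_find_all_roots(trans, fs_info, record->bytenr, 0,
                                           &new_roots);
                if (!ret)
                        ret = btrfs_qgroup_account_extent(trans, fs_info,
                                        record->bytenr, record->num_bytes,
                                        record->old_roots, new_roots);
                rb_erase(node, &delayed_refs->dirty_extent_root);
                kfree(record);  /* ulist cleanup omitted */
        }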

fs/btrfs/qgroup.h

Lines changed: 30 additions & 3 deletions
@@ -94,9 +94,10 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
                                          struct btrfs_fs_info *fs_info);
 /*
  * Inform qgroup to trace one dirty extent, its info is recorded in @record.
- * So qgroup can account it at commit trans time.
+ * So qgroup can account it at transaction committing time.
  *
- * No lock version, caller must acquire delayed ref lock and allocate memory.
+ * No lock version, caller must acquire delayed ref lock and allocated memory,
+ * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
  *
  * Return 0 for success insert
  * Return >0 for existing record, caller can free @record safely.
@@ -107,12 +108,38 @@ int btrfs_qgroup_trace_extent_nolock(
                 struct btrfs_delayed_ref_root *delayed_refs,
                 struct btrfs_qgroup_extent_record *record);
 
+/*
+ * Post handler after qgroup_trace_extent_nolock().
+ *
+ * NOTE: Current qgroup does the expensive backref walk at transaction
+ * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
+ * new transaction.
+ * This is designed to allow btrfs_find_all_roots() to get correct new_roots
+ * result.
+ *
+ * However for old_roots there is no need to do backref walk at that time,
+ * since we search commit roots to walk backref and result will always be
+ * correct.
+ *
+ * Due to the nature of no lock version, we can't do backref there.
+ * So we must call btrfs_qgroup_trace_extent_post() after exiting
+ * spinlock context.
+ *
+ * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
+ * using current root, then we can move all expensive backref walk out of
+ * transaction committing, but not now as qgroup accounting will be wrong again.
+ */
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+                                   struct btrfs_qgroup_extent_record *qrecord);
+
 /*
  * Inform qgroup to trace one dirty extent, specified by @bytenr and
  * @num_bytes.
  * So qgroup can account it at commit trans time.
  *
- * Better encapsulated version.
+ * Better encapsulated version, with memory allocation and backref walk for
+ * commit roots.
+ * So this can sleep.
  *
  * Return 0 if the operation is done.
  * Return <0 for error, like memory allocation failure or invalid parameter
