Skip to content

Commit adb86db

Browse files
fdmanana authored and kdave committed
btrfs: stop doing excessive space reservation for csum deletion
Currently, when reserving space for deleting the csum items for a data extent (when adding or updating a delayed ref head), we determine how many leaves of csum items we can have and then pass that number to the helper btrfs_calc_delayed_ref_bytes(). This helper is used for calculating space for all tree modifications we need when running delayed references. However, the amount of space it computes is excessive for deleting csum items because:

1) It uses btrfs_calc_insert_metadata_size(), which is excessive because we only need to delete csum items from the csum tree; we don't need to insert any items, so btrfs_calc_metadata_size() is all we need (as it computes the space needed to delete an item);

2) If the free space tree is enabled, it doubles the amount of space, which is pointless for csum deletion since we don't need to touch the free space tree or any other tree other than the csum tree.

So improve on this by tracking how many csum deletions we have and using a new helper to calculate space for csum deletions (just a wrapper around btrfs_calc_metadata_size() with a comment).

This reduces the amount of space we need to reserve for csum deletions by a factor of 4, and it helps reduce the number of times we have to block space reservations and have the reclaim task enter the space flushing algorithm (flush delayed items, flush delayed refs, etc) in order to satisfy tickets.

For example, this results in a total time decrease when unlinking (or truncating) files with many extents, as we end up having to block on metadata space reservations less often.

Example test:

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/nullb0
  MNT=/mnt/test

  umount $DEV &> /dev/null
  mkfs.btrfs -f $DEV

  # Use compression to quickly create files with a lot of extents
  # (each with a size of 128K).
  mount -o compress=lzo $DEV $MNT

  # 120G gives at least 983040 extents with a size of 128K.
  xfs_io -f -c "pwrite -S 0xab -b 1M 0 120G" $MNT/foobar

  # Flush all delalloc and clear all metadata from memory.
  umount $MNT
  mount -o compress=lzo $DEV $MNT

  start=$(date +%s%N)
  rm -f $MNT/foobar
  end=$(date +%s%N)
  dur=$(( (end - start) / 1000000 ))
  echo "rm took $dur milliseconds"

  umount $MNT

Before this change rm took: 7504 milliseconds
After this change rm took:  6574 milliseconds (-12.4%)

Reviewed-by: Josef Bacik <[email protected]>
Signed-off-by: Filipe Manana <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent b6ea3e6 commit adb86db

File tree

7 files changed

+45
-26
lines changed

7 files changed

+45
-26
lines changed

fs/btrfs/block-group.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,7 +1286,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
12861286
/* Once for the lookup reference */
12871287
btrfs_put_block_group(block_group);
12881288
if (remove_rsv)
1289-
btrfs_delayed_refs_rsv_release(fs_info, 1);
1289+
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
12901290
btrfs_free_path(path);
12911291
return ret;
12921292
}
@@ -2709,7 +2709,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
27092709

27102710
/* Already aborted the transaction if it failed. */
27112711
next:
2712-
btrfs_delayed_refs_rsv_release(fs_info, 1);
2712+
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
27132713
list_del_init(&block_group->bg_list);
27142714
clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
27152715
}
@@ -3370,7 +3370,7 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
33703370
if (should_put)
33713371
btrfs_put_block_group(cache);
33723372
if (drop_reserve)
3373-
btrfs_delayed_refs_rsv_release(fs_info, 1);
3373+
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
33743374
/*
33753375
* Avoid blocking other tasks for too long. It might even save
33763376
* us from writing caches for block groups that are going to be
@@ -3517,7 +3517,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
35173517
/* If its not on the io list, we need to put the block group */
35183518
if (should_put)
35193519
btrfs_put_block_group(cache);
3520-
btrfs_delayed_refs_rsv_release(fs_info, 1);
3520+
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
35213521
spin_lock(&cur_trans->dirty_bgs_lock);
35223522
}
35233523
spin_unlock(&cur_trans->dirty_bgs_lock);

fs/btrfs/delayed-ref.c

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,21 @@ bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
5757
* Release a ref head's reservation.
5858
*
5959
* @fs_info: the filesystem
60-
* @nr: number of items to drop
60+
* @nr_refs: number of delayed refs to drop
61+
* @nr_csums: number of csum items to drop
6162
*
6263
* Drops the delayed ref head's count from the delayed refs rsv and free any
6364
* excess reservation we had.
6465
*/
65-
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
66+
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums)
6667
{
6768
struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
68-
const u64 num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr);
69+
u64 num_bytes;
6970
u64 released;
7071

72+
num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr_refs);
73+
num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
74+
7175
released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
7276
if (released)
7377
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
@@ -77,26 +81,29 @@ void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
7781
/*
7882
* Adjust the size of the delayed refs rsv.
7983
*
80-
* This is to be called anytime we may have adjusted trans->delayed_ref_updates,
81-
* it'll calculate the additional size and add it to the delayed_refs_rsv.
84+
* This is to be called anytime we may have adjusted trans->delayed_ref_updates
85+
* or trans->delayed_ref_csum_deletions, it'll calculate the additional size and
86+
* add it to the delayed_refs_rsv.
8287
*/
8388
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
8489
{
8590
struct btrfs_fs_info *fs_info = trans->fs_info;
8691
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
8792
u64 num_bytes;
8893

89-
if (!trans->delayed_ref_updates)
90-
return;
94+
num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates);
95+
num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info,
96+
trans->delayed_ref_csum_deletions);
9197

92-
num_bytes = btrfs_calc_delayed_ref_bytes(fs_info,
93-
trans->delayed_ref_updates);
98+
if (num_bytes == 0)
99+
return;
94100

95101
spin_lock(&delayed_rsv->lock);
96102
delayed_rsv->size += num_bytes;
97103
delayed_rsv->full = false;
98104
spin_unlock(&delayed_rsv->lock);
99105
trans->delayed_ref_updates = 0;
106+
trans->delayed_ref_csum_deletions = 0;
100107
}
101108

102109
/*
@@ -434,7 +441,7 @@ static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info,
434441
list_del(&ref->add_list);
435442
btrfs_put_delayed_ref(ref);
436443
atomic_dec(&delayed_refs->num_entries);
437-
btrfs_delayed_refs_rsv_release(fs_info, 1);
444+
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
438445
}
439446

440447
static bool merge_ref(struct btrfs_fs_info *fs_info,
@@ -710,11 +717,11 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
710717

711718
if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
712719
delayed_refs->pending_csums -= existing->num_bytes;
713-
btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
720+
btrfs_delayed_refs_rsv_release(fs_info, 0, csum_leaves);
714721
}
715722
if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
716723
delayed_refs->pending_csums += existing->num_bytes;
717-
trans->delayed_ref_updates += csum_leaves;
724+
trans->delayed_ref_csum_deletions += csum_leaves;
718725
}
719726
}
720727

@@ -834,7 +841,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
834841
*/
835842
if (head_ref->is_data && head_ref->ref_mod < 0) {
836843
delayed_refs->pending_csums += head_ref->num_bytes;
837-
trans->delayed_ref_updates +=
844+
trans->delayed_ref_csum_deletions +=
838845
btrfs_csum_bytes_to_leaves(trans->fs_info,
839846
head_ref->num_bytes);
840847
}

fs/btrfs/delayed-ref.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,17 @@ static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_in
283283
return num_bytes;
284284
}
285285

286+
static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *fs_info,
287+
int num_csum_items)
288+
{
289+
/*
290+
* Deleting csum items does not result in new nodes/leaves and does not
291+
* require changing the free space tree, only the csum tree, so this is
292+
* all we need.
293+
*/
294+
return btrfs_calc_metadata_size(fs_info, num_csum_items);
295+
}
296+
286297
static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
287298
int action, u64 bytenr, u64 len, u64 parent)
288299
{
@@ -407,7 +418,7 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
407418

408419
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
409420

410-
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr);
421+
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums);
411422
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
412423
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
413424
enum btrfs_reserve_flush_enum flush);

fs/btrfs/disk-io.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4563,7 +4563,7 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
45634563
list_del(&ref->add_list);
45644564
atomic_dec(&delayed_refs->num_entries);
45654565
btrfs_put_delayed_ref(ref);
4566-
btrfs_delayed_refs_rsv_release(fs_info, 1);
4566+
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
45674567
}
45684568
if (head->must_insert_reserved)
45694569
pin_bytes = true;
@@ -4761,7 +4761,7 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
47614761

47624762
spin_unlock(&cur_trans->dirty_bgs_lock);
47634763
btrfs_put_block_group(cache);
4764-
btrfs_delayed_refs_rsv_release(fs_info, 1);
4764+
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
47654765
spin_lock(&cur_trans->dirty_bgs_lock);
47664766
}
47674767
spin_unlock(&cur_trans->dirty_bgs_lock);

fs/btrfs/extent-tree.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1824,16 +1824,16 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
18241824
* to drop the csum leaves for this update from our delayed_refs_rsv.
18251825
*/
18261826
if (head->total_ref_mod < 0 && head->is_data) {
1827-
int nr_items;
1827+
int nr_csums;
18281828

18291829
spin_lock(&delayed_refs->lock);
18301830
delayed_refs->pending_csums -= head->num_bytes;
18311831
spin_unlock(&delayed_refs->lock);
1832-
nr_items = btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
1832+
nr_csums = btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
18331833

1834-
btrfs_delayed_refs_rsv_release(fs_info, nr_items);
1834+
btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums);
18351835

1836-
return btrfs_calc_delayed_ref_bytes(fs_info, nr_items);
1836+
return btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
18371837
}
18381838

18391839
return 0;
@@ -1985,7 +1985,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
19851985

19861986
ret = run_one_delayed_ref(trans, ref, extent_op,
19871987
must_insert_reserved);
1988-
btrfs_delayed_refs_rsv_release(fs_info, 1);
1988+
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
19891989
*bytes_released += btrfs_calc_delayed_ref_bytes(fs_info, 1);
19901990
btrfs_free_delayed_extent_op(extent_op);
19911991
if (ret) {

fs/btrfs/transaction.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2085,7 +2085,7 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
20852085
struct btrfs_block_group *block_group, *tmp;
20862086

20872087
list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
2088-
btrfs_delayed_refs_rsv_release(fs_info, 1);
2088+
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
20892089
list_del_init(&block_group->bg_list);
20902090
}
20912091
}

fs/btrfs/transaction.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ struct btrfs_trans_handle {
120120
u64 bytes_reserved;
121121
u64 chunk_bytes_reserved;
122122
unsigned long delayed_ref_updates;
123+
unsigned long delayed_ref_csum_deletions;
123124
struct btrfs_transaction *transaction;
124125
struct btrfs_block_rsv *block_rsv;
125126
struct btrfs_block_rsv *orig_rsv;

0 commit comments

Comments
 (0)