
Commit 3ee56a5

fdmanana authored and kdave committed
btrfs: reserve space for delayed refs on a per ref basis
Currently, when reserving space for delayed refs, we do it on a per ref head basis. This is generally enough because most back refs for an extent end up being inlined in the extent item - with the default leaf size of 16K we can have at most 33 inline back refs (as calculated by the macro BTRFS_MAX_EXTENT_ITEM_SIZE()). The amount of bytes reserved for each ref head is given by btrfs_calc_delayed_ref_bytes(), which basically corresponds to a single path for insertion into the extent tree plus another path for insertion into the free space tree, if it's enabled.

However, if we have reached the limit of inline refs or we have a mix of inline and non-inline refs, then we will need to insert a non-inline ref and update the existing extent item to update the total number of references for the extent. This implies we need reserved space for two insertion paths in the extent tree, but we only reserved for one path. The extent item and the non-inline ref item may be located in different leaves, or even if they are located in the same leaf, after updating the extent item and before inserting the non-inline ref item, the extent buffers in the btree path may have been written (due to memory pressure, for example), in which case we need to COW the entire path again. In this case, since we have not reserved enough space in the delayed refs block reserve, we will use the global block reserve.

If we are in a situation where the fs has no unallocated space left to allocate a new metadata block group, and the available space in the existing metadata block groups is close to the maximum size of the global block reserve (512M), we may end up consuming too much of the free metadata space, to the point where we can't commit any future transaction: the commit will fail with -ENOSPC when trying to allocate an extent for some COW operation (running delayed refs generated by running delayed refs, or COWing the root tree's root node at commit_cowonly_roots(), for example). Such a dramatic scenario can happen if we have many delayed refs that require the insertion of non-inline ref items, due to too many reflinks or snapshots. We also have situations where we use the global block reserve because we could not know in advance that we would need space to update some trees (block group creation, for example), so all of this adds up to increase the chances of exhausting the global block reserve, making any future transaction commit fail with -ENOSPC and turning the fs read-only, or failing the mount operation in case the mount needs to start and commit a transaction (for example when there are orphans to clean up). Such a case was reported and hit by someone running a SLE (SUSE Linux Enterprise) distribution, where the fs had no unallocated space left that could be used to allocate a new metadata block group, and the available metadata space was about 1.5M, not enough to commit a transaction to clean up an orphan inode (or do relocation of data block groups that were far from being full).

So reserve space for delayed refs by individual refs and not by ref heads, as we may need to COW multiple extent tree paths due to non-inline ref items.

Reviewed-by: Josef Bacik <[email protected]>
Signed-off-by: Filipe Manana <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 8a526c4 commit 3ee56a5
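
To make the shortfall concrete, here is a small standalone sketch (not kernel code) of the reservation arithmetic described in the message above. The node size, BTRFS_MAX_LEVEL and the per-item cost formula (nodesize * 2 * max level for one btree path) are assumptions modelled on common btrfs defaults and on what the message says btrfs_calc_delayed_ref_bytes() accounts for: one extent tree insertion path plus one free space tree path when that tree is enabled.

/*
 * Standalone illustration of the reservation math; all constants and the
 * per-path formula are assumptions for the sake of the example.
 */
#include <stdio.h>

#define NODESIZE        (16 * 1024)     /* assumed default metadata node size */
#define BTRFS_MAX_LEVEL 8               /* assumed maximum btree height */

/* Worst-case metadata bytes to COW one btree path when inserting num_items. */
static unsigned long insert_metadata_size(unsigned int num_items)
{
        return (unsigned long)num_items * NODESIZE * 2 * BTRFS_MAX_LEVEL;
}

int main(void)
{
        /*
         * Per-unit reservation as described above: one path into the extent
         * tree plus one into the free space tree (when enabled).
         */
        unsigned long per_unit = insert_metadata_size(1) + insert_metadata_size(1);

        /*
         * Old scheme: one unit per ref head, regardless of how many refs hang
         * off it. A non-inline back ref, however, may need a second extent
         * tree path (update the extent item and insert the separate ref item).
         */
        unsigned long reserved_per_head = per_unit;
        unsigned long needed_non_inline = insert_metadata_size(2) + insert_metadata_size(1);

        printf("reserved per ref head:       %lu bytes\n", reserved_per_head);
        printf("needed for a non-inline ref: %lu bytes\n", needed_non_inline);
        return 0;
}

With these assumed numbers, the per-ref-head reservation covers a single extent tree path, while a non-inline back ref can require a second one, which is exactly the gap closed by reserving per individual ref.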

File tree (3 files changed: +38 −24 lines)

  fs/btrfs/delayed-ref.c
  fs/btrfs/disk-io.c
  fs/btrfs/extent-tree.c

fs/btrfs/delayed-ref.c

Lines changed: 22 additions & 10 deletions
@@ -422,7 +422,8 @@ int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
 	return 0;
 }
 
-static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs,
+static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info,
+				    struct btrfs_delayed_ref_root *delayed_refs,
 				    struct btrfs_delayed_ref_head *head,
 				    struct btrfs_delayed_ref_node *ref)
 {
@@ -433,9 +434,11 @@ static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs,
 	list_del(&ref->add_list);
 	btrfs_put_delayed_ref(ref);
 	atomic_dec(&delayed_refs->num_entries);
+	btrfs_delayed_refs_rsv_release(fs_info, 1);
 }
 
-static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs,
+static bool merge_ref(struct btrfs_fs_info *fs_info,
+		      struct btrfs_delayed_ref_root *delayed_refs,
 		      struct btrfs_delayed_ref_head *head,
 		      struct btrfs_delayed_ref_node *ref,
 		      u64 seq)
@@ -464,10 +467,10 @@ static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs,
 			mod = -next->ref_mod;
 		}
 
-		drop_delayed_ref(delayed_refs, head, next);
+		drop_delayed_ref(fs_info, delayed_refs, head, next);
 		ref->ref_mod += mod;
 		if (ref->ref_mod == 0) {
-			drop_delayed_ref(delayed_refs, head, ref);
+			drop_delayed_ref(fs_info, delayed_refs, head, ref);
 			done = true;
 		} else {
 			/*
@@ -505,7 +508,7 @@ void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
 		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
 		if (seq && ref->seq >= seq)
 			continue;
-		if (merge_ref(delayed_refs, head, ref, seq))
+		if (merge_ref(fs_info, delayed_refs, head, ref, seq))
 			goto again;
 	}
 }
@@ -584,10 +587,11 @@ void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
  * Return true if the ref was merged into an existing one (and therefore can be
  * freed by the caller).
  */
-static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
+static bool insert_delayed_ref(struct btrfs_trans_handle *trans,
 			       struct btrfs_delayed_ref_head *href,
 			       struct btrfs_delayed_ref_node *ref)
 {
+	struct btrfs_delayed_ref_root *root = &trans->transaction->delayed_refs;
 	struct btrfs_delayed_ref_node *exist;
 	int mod;
 
@@ -598,6 +602,7 @@ static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
 		list_add_tail(&ref->add_list, &href->ref_add_list);
 		atomic_inc(&root->num_entries);
 		spin_unlock(&href->lock);
+		trans->delayed_ref_updates++;
 		return false;
 	}
 
@@ -626,7 +631,7 @@ static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
 
 	/* remove existing tail if its ref_mod is zero */
 	if (exist->ref_mod == 0)
-		drop_delayed_ref(root, href, exist);
+		drop_delayed_ref(trans->fs_info, root, href, exist);
 	spin_unlock(&href->lock);
 	return true;
 }
@@ -695,6 +700,8 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
 	/*
 	 * If we are going to from a positive ref mod to a negative or vice
 	 * versa we need to make sure to adjust pending_csums accordingly.
+	 * We reserve bytes for csum deletion when adding or updating a ref head
+	 * see add_delayed_ref_head() for more details.
 	 */
 	if (existing->is_data) {
 		u64 csum_leaves =
@@ -819,6 +826,12 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
 		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
 		head_ref = existing;
 	} else {
+		/*
+		 * We reserve the amount of bytes needed to delete csums when
+		 * adding the ref head and not when adding individual drop refs
+		 * since the csum items are deleted only after running the last
+		 * delayed drop ref (the data extent's ref count drops to 0).
+		 */
 		if (head_ref->is_data && head_ref->ref_mod < 0) {
 			delayed_refs->pending_csums += head_ref->num_bytes;
 			trans->delayed_ref_updates +=
@@ -828,7 +841,6 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
 		delayed_refs->num_heads++;
 		delayed_refs->num_heads_ready++;
 		atomic_inc(&delayed_refs->num_entries);
-		trans->delayed_ref_updates++;
 	}
 	if (qrecord_inserted_ret)
 		*qrecord_inserted_ret = qrecord_inserted;
@@ -958,7 +970,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 	head_ref = add_delayed_ref_head(trans, head_ref, record,
 					action, &qrecord_inserted);
 
-	merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
+	merged = insert_delayed_ref(trans, head_ref, &ref->node);
 	spin_unlock(&delayed_refs->lock);
 
 	/*
@@ -1050,7 +1062,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
 	head_ref = add_delayed_ref_head(trans, head_ref, record,
 					action, &qrecord_inserted);
 
-	merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
+	merged = insert_delayed_ref(trans, head_ref, &ref->node);
 	spin_unlock(&delayed_refs->lock);
 
 	/*

fs/btrfs/disk-io.c

Lines changed: 1 addition & 0 deletions
@@ -4563,6 +4563,7 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 			list_del(&ref->add_list);
 			atomic_dec(&delayed_refs->num_entries);
 			btrfs_put_delayed_ref(ref);
+			btrfs_delayed_refs_rsv_release(fs_info, 1);
 		}
 		if (head->must_insert_reserved)
 			pin_bytes = true;

fs/btrfs/extent-tree.c

Lines changed: 15 additions & 14 deletions
@@ -1819,22 +1819,24 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
 				  struct btrfs_delayed_ref_root *delayed_refs,
 				  struct btrfs_delayed_ref_head *head)
 {
-	int nr_items = 1;	/* Dropping this ref head update. */
-
 	/*
 	 * We had csum deletions accounted for in our delayed refs rsv, we need
 	 * to drop the csum leaves for this update from our delayed_refs_rsv.
 	 */
 	if (head->total_ref_mod < 0 && head->is_data) {
+		int nr_items;
+
 		spin_lock(&delayed_refs->lock);
 		delayed_refs->pending_csums -= head->num_bytes;
 		spin_unlock(&delayed_refs->lock);
-		nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
-	}
+		nr_items = btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
+
+		btrfs_delayed_refs_rsv_release(fs_info, nr_items);
 
-	btrfs_delayed_refs_rsv_release(fs_info, nr_items);
+		return btrfs_calc_delayed_ref_bytes(fs_info, nr_items);
+	}
 
-	return btrfs_calc_delayed_ref_bytes(fs_info, nr_items);
+	return 0;
 }
 
 static int cleanup_ref_head(struct btrfs_trans_handle *trans,
@@ -1884,7 +1886,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	*bytes_released = btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
+	*bytes_released += btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
 
 	trace_run_delayed_ref_head(fs_info, head, 0);
 	btrfs_delayed_ref_unlock(head);
@@ -1926,7 +1928,8 @@ static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
 }
 
 static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
-					   struct btrfs_delayed_ref_head *locked_ref)
+					   struct btrfs_delayed_ref_head *locked_ref,
+					   u64 *bytes_released)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_ref_root *delayed_refs;
@@ -1982,7 +1985,8 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
 
 		ret = run_one_delayed_ref(trans, ref, extent_op,
 					  must_insert_reserved);
-
+		btrfs_delayed_refs_rsv_release(fs_info, 1);
+		*bytes_released += btrfs_calc_delayed_ref_bytes(fs_info, 1);
 		btrfs_free_delayed_extent_op(extent_op);
 		if (ret) {
 			unselect_delayed_ref_head(delayed_refs, locked_ref);
@@ -2048,22 +2052,19 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		spin_lock(&locked_ref->lock);
 		btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
 
-		ret = btrfs_run_delayed_refs_for_head(trans, locked_ref);
+		ret = btrfs_run_delayed_refs_for_head(trans, locked_ref, &bytes_processed);
 		if (ret < 0 && ret != -EAGAIN) {
 			/*
 			 * Error, btrfs_run_delayed_refs_for_head already
 			 * unlocked everything so just bail out
 			 */
 			return ret;
 		} else if (!ret) {
-			u64 bytes_released = 0;
-
 			/*
 			 * Success, perform the usual cleanup of a processed
 			 * head
 			 */
-			ret = cleanup_ref_head(trans, locked_ref, &bytes_released);
-			bytes_processed += bytes_released;
+			ret = cleanup_ref_head(trans, locked_ref, &bytes_processed);
 			if (ret > 0 ) {
 				/* We dropped our lock, we need to loop. */
 				ret = 0;
