
Commit 3083ee2

Josef Bacik authored and chrismason-xx committed
Btrfs: introduce free_extent_buffer_stale
Because btrfs cows (copies on write), we can end up with extent buffers that are no longer needed just sitting around in memory, so instead of evicting those pages we could end up evicting things we actually care about. Thus we have free_extent_buffer_stale for use when we are freeing tree blocks: it drops the reference the eb holds for being in the radix tree as soon as possible, so the eb is freed as soon as its refcount hits 0 instead of waiting to be released by releasepage. Thanks,

Signed-off-by: Josef Bacik <[email protected]>
1 parent 115391d commit 3083ee2
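
The commit message above boils down to a two-reference scheme: the radix-tree cache holds one reference on each extent buffer and callers hold extra ones, and free_extent_buffer_stale() drops the cache's reference early once the tree block itself has been freed. Below is a minimal userspace sketch of that scheme, with hypothetical names (NOT btrfs code); C11 atomics stand in for the kernel's atomic_t plus eb->refs_lock serialization, which the model omits.

/*
 * Minimal model: the cache holds one reference on a buffer for as long as
 * it sits in the radix tree, callers hold extra ones. free_buffer() is the
 * ordinary put; free_buffer_stale() marks the buffer stale and, when only
 * the caller and the cache still reference it and no I/O is in flight,
 * drops the cache's reference immediately instead of waiting for
 * releasepage to do it.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct buffer {
	atomic_int refs;       /* 1 for the cache + 1 per in-flight user */
	atomic_bool tree_ref;  /* cache still owns its reference */
	atomic_bool stale;     /* backing tree block was freed or cow'ed */
	atomic_bool under_io;  /* stands in for dirty/writeback pages */
};

static struct buffer *alloc_buffer(void)
{
	struct buffer *b = calloc(1, sizeof(*b));

	atomic_init(&b->refs, 2);           /* caller ref + cache ref */
	atomic_init(&b->tree_ref, true);
	atomic_init(&b->stale, false);
	atomic_init(&b->under_io, false);
	return b;
}

/* Drop one reference; free the buffer once nobody references it. */
static void release_buffer(struct buffer *b)
{
	if (atomic_fetch_sub(&b->refs, 1) == 1) {
		printf("buffer %p freed\n", (void *)b);
		free(b);
	}
}

/* Ordinary put: the cache keeps its reference until releasepage runs. */
static void free_buffer(struct buffer *b)
{
	release_buffer(b);
}

/* Put used when the tree block itself was freed: drop the cache ref now. */
static void free_buffer_stale(struct buffer *b)
{
	atomic_store(&b->stale, true);
	if (atomic_load(&b->refs) == 2 && !atomic_load(&b->under_io) &&
	    atomic_exchange(&b->tree_ref, false))
		atomic_fetch_sub(&b->refs, 1);
	release_buffer(b);
}

int main(void)
{
	struct buffer *kept = alloc_buffer();
	struct buffer *dead = alloc_buffer();

	free_buffer(kept);        /* cache ref survives; freed later */
	free_buffer_stale(dead);  /* both refs dropped; freed right away */
	release_buffer(kept);     /* stand-in for releasepage dropping the cache ref */
	return 0;
}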

5 files changed: +201 −60 lines changed

fs/btrfs/ctree.c

Lines changed: 23 additions & 8 deletions
@@ -156,10 +156,23 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
 
-	rcu_read_lock();
-	eb = rcu_dereference(root->node);
-	extent_buffer_get(eb);
-	rcu_read_unlock();
+	while (1) {
+		rcu_read_lock();
+		eb = rcu_dereference(root->node);
+
+		/*
+		 * RCU really hurts here, we could free up the root node because
+		 * it was cow'ed but we may not get the new root node yet so do
+		 * the inc_not_zero dance and if it doesn't work then
+		 * synchronize_rcu and try again.
+		 */
+		if (atomic_inc_not_zero(&eb->refs)) {
+			rcu_read_unlock();
+			break;
+		}
+		rcu_read_unlock();
+		synchronize_rcu();
+	}
 	return eb;
 }
 
@@ -504,7 +517,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	}
 	if (unlock_orig)
 		btrfs_tree_unlock(buf);
-	free_extent_buffer(buf);
+	free_extent_buffer_stale(buf);
 	btrfs_mark_buffer_dirty(cow);
 	*cow_ret = cow;
 	return 0;
@@ -959,7 +972,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		root_sub_used(root, mid->len);
 		btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
 		/* once for the root ptr */
-		free_extent_buffer(mid);
+		free_extent_buffer_stale(mid);
 		return 0;
 	}
 	if (btrfs_header_nritems(mid) >
@@ -1016,7 +1029,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 			ret = wret;
 		root_sub_used(root, right->len);
 		btrfs_free_tree_block(trans, root, right, 0, 1, 0);
-		free_extent_buffer(right);
+		free_extent_buffer_stale(right);
 		right = NULL;
 	} else {
 		struct btrfs_disk_key right_key;
@@ -1056,7 +1069,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 			ret = wret;
 		root_sub_used(root, mid->len);
 		btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
-		free_extent_buffer(mid);
+		free_extent_buffer_stale(mid);
 		mid = NULL;
 	} else {
 		/* update the parent key to reflect our changes */
@@ -3781,7 +3794,9 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
 
 	root_sub_used(root, leaf->len);
 
+	extent_buffer_get(leaf);
 	btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
+	free_extent_buffer_stale(leaf);
 	return 0;
 }
 /*
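
The btrfs_root_node() hunk above replaces a bare extent_buffer_get() with an inc-not-zero retry loop: under RCU the root can be cow'ed and its last reference dropped between reading root->node and taking a reference, so the get must refuse to resurrect a buffer whose refcount has already hit zero. The following sketch shows that idiom with C11 atomics; the names are hypothetical, and a plain retry stands in for the kernel's rcu_read_lock()/synchronize_rcu() pairing.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	atomic_int refs;   /* object is dead once this reaches 0 */
};

/*
 * Take a reference only if the object is still live (refs > 0): the
 * userspace equivalent of the kernel's atomic_inc_not_zero().
 */
static bool refcount_inc_not_zero(atomic_int *refs)
{
	int cur = atomic_load(refs);

	while (cur != 0) {
		/* on CAS failure, cur is reloaded with the current value */
		if (atomic_compare_exchange_weak(refs, &cur, cur + 1))
			return true;
	}
	return false;
}

/*
 * Grab the current root and pin it. If the candidate died before we could
 * pin it (its last reference was dropped after a cow), reread the pointer
 * and try again. The kernel version does this under rcu_read_lock() and
 * calls synchronize_rcu() between attempts; this model simply retries.
 */
static struct node *get_root(_Atomic(struct node *) *rootp)
{
	for (;;) {
		struct node *n = atomic_load(rootp);

		if (n == NULL || refcount_inc_not_zero(&n->refs))
			return n;
	}
}

int main(void)
{
	struct node n;
	_Atomic(struct node *) root;

	atomic_init(&n.refs, 1);
	atomic_init(&root, &n);
	return get_root(&root) == &n ? 0 : 1;
}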

fs/btrfs/disk-io.c

Lines changed: 1 addition & 13 deletions
@@ -923,28 +923,16 @@ static int btree_readpage(struct file *file, struct page *page)
 
 static int btree_releasepage(struct page *page, gfp_t gfp_flags)
 {
-	struct extent_map_tree *map;
-	struct extent_io_tree *tree;
-	int ret;
-
 	if (PageWriteback(page) || PageDirty(page))
 		return 0;
-
-	tree = &BTRFS_I(page->mapping->host)->io_tree;
-	map = &BTRFS_I(page->mapping->host)->extent_tree;
-
 	/*
 	 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
 	 * slab allocation from alloc_extent_state down the callchain where
 	 * it'd hit a BUG_ON as those flags are not allowed.
 	 */
	gfp_flags &= ~GFP_SLAB_BUG_MASK;
 
-	ret = try_release_extent_state(map, tree, page, gfp_flags);
-	if (!ret)
-		return 0;
-
-	return try_release_extent_buffer(tree, page);
+	return try_release_extent_buffer(page, gfp_flags);
 }
 
 static void btree_invalidatepage(struct page *page, unsigned long offset)

fs/btrfs/extent-tree.c

Lines changed: 1 addition & 4 deletions
@@ -5018,10 +5018,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	if (is_data) {
 		ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
 		BUG_ON(ret);
-	} else {
-		invalidate_mapping_pages(info->btree_inode->i_mapping,
-			bytenr >> PAGE_CACHE_SHIFT,
-			(bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
 	}
 
 	ret = update_block_group(trans, root, bytenr, num_bytes, 0);
@@ -6022,6 +6018,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
 	btrfs_tree_lock(buf);
 	clean_tree_block(trans, root, buf);
+	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
 
 	btrfs_set_lock_blocking(buf);
 	btrfs_set_buffer_uptodate(buf);

fs/btrfs/extent_io.c

Lines changed: 171 additions & 34 deletions
@@ -3607,6 +3607,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 	list_add(&eb->leak_list, &buffers);
 	spin_unlock_irqrestore(&leak_lock, flags);
 #endif
+	spin_lock_init(&eb->refs_lock);
 	atomic_set(&eb->refs, 1);
 	atomic_set(&eb->pages_reading, 0);
 
@@ -3654,6 +3655,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
 		 */
 		if (PagePrivate(page) &&
 		    page->private == (unsigned long)eb) {
+			BUG_ON(PageDirty(page));
+			BUG_ON(PageWriteback(page));
 			/*
 			 * We need to make sure we haven't be attached
 			 * to a new eb.
@@ -3763,7 +3766,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		if (!atomic_inc_not_zero(&exists->refs)) {
 			spin_unlock(&tree->buffer_lock);
 			radix_tree_preload_end();
-			synchronize_rcu();
 			exists = NULL;
 			goto again;
 		}
@@ -3772,7 +3774,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		goto free_eb;
 	}
 	/* add one reference for the tree */
+	spin_lock(&eb->refs_lock);
 	atomic_inc(&eb->refs);
+	set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
+	spin_unlock(&eb->refs_lock);
 	spin_unlock(&tree->buffer_lock);
 	radix_tree_preload_end();
 
@@ -3823,15 +3828,143 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 	return NULL;
 }
 
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+	struct extent_buffer *eb =
+			container_of(head, struct extent_buffer, rcu_head);
+
+	__free_extent_buffer(eb);
+}
+
+static int extent_buffer_under_io(struct extent_buffer *eb,
+				  struct page *locked_page)
+{
+	unsigned long num_pages, i;
+
+	num_pages = num_extent_pages(eb->start, eb->len);
+	for (i = 0; i < num_pages; i++) {
+		struct page *page = eb->pages[i];
+		int need_unlock = 0;
+
+		if (!page)
+			continue;
+
+		if (page != locked_page) {
+			if (!trylock_page(page))
+				return 1;
+			need_unlock = 1;
+		}
+
+		if (PageDirty(page) || PageWriteback(page)) {
+			if (need_unlock)
+				unlock_page(page);
+			return 1;
+		}
+		if (need_unlock)
+			unlock_page(page);
+	}
+
+	return 0;
+}
+
+/* Expects to have eb->eb_lock already held */
+static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+{
+	WARN_ON(atomic_read(&eb->refs) == 0);
+	if (atomic_dec_and_test(&eb->refs)) {
+		struct extent_io_tree *tree = eb->tree;
+		int ret;
+
+		spin_unlock(&eb->refs_lock);
+
+		might_sleep_if(mask & __GFP_WAIT);
+		ret = clear_extent_bit(tree, eb->start,
+				       eb->start + eb->len - 1, -1, 0, 0,
+				       NULL, mask);
+		if (ret < 0) {
+			unsigned long num_pages, i;
+
+			num_pages = num_extent_pages(eb->start, eb->len);
+			/*
+			 * We failed to clear the state bits which likely means
+			 * ENOMEM, so just re-up the eb ref and continue, we
+			 * will get freed later on via releasepage or something
+			 * else and will be ok.
+			 */
+			spin_lock(&eb->tree->mapping->private_lock);
+			spin_lock(&eb->refs_lock);
+			set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
+			atomic_inc(&eb->refs);
+
+			/*
+			 * We may have started to reclaim the pages for a newly
+			 * allocated eb, make sure we own all of them again.
+			 */
+			for (i = 0; i < num_pages; i++) {
+				struct page *page = eb->pages[i];
+
+				if (!page) {
+					WARN_ON(1);
+					continue;
+				}
+
+				BUG_ON(!PagePrivate(page));
+				if (page->private != (unsigned long)eb) {
+					ClearPagePrivate(page);
+					page_cache_release(page);
+					attach_extent_buffer_page(eb, page);
+				}
+			}
+			spin_unlock(&eb->refs_lock);
+			spin_unlock(&eb->tree->mapping->private_lock);
+			return;
+		}
+
+		spin_lock(&tree->buffer_lock);
+		radix_tree_delete(&tree->buffer,
+				  eb->start >> PAGE_CACHE_SHIFT);
+		spin_unlock(&tree->buffer_lock);
+
+		/* Should be safe to release our pages at this point */
+		btrfs_release_extent_buffer_page(eb, 0);
+
+		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
+		return;
+	}
+	spin_unlock(&eb->refs_lock);
+}
+
 void free_extent_buffer(struct extent_buffer *eb)
 {
 	if (!eb)
 		return;
 
-	if (!atomic_dec_and_test(&eb->refs))
+	spin_lock(&eb->refs_lock);
+	if (atomic_read(&eb->refs) == 2 &&
+	    test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
+	    !extent_buffer_under_io(eb, NULL) &&
+	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+		atomic_dec(&eb->refs);
+
+	/*
+	 * I know this is terrible, but it's temporary until we stop tracking
+	 * the uptodate bits and such for the extent buffers.
+	 */
+	release_extent_buffer(eb, GFP_ATOMIC);
+}
+
+void free_extent_buffer_stale(struct extent_buffer *eb)
+{
+	if (!eb)
 		return;
 
-	WARN_ON(1);
+	spin_lock(&eb->refs_lock);
+	set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
+
+	if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb, NULL) &&
+	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+		atomic_dec(&eb->refs);
+	release_extent_buffer(eb, GFP_NOFS);
 }
 
 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
@@ -3874,6 +4007,7 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
 
 	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
 	num_pages = num_extent_pages(eb->start, eb->len);
+	WARN_ON(atomic_read(&eb->refs) == 0);
 	for (i = 0; i < num_pages; i++)
 		__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
 	return was_dirty;
@@ -4440,45 +4574,48 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	}
 }
 
-static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
-{
-	struct extent_buffer *eb =
-			container_of(head, struct extent_buffer, rcu_head);
-
-	__free_extent_buffer(eb);
-}
-
-int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+int try_release_extent_buffer(struct page *page, gfp_t mask)
 {
-	u64 start = page_offset(page);
-	struct extent_buffer *eb = (struct extent_buffer *)page->private;
-	int ret = 1;
+	struct extent_buffer *eb;
 
-	if (!PagePrivate(page) || !eb)
+	/*
+	 * We need to make sure noboody is attaching this page to an eb right
+	 * now.
+	 */
+	spin_lock(&page->mapping->private_lock);
+	if (!PagePrivate(page)) {
+		spin_unlock(&page->mapping->private_lock);
 		return 1;
+	}
 
-	spin_lock(&tree->buffer_lock);
-	if (atomic_read(&eb->refs) > 1 ||
-	    test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
-		ret = 0;
-		goto out;
+	eb = (struct extent_buffer *)page->private;
+	BUG_ON(!eb);
+
+	/*
+	 * This is a little awful but should be ok, we need to make sure that
+	 * the eb doesn't disappear out from under us while we're looking at
+	 * this page.
+	 */
+	spin_lock(&eb->refs_lock);
+	if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb, page)) {
+		spin_unlock(&eb->refs_lock);
+		spin_unlock(&page->mapping->private_lock);
+		return 0;
 	}
+	spin_unlock(&page->mapping->private_lock);
+
+	if ((mask & GFP_NOFS) == GFP_NOFS)
+		mask = GFP_NOFS;
 
 	/*
-	 * set @eb->refs to 0 if it is already 1, and then release the @eb.
-	 * Or go back.
+	 * If tree ref isn't set then we know the ref on this eb is a real ref,
+	 * so just return, this page will likely be freed soon anyway.
 	 */
-	if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
-		ret = 0;
-		goto out;
+	if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
+		spin_unlock(&eb->refs_lock);
+		return 0;
 	}
-	radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-	btrfs_release_extent_buffer_page(eb, 0);
-out:
-	spin_unlock(&tree->buffer_lock);
+	release_extent_buffer(eb, mask);
 
-	/* at this point we can safely release the extent buffer */
-	if (atomic_read(&eb->refs) == 0)
-		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
-	return ret;
+	return 1;
 }
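
The reworked try_release_extent_buffer() above only gives a buffer back under memory pressure when the cache holds the sole remaining reference, none of its pages are dirty or under writeback, and the EXTENT_BUFFER_TREE_REF bit can be cleared atomically, so exactly one of releasepage and free_extent_buffer_stale() drops that final reference. Below is a condensed userspace model of that gate with hypothetical names; the real code additionally serializes against page->mapping->private_lock and eb->refs_lock, which the model omits.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct buffer {
	atomic_int refs;        /* 1 means only the cache references it */
	atomic_bool tree_ref;   /* the cache's reference is still outstanding */
	atomic_bool under_io;   /* stands in for dirty/writeback pages */
};

/* Drop one reference; free the object when the count reaches zero. */
static void release_buffer(struct buffer *b)
{
	if (atomic_fetch_sub(&b->refs, 1) == 1) {
		printf("released %p\n", (void *)b);
		free(b);
	}
}

/*
 * Memory-pressure path: give the buffer back only if the cache holds the
 * sole remaining reference and no I/O is in flight. Clearing tree_ref with
 * an atomic exchange guarantees that exactly one caller drops that last
 * reference even if this path races with the stale-free path.
 */
static bool try_release_buffer(struct buffer *b)
{
	if (atomic_load(&b->refs) != 1 || atomic_load(&b->under_io))
		return false;            /* still in use, keep it cached */

	if (!atomic_exchange(&b->tree_ref, false))
		return false;            /* someone else already dropped it */

	release_buffer(b);
	return true;
}

int main(void)
{
	struct buffer *b = calloc(1, sizeof(*b));

	atomic_init(&b->refs, 1);
	atomic_init(&b->tree_ref, true);
	atomic_init(&b->under_io, false);

	printf("released: %s\n", try_release_buffer(b) ? "yes" : "no");
	return 0;
}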
