Skip to content

Commit 69fe2d7

Browse files
josefbacik authored and kdave committed
btrfs: make the delalloc block rsv per inode
The way we handle delalloc metadata reservations has gotten progressively more complicated over the years. There is so much cruft and weirdness around keeping the reserved count and outstanding counters consistent and handling the error cases that it's impossible to understand.

Fix this by making the delalloc block rsv per-inode. This way we can calculate the actual size of the outstanding metadata reservations every time we make a change, and then reserve the delta based on that amount. This greatly simplifies the code everywhere, and makes the error handling in btrfs_delalloc_reserve_metadata far less terrifying.

Signed-off-by: Josef Bacik <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent dd48d40 commit 69fe2d7

File tree

6 files changed

+141
-292
lines changed

6 files changed

+141
-292
lines changed

fs/btrfs/btrfs_inode.h

Lines changed: 9 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -36,14 +36,13 @@
3636
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
3737
#define BTRFS_INODE_DUMMY 2
3838
#define BTRFS_INODE_IN_DEFRAG 3
39-
#define BTRFS_INODE_DELALLOC_META_RESERVED 4
40-
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
41-
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
42-
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
43-
#define BTRFS_INODE_COPY_EVERYTHING 8
44-
#define BTRFS_INODE_IN_DELALLOC_LIST 9
45-
#define BTRFS_INODE_READDIO_NEED_LOCK 10
46-
#define BTRFS_INODE_HAS_PROPS 11
39+
#define BTRFS_INODE_HAS_ORPHAN_ITEM 4
40+
#define BTRFS_INODE_HAS_ASYNC_EXTENT 5
41+
#define BTRFS_INODE_NEEDS_FULL_SYNC 6
42+
#define BTRFS_INODE_COPY_EVERYTHING 7
43+
#define BTRFS_INODE_IN_DELALLOC_LIST 8
44+
#define BTRFS_INODE_READDIO_NEED_LOCK 9
45+
#define BTRFS_INODE_HAS_PROPS 10
4746

4847
/* in memory btrfs inode */
4948
struct btrfs_inode {
@@ -176,7 +175,8 @@ struct btrfs_inode {
176175
* of extent items we've reserved metadata for.
177176
*/
178177
unsigned outstanding_extents;
179-
unsigned reserved_extents;
178+
179+
struct btrfs_block_rsv block_rsv;
180180

181181
/*
182182
* Cached values of inode properties
@@ -278,14 +278,6 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
278278
mod);
279279
}
280280

281-
static inline void btrfs_mod_reserved_extents(struct btrfs_inode *inode, int mod)
282-
{
283-
lockdep_assert_held(&inode->lock);
284-
inode->reserved_extents += mod;
285-
if (btrfs_is_free_space_inode(inode))
286-
return;
287-
}
288-
289281
static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
290282
{
291283
int ret = 0;

fs/btrfs/ctree.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -763,8 +763,6 @@ struct btrfs_fs_info {
763763
* delayed dir index item
764764
*/
765765
struct btrfs_block_rsv global_block_rsv;
766-
/* block reservation for delay allocation */
767-
struct btrfs_block_rsv delalloc_block_rsv;
768766
/* block reservation for metadata operations */
769767
struct btrfs_block_rsv trans_block_rsv;
770768
/* block reservation for chunk tree */
@@ -2757,6 +2755,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
27572755
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
27582756
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
27592757
unsigned short type);
2758+
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
2759+
struct btrfs_block_rsv *rsv,
2760+
unsigned short type);
27602761
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
27612762
struct btrfs_block_rsv *rsv);
27622763
void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);

fs/btrfs/delayed-inode.c

Lines changed: 1 addition & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -581,44 +581,20 @@ static int btrfs_delayed_inode_reserve_metadata(
581581
struct btrfs_block_rsv *dst_rsv;
582582
u64 num_bytes;
583583
int ret;
584-
bool release = false;
585584

586585
src_rsv = trans->block_rsv;
587586
dst_rsv = &fs_info->delayed_block_rsv;
588587

589588
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
590589

591-
/*
592-
* If our block_rsv is the delalloc block reserve then check and see if
593-
* we have our extra reservation for updating the inode. If not fall
594-
* through and try to reserve space quickly.
595-
*
596-
* We used to try and steal from the delalloc block rsv or the global
597-
* reserve, but we'd steal a full reservation, which isn't kind. We are
598-
* here through delalloc which means we've likely just cowed down close
599-
* to the leaf that contains the inode, so we would steal less just
600-
* doing the fallback inode update, so if we do end up having to steal
601-
* from the global block rsv we hopefully only steal one or two blocks
602-
* worth which is less likely to hurt us.
603-
*/
604-
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
605-
spin_lock(&inode->lock);
606-
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
607-
&inode->runtime_flags))
608-
release = true;
609-
else
610-
src_rsv = NULL;
611-
spin_unlock(&inode->lock);
612-
}
613-
614590
/*
615591
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
616592
* which doesn't reserve space for speed. This is a problem since we
617593
* still need to reserve space for this update, so try to reserve the
618594
* space.
619595
*
620596
* Now if src_rsv == delalloc_block_rsv we'll let it just steal since
621-
* we're accounted for.
597+
* we always reserve enough to update the inode item.
622598
*/
623599
if (!src_rsv || (!trans->bytes_reserved &&
624600
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
@@ -643,32 +619,12 @@ static int btrfs_delayed_inode_reserve_metadata(
643619
}
644620

645621
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
646-
647-
/*
648-
* Migrate only takes a reservation, it doesn't touch the size of the
649-
* block_rsv. This is to simplify people who don't normally have things
650-
* migrated from their block rsv. If they go to release their
651-
* reservation, that will decrease the size as well, so if migrate
652-
* reduced size we'd end up with a negative size. But for the
653-
* delalloc_meta_reserved stuff we will only know to drop 1 reservation,
654-
* but we could in fact do this reserve/migrate dance several times
655-
* between the time we did the original reservation and we'd clean it
656-
* up. So to take care of this, release the space for the meta
657-
* reservation here. I think it may be time for a documentation page on
658-
* how block rsvs. work.
659-
*/
660622
if (!ret) {
661623
trace_btrfs_space_reservation(fs_info, "delayed_inode",
662624
btrfs_ino(inode), num_bytes, 1);
663625
node->bytes_reserved = num_bytes;
664626
}
665627

666-
if (release) {
667-
trace_btrfs_space_reservation(fs_info, "delalloc",
668-
btrfs_ino(inode), num_bytes, 0);
669-
btrfs_block_rsv_release(fs_info, src_rsv, num_bytes);
670-
}
671-
672628
return ret;
673629
}
674630

fs/btrfs/disk-io.c

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2447,14 +2447,6 @@ int open_ctree(struct super_block *sb,
24472447
goto fail_delalloc_bytes;
24482448
}
24492449

2450-
fs_info->btree_inode = new_inode(sb);
2451-
if (!fs_info->btree_inode) {
2452-
err = -ENOMEM;
2453-
goto fail_bio_counter;
2454-
}
2455-
2456-
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
2457-
24582450
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
24592451
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
24602452
INIT_LIST_HEAD(&fs_info->trans_list);
@@ -2487,8 +2479,6 @@ int open_ctree(struct super_block *sb,
24872479
btrfs_mapping_init(&fs_info->mapping_tree);
24882480
btrfs_init_block_rsv(&fs_info->global_block_rsv,
24892481
BTRFS_BLOCK_RSV_GLOBAL);
2490-
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
2491-
BTRFS_BLOCK_RSV_DELALLOC);
24922482
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
24932483
btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
24942484
btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
@@ -2517,6 +2507,14 @@ int open_ctree(struct super_block *sb,
25172507

25182508
INIT_LIST_HEAD(&fs_info->ordered_roots);
25192509
spin_lock_init(&fs_info->ordered_root_lock);
2510+
2511+
fs_info->btree_inode = new_inode(sb);
2512+
if (!fs_info->btree_inode) {
2513+
err = -ENOMEM;
2514+
goto fail_bio_counter;
2515+
}
2516+
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
2517+
25202518
fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
25212519
GFP_KERNEL);
25222520
if (!fs_info->delayed_root) {

0 commit comments

Comments
 (0)