Skip to content

Commit 1851309

Browse files
wangxiaoguang authored and masoncl committed
btrfs: update btrfs_space_info's bytes_may_use timely
This patch can fix some false ENOSPC errors. The test script below reproduces one such false ENOSPC error:

    #!/bin/bash
    dd if=/dev/zero of=fs.img bs=$((1024*1024)) count=128
    dev=$(losetup --show -f fs.img)
    mkfs.btrfs -f -M $dev
    mkdir /tmp/mntpoint
    mount $dev /tmp/mntpoint
    cd /tmp/mntpoint
    xfs_io -f -c "falloc 0 $((64*1024*1024))" testfile

The above script fails with ENOSPC, even though the fs still has enough free space to satisfy this request. Please see the call graph:

    btrfs_fallocate()
    |-> btrfs_alloc_data_chunk_ondemand()
    |   bytes_may_use += 64M
    |-> btrfs_prealloc_file_range()
        |-> btrfs_reserve_extent()
            |-> btrfs_add_reserved_bytes()
            |   alloc_type is RESERVE_ALLOC_NO_ACCOUNT, so it does not
            |   change bytes_may_use, and bytes_reserved += 64M. Now
            |   bytes_may_use + bytes_reserved == 128M, which is greater
            |   than btrfs_space_info's total_bytes, false enospc occurs.
            |   Note, the bytes_may_use decrease operation will be done in
            |   end of btrfs_fallocate(), which is too late.

Here is another simple case for buffered write:

                    CPU 1              |              CPU 2
                                       |
    |-> cow_file_range()               |-> __btrfs_buffered_write()
        |-> btrfs_reserve_extent()     |   |
        |                              |   |
        |                              |   |
        |    .....                     |   |-> btrfs_check_data_free_space()
        |                              |
        |                              |
        |-> extent_clear_unlock_delalloc() |

On CPU 1, btrfs_reserve_extent()->find_free_extent()->btrfs_add_reserved_bytes() does not decrease bytes_may_use; the decrease is delayed until extent_clear_unlock_delalloc(). Assume that in this case btrfs_reserve_extent() reserved 128MB of data space, and CPU 2's btrfs_check_data_free_space() tries to reserve 100MB of data space. If

    100MB > data_sinfo->total_bytes - data_sinfo->bytes_used -
            data_sinfo->bytes_reserved - data_sinfo->bytes_pinned -
            data_sinfo->bytes_readonly - data_sinfo->bytes_may_use

then btrfs_check_data_free_space() will try to allocate a new data chunk, or call btrfs_start_delalloc_roots(), or commit the current transaction in order to reserve some free space, which is obviously a lot of work.
But this work is not necessary: as long as bytes_may_use is decreased in time, we still have free space, because 128M is subtracted from bytes_may_use. To fix this issue, this patch chooses to update bytes_may_use for both data and metadata in btrfs_add_reserved_bytes(). For the compress path, the real extent length may not be equal to the file content length, so introduce a ram_bytes argument for btrfs_reserve_extent(), find_free_extent() and btrfs_add_reserved_bytes(); this is because bytes_may_use is increased by the file content length. Then the compress path can update bytes_may_use correctly. Also, we can now discard RESERVE_ALLOC_NO_ACCOUNT, RESERVE_ALLOC and RESERVE_FREE. As we know, EXTENT_DO_ACCOUNTING is usually used for the error path. In run_delalloc_nocow(), for an inode marked as NODATACOW or an extent marked as PREALLOC, we also need to update bytes_may_use, but cannot pass EXTENT_DO_ACCOUNTING, because it also clears the metadata reservation, so here we introduce the EXTENT_CLEAR_DATA_RESV flag to tell btrfs_clear_bit_hook() to update btrfs_space_info's bytes_may_use. Meanwhile, __btrfs_prealloc_file_range() will call btrfs_free_reserved_data_space() internally on both the successful and the failed path, so btrfs_prealloc_file_range()'s callers do not need to call btrfs_free_reserved_data_space() any more. Signed-off-by: Wang Xiaoguang <[email protected]> Reviewed-by: Josef Bacik <[email protected]> Signed-off-by: David Sterba <[email protected]> Signed-off-by: Chris Mason <[email protected]>
1 parent 4824f1f commit 1851309

File tree

7 files changed

+73
-63
lines changed

7 files changed

+73
-63
lines changed

fs/btrfs/ctree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2579,7 +2579,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
25792579
struct btrfs_root *root,
25802580
u64 root_objectid, u64 owner, u64 offset,
25812581
struct btrfs_key *ins);
2582-
int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
2582+
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
25832583
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
25842584
struct btrfs_key *ins, int is_data, int delalloc);
25852585
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,

fs/btrfs/extent-tree.c

Lines changed: 19 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -60,21 +60,6 @@ enum {
6060
CHUNK_ALLOC_FORCE = 2,
6161
};
6262

63-
/*
64-
* Control how reservations are dealt with.
65-
*
66-
* RESERVE_FREE - freeing a reservation.
67-
* RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
68-
* ENOSPC accounting
69-
* RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
70-
* bytes_may_use as the ENOSPC accounting is done elsewhere
71-
*/
72-
enum {
73-
RESERVE_FREE = 0,
74-
RESERVE_ALLOC = 1,
75-
RESERVE_ALLOC_NO_ACCOUNT = 2,
76-
};
77-
7863
static int update_block_group(struct btrfs_trans_handle *trans,
7964
struct btrfs_root *root, u64 bytenr,
8065
u64 num_bytes, int alloc);
@@ -105,7 +90,7 @@ static int find_next_key(struct btrfs_path *path, int level,
10590
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
10691
int dump_block_groups);
10792
static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
108-
u64 num_bytes, int reserve, int delalloc);
93+
u64 ram_bytes, u64 num_bytes, int delalloc);
10994
static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
11095
u64 num_bytes, int delalloc);
11196
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
@@ -3502,7 +3487,6 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
35023487
dcs = BTRFS_DC_SETUP;
35033488
else if (ret == -ENOSPC)
35043489
set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
3505-
btrfs_free_reserved_data_space(inode, 0, num_pages);
35063490

35073491
out_put:
35083492
iput(inode);
@@ -6500,8 +6484,9 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
65006484
/**
65016485
* btrfs_add_reserved_bytes - update the block_group and space info counters
65026486
* @cache: The cache we are manipulating
6487+
* @ram_bytes: The number of bytes of file content, and will be same to
6488+
* @num_bytes except for the compress path.
65036489
* @num_bytes: The number of bytes in question
6504-
* @reserve: One of the reservation enums
65056490
* @delalloc: The blocks are allocated for the delalloc write
65066491
*
65076492
* This is called by the allocator when it reserves space. Metadata
@@ -6516,7 +6501,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
65166501
* succeeds.
65176502
*/
65186503
static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
6519-
u64 num_bytes, int reserve, int delalloc)
6504+
u64 ram_bytes, u64 num_bytes, int delalloc)
65206505
{
65216506
struct btrfs_space_info *space_info = cache->space_info;
65226507
int ret = 0;
@@ -6528,13 +6513,11 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
65286513
} else {
65296514
cache->reserved += num_bytes;
65306515
space_info->bytes_reserved += num_bytes;
6531-
if (reserve == RESERVE_ALLOC) {
6532-
trace_btrfs_space_reservation(cache->fs_info,
6533-
"space_info", space_info->flags,
6534-
num_bytes, 0);
6535-
space_info->bytes_may_use -= num_bytes;
6536-
}
65376516

6517+
trace_btrfs_space_reservation(cache->fs_info,
6518+
"space_info", space_info->flags,
6519+
ram_bytes, 0);
6520+
space_info->bytes_may_use -= ram_bytes;
65386521
if (delalloc)
65396522
cache->delalloc_bytes += num_bytes;
65406523
}
@@ -7433,9 +7416,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
74337416
* the free space extent currently.
74347417
*/
74357418
static noinline int find_free_extent(struct btrfs_root *orig_root,
7436-
u64 num_bytes, u64 empty_size,
7437-
u64 hint_byte, struct btrfs_key *ins,
7438-
u64 flags, int delalloc)
7419+
u64 ram_bytes, u64 num_bytes, u64 empty_size,
7420+
u64 hint_byte, struct btrfs_key *ins,
7421+
u64 flags, int delalloc)
74397422
{
74407423
int ret = 0;
74417424
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -7447,8 +7430,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
74477430
struct btrfs_space_info *space_info;
74487431
int loop = 0;
74497432
int index = __get_raid_index(flags);
7450-
int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
7451-
RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
74527433
bool failed_cluster_refill = false;
74537434
bool failed_alloc = false;
74547435
bool use_cluster = true;
@@ -7780,8 +7761,8 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
77807761
search_start - offset);
77817762
BUG_ON(offset > search_start);
77827763

7783-
ret = btrfs_add_reserved_bytes(block_group, num_bytes,
7784-
alloc_type, delalloc);
7764+
ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
7765+
num_bytes, delalloc);
77857766
if (ret == -EAGAIN) {
77867767
btrfs_add_free_space(block_group, offset, num_bytes);
77877768
goto loop;
@@ -7953,7 +7934,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
79537934
up_read(&info->groups_sem);
79547935
}
79557936

7956-
int btrfs_reserve_extent(struct btrfs_root *root,
7937+
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
79577938
u64 num_bytes, u64 min_alloc_size,
79587939
u64 empty_size, u64 hint_byte,
79597940
struct btrfs_key *ins, int is_data, int delalloc)
@@ -7965,8 +7946,8 @@ int btrfs_reserve_extent(struct btrfs_root *root,
79657946
flags = btrfs_get_alloc_profile(root, is_data);
79667947
again:
79677948
WARN_ON(num_bytes < root->sectorsize);
7968-
ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
7969-
flags, delalloc);
7949+
ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
7950+
hint_byte, ins, flags, delalloc);
79707951
if (!ret && !is_data) {
79717952
btrfs_dec_block_group_reservations(root->fs_info,
79727953
ins->objectid);
@@ -7975,6 +7956,7 @@ int btrfs_reserve_extent(struct btrfs_root *root,
79757956
num_bytes = min(num_bytes >> 1, ins->offset);
79767957
num_bytes = round_down(num_bytes, root->sectorsize);
79777958
num_bytes = max(num_bytes, min_alloc_size);
7959+
ram_bytes = num_bytes;
79787960
if (num_bytes == min_alloc_size)
79797961
final_tried = true;
79807962
goto again;
@@ -8241,7 +8223,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
82418223
return -EINVAL;
82428224

82438225
ret = btrfs_add_reserved_bytes(block_group, ins->offset,
8244-
RESERVE_ALLOC_NO_ACCOUNT, 0);
8226+
ins->offset, 0);
82458227
BUG_ON(ret); /* logic error */
82468228
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
82478229
0, owner, offset, ins, 1);
@@ -8385,7 +8367,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
83858367
if (IS_ERR(block_rsv))
83868368
return ERR_CAST(block_rsv);
83878369

8388-
ret = btrfs_reserve_extent(root, blocksize, blocksize,
8370+
ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
83898371
empty_size, hint, &ins, 0, 0);
83908372
if (ret)
83918373
goto out_unuse;

fs/btrfs/extent_io.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#define EXTENT_DAMAGED (1U << 14)
2121
#define EXTENT_NORESERVE (1U << 15)
2222
#define EXTENT_QGROUP_RESERVED (1U << 16)
23+
#define EXTENT_CLEAR_DATA_RESV (1U << 17)
2324
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
2425
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
2526

fs/btrfs/file.c

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2675,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode,
26752675

26762676
alloc_start = round_down(offset, blocksize);
26772677
alloc_end = round_up(offset + len, blocksize);
2678+
cur_offset = alloc_start;
26782679

26792680
/* Make sure we aren't being give some crap mode */
26802681
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2767,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode,
27672768

27682769
/* First, check if we exceed the qgroup limit */
27692770
INIT_LIST_HEAD(&reserve_list);
2770-
cur_offset = alloc_start;
27712771
while (1) {
27722772
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
27732773
alloc_end - cur_offset, 0);
@@ -2794,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode,
27942794
last_byte - cur_offset);
27952795
if (ret < 0)
27962796
break;
2797+
} else {
2798+
/*
2799+
* Do not need to reserve unwritten extent for this
2800+
* range, free reserved data space first, otherwise
2801+
* it'll result in false ENOSPC error.
2802+
*/
2803+
btrfs_free_reserved_data_space(inode, cur_offset,
2804+
last_byte - cur_offset);
27972805
}
27982806
free_extent_map(em);
27992807
cur_offset = last_byte;
@@ -2811,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode,
28112819
range->start,
28122820
range->len, 1 << inode->i_blkbits,
28132821
offset + len, &alloc_hint);
2822+
else
2823+
btrfs_free_reserved_data_space(inode, range->start,
2824+
range->len);
28142825
list_del(&range->list);
28152826
kfree(range);
28162827
}
@@ -2845,18 +2856,11 @@ static long btrfs_fallocate(struct file *file, int mode,
28452856
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
28462857
&cached_state, GFP_KERNEL);
28472858
out:
2848-
/*
2849-
* As we waited the extent range, the data_rsv_map must be empty
2850-
* in the range, as written data range will be released from it.
2851-
* And for prealloacted extent, it will also be released when
2852-
* its metadata is written.
2853-
* So this is completely used as cleanup.
2854-
*/
2855-
btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
28562859
inode_unlock(inode);
28572860
/* Let go of our reservation. */
2858-
btrfs_free_reserved_data_space(inode, alloc_start,
2859-
alloc_end - alloc_start);
2861+
if (ret != 0)
2862+
btrfs_free_reserved_data_space(inode, alloc_start,
2863+
alloc_end - cur_offset);
28602864
return ret;
28612865
}
28622866

fs/btrfs/inode-map.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -495,10 +495,9 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
495495
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
496496
prealloc, prealloc, &alloc_hint);
497497
if (ret) {
498-
btrfs_delalloc_release_space(inode, 0, prealloc);
498+
btrfs_delalloc_release_metadata(inode, prealloc);
499499
goto out_put;
500500
}
501-
btrfs_free_reserved_data_space(inode, 0, prealloc);
502501

503502
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
504503
out_put:

fs/btrfs/inode.c

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,8 @@ static noinline void compress_file_range(struct inode *inode,
566566
PAGE_SET_WRITEBACK |
567567
page_error_op |
568568
PAGE_END_WRITEBACK);
569+
btrfs_free_reserved_data_space_noquota(inode, start,
570+
end - start + 1);
569571
goto free_pages_out;
570572
}
571573
}
@@ -742,7 +744,7 @@ static noinline void submit_compressed_extents(struct inode *inode,
742744
lock_extent(io_tree, async_extent->start,
743745
async_extent->start + async_extent->ram_size - 1);
744746

745-
ret = btrfs_reserve_extent(root,
747+
ret = btrfs_reserve_extent(root, async_extent->ram_size,
746748
async_extent->compressed_size,
747749
async_extent->compressed_size,
748750
0, alloc_hint, &ins, 1, 1);
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode,
969971
EXTENT_DEFRAG, PAGE_UNLOCK |
970972
PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
971973
PAGE_END_WRITEBACK);
972-
974+
btrfs_free_reserved_data_space_noquota(inode, start,
975+
end - start + 1);
973976
*nr_written = *nr_written +
974977
(end - start + PAGE_SIZE) / PAGE_SIZE;
975978
*page_started = 1;
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode,
989992
unsigned long op;
990993

991994
cur_alloc_size = disk_num_bytes;
992-
ret = btrfs_reserve_extent(root, cur_alloc_size,
995+
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
993996
root->sectorsize, 0, alloc_hint,
994997
&ins, 1, 1);
995998
if (ret < 0)
@@ -1489,8 +1492,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
14891492
extent_clear_unlock_delalloc(inode, cur_offset,
14901493
cur_offset + num_bytes - 1,
14911494
locked_page, EXTENT_LOCKED |
1492-
EXTENT_DELALLOC, PAGE_UNLOCK |
1493-
PAGE_SET_PRIVATE2);
1495+
EXTENT_DELALLOC |
1496+
EXTENT_CLEAR_DATA_RESV,
1497+
PAGE_UNLOCK | PAGE_SET_PRIVATE2);
1498+
14941499
if (!nolock && nocow)
14951500
btrfs_end_write_no_snapshoting(root);
14961501
cur_offset = extent_end;
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode,
18071812
return;
18081813

18091814
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1810-
&& do_list && !(state->state & EXTENT_NORESERVE))
1815+
&& do_list && !(state->state & EXTENT_NORESERVE)
1816+
&& (*bits & (EXTENT_DO_ACCOUNTING |
1817+
EXTENT_CLEAR_DATA_RESV)))
18111818
btrfs_free_reserved_data_space_noquota(inode,
18121819
state->start, len);
18131820

@@ -7252,7 +7259,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
72527259
int ret;
72537260

72547261
alloc_hint = get_extent_allocation_hint(inode, start, len);
7255-
ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
7262+
ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
72567263
alloc_hint, &ins, 1, 1);
72577264
if (ret)
72587265
return ERR_PTR(ret);
@@ -7752,6 +7759,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
77527759
ret = PTR_ERR(em2);
77537760
goto unlock_err;
77547761
}
7762+
/*
7763+
* For inode marked NODATACOW or extent marked PREALLOC,
7764+
* use the existing or preallocated extent, so does not
7765+
* need to adjust btrfs_space_info's bytes_may_use.
7766+
*/
7767+
btrfs_free_reserved_data_space_noquota(inode,
7768+
start, len);
77557769
goto unlock;
77567770
}
77577771
}
@@ -7786,7 +7800,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
77867800
i_size_write(inode, start + len);
77877801

77887802
adjust_dio_outstanding_extents(inode, dio_data, len);
7789-
btrfs_free_reserved_data_space(inode, start, len);
77907803
WARN_ON(dio_data->reserve < len);
77917804
dio_data->reserve -= len;
77927805
dio_data->unsubmitted_oe_range_end = start + len;
@@ -10306,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
1030610319
u64 last_alloc = (u64)-1;
1030710320
int ret = 0;
1030810321
bool own_trans = true;
10322+
u64 end = start + num_bytes - 1;
1030910323

1031010324
if (trans)
1031110325
own_trans = false;
@@ -10327,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
1032710341
* sized chunks.
1032810342
*/
1032910343
cur_bytes = min(cur_bytes, last_alloc);
10330-
ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
10331-
*alloc_hint, &ins, 1, 0);
10344+
ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
10345+
min_size, 0, *alloc_hint, &ins, 1, 0);
1033210346
if (ret) {
1033310347
if (own_trans)
1033410348
btrfs_end_transaction(trans, root);
@@ -10414,6 +10428,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
1041410428
if (own_trans)
1041510429
btrfs_end_transaction(trans, root);
1041610430
}
10431+
if (cur_offset < end)
10432+
btrfs_free_reserved_data_space(inode, cur_offset,
10433+
end - cur_offset + 1);
1041710434
return ret;
1041810435
}
1041910436

0 commit comments

Comments
 (0)