Skip to content

Commit 7ee9e44

Browse files
author
Josef Bacik
committed
Btrfs: check if we can nocow if we don't have data space
We always try to reserve data space when we write, but if we are out of space and have prealloc'ed extents, the write should still succeed. This patch checks whether we can write into prealloc'ed space and, if we can, allows the write to continue. With this patch we now pass xfstests generic/274. Thanks. Signed-off-by: Josef Bacik <[email protected]>
1 parent 925a6ef commit 7ee9e44

File tree

6 files changed

+148
-26
lines changed

6 files changed

+148
-26
lines changed

fs/btrfs/ctree.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3552,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
35523552
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
35533553
size_t pg_offset, u64 start, u64 len,
35543554
int create);
3555+
noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
3556+
struct inode *inode, u64 offset, u64 *len,
3557+
u64 *orig_start, u64 *orig_block_len,
3558+
u64 *ram_bytes);
35553559

35563560
/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
35573561
#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)

fs/btrfs/extent-tree.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3666,6 +3666,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
36663666

36673667
data_sinfo = root->fs_info->data_sinfo;
36683668
spin_lock(&data_sinfo->lock);
3669+
WARN_ON(data_sinfo->bytes_may_use < bytes);
36693670
data_sinfo->bytes_may_use -= bytes;
36703671
trace_btrfs_space_reservation(root->fs_info, "space_info",
36713672
data_sinfo->flags, bytes, 0);

fs/btrfs/extent_io.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
543543

544544
btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
545545

546+
if (bits & EXTENT_DELALLOC)
547+
bits |= EXTENT_NORESERVE;
548+
546549
if (delete)
547550
bits |= ~EXTENT_CTLBITS;
548551
bits |= EXTENT_FIRST_DELALLOC;

fs/btrfs/extent_io.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#define EXTENT_FIRST_DELALLOC (1 << 12)
2020
#define EXTENT_NEED_WAIT (1 << 13)
2121
#define EXTENT_DAMAGED (1 << 14)
22+
#define EXTENT_NORESERVE (1 << 15)
2223
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
2324
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
2425

fs/btrfs/file.c

Lines changed: 114 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,17 +1312,69 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
13121312

13131313
}
13141314

1315+
static noinline int check_can_nocow(struct inode *inode, loff_t pos,
1316+
size_t *write_bytes)
1317+
{
1318+
struct btrfs_trans_handle *trans;
1319+
struct btrfs_root *root = BTRFS_I(inode)->root;
1320+
struct btrfs_ordered_extent *ordered;
1321+
u64 lockstart, lockend;
1322+
u64 num_bytes;
1323+
int ret;
1324+
1325+
lockstart = round_down(pos, root->sectorsize);
1326+
lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1;
1327+
1328+
while (1) {
1329+
lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1330+
ordered = btrfs_lookup_ordered_range(inode, lockstart,
1331+
lockend - lockstart + 1);
1332+
if (!ordered) {
1333+
break;
1334+
}
1335+
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1336+
btrfs_start_ordered_extent(inode, ordered, 1);
1337+
btrfs_put_ordered_extent(ordered);
1338+
}
1339+
1340+
trans = btrfs_join_transaction(root);
1341+
if (IS_ERR(trans)) {
1342+
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1343+
return PTR_ERR(trans);
1344+
}
1345+
1346+
num_bytes = lockend - lockstart + 1;
1347+
ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
1348+
NULL);
1349+
btrfs_end_transaction(trans, root);
1350+
if (ret <= 0) {
1351+
ret = 0;
1352+
} else {
1353+
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
1354+
EXTENT_DIRTY | EXTENT_DELALLOC |
1355+
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
1356+
NULL, GFP_NOFS);
1357+
*write_bytes = min_t(size_t, *write_bytes, num_bytes);
1358+
}
1359+
1360+
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1361+
1362+
return ret;
1363+
}
1364+
13151365
static noinline ssize_t __btrfs_buffered_write(struct file *file,
13161366
struct iov_iter *i,
13171367
loff_t pos)
13181368
{
13191369
struct inode *inode = file_inode(file);
13201370
struct btrfs_root *root = BTRFS_I(inode)->root;
13211371
struct page **pages = NULL;
1372+
u64 release_bytes = 0;
13221373
unsigned long first_index;
13231374
size_t num_written = 0;
13241375
int nrptrs;
13251376
int ret = 0;
1377+
bool only_release_metadata = false;
13261378
bool force_page_uptodate = false;
13271379

13281380
nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
@@ -1343,6 +1395,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
13431395
offset);
13441396
size_t num_pages = (write_bytes + offset +
13451397
PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1398+
size_t reserve_bytes;
13461399
size_t dirty_pages;
13471400
size_t copied;
13481401

@@ -1357,11 +1410,41 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
13571410
break;
13581411
}
13591412

1360-
ret = btrfs_delalloc_reserve_space(inode,
1361-
num_pages << PAGE_CACHE_SHIFT);
1413+
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1414+
ret = btrfs_check_data_free_space(inode, reserve_bytes);
1415+
if (ret == -ENOSPC &&
1416+
(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
1417+
BTRFS_INODE_PREALLOC))) {
1418+
ret = check_can_nocow(inode, pos, &write_bytes);
1419+
if (ret > 0) {
1420+
only_release_metadata = true;
1421+
/*
1422+
* our prealloc extent may be smaller than
1423+
* write_bytes, so scale down.
1424+
*/
1425+
num_pages = (write_bytes + offset +
1426+
PAGE_CACHE_SIZE - 1) >>
1427+
PAGE_CACHE_SHIFT;
1428+
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1429+
ret = 0;
1430+
} else {
1431+
ret = -ENOSPC;
1432+
}
1433+
}
1434+
13621435
if (ret)
13631436
break;
13641437

1438+
ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
1439+
if (ret) {
1440+
if (!only_release_metadata)
1441+
btrfs_free_reserved_data_space(inode,
1442+
reserve_bytes);
1443+
break;
1444+
}
1445+
1446+
release_bytes = reserve_bytes;
1447+
13651448
/*
13661449
* This is going to setup the pages array with the number of
13671450
* pages we want, so we don't really need to worry about the
@@ -1370,11 +1453,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
13701453
ret = prepare_pages(root, file, pages, num_pages,
13711454
pos, first_index, write_bytes,
13721455
force_page_uptodate);
1373-
if (ret) {
1374-
btrfs_delalloc_release_space(inode,
1375-
num_pages << PAGE_CACHE_SHIFT);
1456+
if (ret)
13761457
break;
1377-
}
13781458

13791459
copied = btrfs_copy_from_user(pos, num_pages,
13801460
write_bytes, pages, i);
@@ -1404,30 +1484,46 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
14041484
* managed to copy.
14051485
*/
14061486
if (num_pages > dirty_pages) {
1487+
release_bytes = (num_pages - dirty_pages) <<
1488+
PAGE_CACHE_SHIFT;
14071489
if (copied > 0) {
14081490
spin_lock(&BTRFS_I(inode)->lock);
14091491
BTRFS_I(inode)->outstanding_extents++;
14101492
spin_unlock(&BTRFS_I(inode)->lock);
14111493
}
1412-
btrfs_delalloc_release_space(inode,
1413-
(num_pages - dirty_pages) <<
1414-
PAGE_CACHE_SHIFT);
1494+
if (only_release_metadata)
1495+
btrfs_delalloc_release_metadata(inode,
1496+
release_bytes);
1497+
else
1498+
btrfs_delalloc_release_space(inode,
1499+
release_bytes);
14151500
}
14161501

1502+
release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
14171503
if (copied > 0) {
14181504
ret = btrfs_dirty_pages(root, inode, pages,
14191505
dirty_pages, pos, copied,
14201506
NULL);
14211507
if (ret) {
1422-
btrfs_delalloc_release_space(inode,
1423-
dirty_pages << PAGE_CACHE_SHIFT);
14241508
btrfs_drop_pages(pages, num_pages);
14251509
break;
14261510
}
14271511
}
14281512

1513+
release_bytes = 0;
14291514
btrfs_drop_pages(pages, num_pages);
14301515

1516+
if (only_release_metadata && copied > 0) {
1517+
u64 lockstart = round_down(pos, root->sectorsize);
1518+
u64 lockend = lockstart +
1519+
(dirty_pages << PAGE_CACHE_SHIFT) - 1;
1520+
1521+
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
1522+
lockend, EXTENT_NORESERVE, NULL,
1523+
NULL, GFP_NOFS);
1524+
only_release_metadata = false;
1525+
}
1526+
14311527
cond_resched();
14321528

14331529
balance_dirty_pages_ratelimited(inode->i_mapping);
@@ -1440,6 +1536,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
14401536

14411537
kfree(pages);
14421538

1539+
if (release_bytes) {
1540+
if (only_release_metadata)
1541+
btrfs_delalloc_release_metadata(inode, release_bytes);
1542+
else
1543+
btrfs_delalloc_release_space(inode, release_bytes);
1544+
}
1545+
14431546
return num_written ? num_written : ret;
14441547
}
14451548

fs/btrfs/inode.c

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,7 +1641,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
16411641
btrfs_delalloc_release_metadata(inode, len);
16421642

16431643
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1644-
&& do_list)
1644+
&& do_list && !(state->state & EXTENT_NORESERVE))
16451645
btrfs_free_reserved_data_space(inode, len);
16461646

16471647
__percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
@@ -6396,10 +6396,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
63966396
* returns 1 when the nocow is safe, < 0 on error, 0 if the
63976397
* block must be cow'd
63986398
*/
6399-
static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6400-
struct inode *inode, u64 offset, u64 *len,
6401-
u64 *orig_start, u64 *orig_block_len,
6402-
u64 *ram_bytes)
6399+
noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6400+
struct inode *inode, u64 offset, u64 *len,
6401+
u64 *orig_start, u64 *orig_block_len,
6402+
u64 *ram_bytes)
64036403
{
64046404
struct btrfs_path *path;
64056405
int ret;
@@ -6413,7 +6413,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
64136413
u64 num_bytes;
64146414
int slot;
64156415
int found_type;
6416-
6416+
bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
64176417
path = btrfs_alloc_path();
64186418
if (!path)
64196419
return -ENOMEM;
@@ -6453,18 +6453,28 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
64536453
/* not a regular extent, must cow */
64546454
goto out;
64556455
}
6456+
6457+
if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
6458+
goto out;
6459+
64566460
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6461+
if (disk_bytenr == 0)
6462+
goto out;
6463+
6464+
if (btrfs_file_extent_compression(leaf, fi) ||
6465+
btrfs_file_extent_encryption(leaf, fi) ||
6466+
btrfs_file_extent_other_encoding(leaf, fi))
6467+
goto out;
6468+
64576469
backref_offset = btrfs_file_extent_offset(leaf, fi);
64586470

6459-
*orig_start = key.offset - backref_offset;
6460-
*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
6461-
*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6471+
if (orig_start) {
6472+
*orig_start = key.offset - backref_offset;
6473+
*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
6474+
*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6475+
}
64626476

64636477
extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6464-
if (extent_end < offset + *len) {
6465-
/* extent doesn't include our full range, must cow */
6466-
goto out;
6467-
}
64686478

64696479
if (btrfs_extent_readonly(root, disk_bytenr))
64706480
goto out;
@@ -6708,8 +6718,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
67086718
if (IS_ERR(trans))
67096719
goto must_cow;
67106720

6711-
if (can_nocow_odirect(trans, inode, start, &len, &orig_start,
6712-
&orig_block_len, &ram_bytes) == 1) {
6721+
if (can_nocow_extent(trans, inode, start, &len, &orig_start,
6722+
&orig_block_len, &ram_bytes) == 1) {
67136723
if (type == BTRFS_ORDERED_PREALLOC) {
67146724
free_extent_map(em);
67156725
em = create_pinned_em(inode, start, len,

0 commit comments

Comments
 (0)