Skip to content

Commit a315e68

Browse files
committed
Btrfs: fix invalid attempt to free reserved space on failure to cow range
When attempting to COW a file range (we are starting writeback and doing COW), if we manage to reserve an extent for the range we will write into but fail after reserving it and before creating the respective ordered extent, we end up in an error path where we attempt to decrement the data space's bytes_may_use counter after we already did it while reserving the extent, leading to a warning/trace like the following: [ 847.621524] ------------[ cut here ]------------ [ 847.625441] WARNING: CPU: 5 PID: 4905 at fs/btrfs/extent-tree.c:4316 btrfs_free_reserved_data_space_noquota+0x60/0x9f [btrfs] [ 847.633704] Modules linked in: btrfs crc32c_generic xor raid6_pq acpi_cpufreq i2c_piix4 ppdev psmouse tpm_tis serio_raw pcspkr parport_pc tpm_tis_core i2c_core sg [ 847.644616] CPU: 5 PID: 4905 Comm: xfs_io Not tainted 4.10.0-rc8-btrfs-next-37+ #2 [ 847.648601] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [ 847.648601] Call Trace: [ 847.648601] dump_stack+0x67/0x90 [ 847.648601] __warn+0xc2/0xdd [ 847.648601] warn_slowpath_null+0x1d/0x1f [ 847.648601] btrfs_free_reserved_data_space_noquota+0x60/0x9f [btrfs] [ 847.648601] btrfs_clear_bit_hook+0x140/0x258 [btrfs] [ 847.648601] clear_state_bit+0x87/0x128 [btrfs] [ 847.648601] __clear_extent_bit+0x222/0x2b7 [btrfs] [ 847.648601] clear_extent_bit+0x17/0x19 [btrfs] [ 847.648601] extent_clear_unlock_delalloc+0x3b/0x6b [btrfs] [ 847.648601] cow_file_range.isra.39+0x387/0x39a [btrfs] [ 847.648601] run_delalloc_nocow+0x4d7/0x70e [btrfs] [ 847.648601] ? arch_local_irq_save+0x9/0xc [ 847.648601] run_delalloc_range+0xa7/0x2b5 [btrfs] [ 847.648601] writepage_delalloc.isra.31+0xb9/0x15c [btrfs] [ 847.648601] __extent_writepage+0x249/0x2e8 [btrfs] [ 847.648601] extent_write_cache_pages.constprop.33+0x28b/0x36c [btrfs] [ 847.648601] ? arch_local_irq_save+0x9/0xc [ 847.648601] ? mark_lock+0x24/0x201 [ 847.648601] extent_writepages+0x4b/0x5c [btrfs] [ 847.648601] ? btrfs_writepage_start_hook+0xed/0xed [btrfs] [ 847.648601] btrfs_writepages+0x28/0x2a [btrfs] [ 847.648601] do_writepages+0x23/0x2c [ 847.648601] __filemap_fdatawrite_range+0x5a/0x61 [ 847.648601] filemap_fdatawrite_range+0x13/0x15 [ 847.648601] btrfs_fdatawrite_range+0x20/0x46 [btrfs] [ 847.648601] start_ordered_ops+0x19/0x23 [btrfs] [ 847.648601] btrfs_sync_file+0x136/0x42c [btrfs] [ 847.648601] vfs_fsync_range+0x8c/0x9e [ 847.648601] vfs_fsync+0x1c/0x1e [ 847.648601] do_fsync+0x31/0x4a [ 847.648601] SyS_fsync+0x10/0x14 [ 847.648601] entry_SYSCALL_64_fastpath+0x18/0xad [ 847.648601] RIP: 0033:0x7f5b05200800 [ 847.648601] RSP: 002b:00007ffe204f71c8 EFLAGS: 00000246 ORIG_RAX: 000000000000004a [ 847.648601] RAX: ffffffffffffffda RBX: ffffffff8109637b RCX: 00007f5b05200800 [ 847.648601] RDX: 00000000008bd0a0 RSI: 00000000008bd2e0 RDI: 0000000000000003 [ 847.648601] RBP: ffffc90001d67f98 R08: 000000000000ffff R09: 000000000000001f [ 847.648601] R10: 00000000000001f6 R11: 0000000000000246 R12: 0000000000000046 [ 847.648601] R13: ffffc90001d67f78 R14: 00007f5b054be740 R15: 00007f5b054be740 [ 847.648601] ? trace_hardirqs_off_caller+0x3f/0xaa [ 847.685787] ---[ end trace 2a4a3e15382508e8 ]--- So fix this by not attempting to decrement the data space info's bytes_may_use counter if we already reserved the extent and an error happened before creating the ordered extent. We are already correctly freeing the reserved extent if an error happens, so there's no additional measure needed. Signed-off-by: Filipe Manana <[email protected]> Reviewed-by: Liu Bo <[email protected]>
1 parent 5242726 commit a315e68

File tree

2 files changed

+45
-18
lines changed

2 files changed

+45
-18
lines changed

fs/btrfs/extent_io.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,16 @@
1414
#define EXTENT_DEFRAG (1U << 6)
1515
#define EXTENT_BOUNDARY (1U << 9)
1616
#define EXTENT_NODATASUM (1U << 10)
17-
#define EXTENT_DO_ACCOUNTING (1U << 11)
17+
#define EXTENT_CLEAR_META_RESV (1U << 11)
1818
#define EXTENT_FIRST_DELALLOC (1U << 12)
1919
#define EXTENT_NEED_WAIT (1U << 13)
2020
#define EXTENT_DAMAGED (1U << 14)
2121
#define EXTENT_NORESERVE (1U << 15)
2222
#define EXTENT_QGROUP_RESERVED (1U << 16)
2323
#define EXTENT_CLEAR_DATA_RESV (1U << 17)
2424
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
25+
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
26+
EXTENT_CLEAR_DATA_RESV)
2527
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
2628

2729
/*

fs/btrfs/inode.c

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -943,10 +943,13 @@ static noinline int cow_file_range(struct inode *inode,
943943
u64 num_bytes;
944944
unsigned long ram_size;
945945
u64 disk_num_bytes;
946-
u64 cur_alloc_size;
946+
u64 cur_alloc_size = 0;
947947
u64 blocksize = fs_info->sectorsize;
948948
struct btrfs_key ins;
949949
struct extent_map *em;
950+
unsigned clear_bits;
951+
unsigned long page_ops;
952+
bool extent_reserved = false;
950953
int ret = 0;
951954

952955
if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -991,14 +994,14 @@ static noinline int cow_file_range(struct inode *inode,
991994
start + num_bytes - 1, 0);
992995

993996
while (disk_num_bytes > 0) {
994-
unsigned long op;
995-
996997
cur_alloc_size = disk_num_bytes;
997998
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
998999
fs_info->sectorsize, 0, alloc_hint,
9991000
&ins, 1, 1);
10001001
if (ret < 0)
10011002
goto out_unlock;
1003+
cur_alloc_size = ins.offset;
1004+
extent_reserved = true;
10021005

10031006
ram_size = ins.offset;
10041007
em = create_io_em(inode, start, ins.offset, /* len */
@@ -1013,7 +1016,6 @@ static noinline int cow_file_range(struct inode *inode,
10131016
goto out_reserve;
10141017
free_extent_map(em);
10151018

1016-
cur_alloc_size = ins.offset;
10171019
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
10181020
ram_size, cur_alloc_size, 0);
10191021
if (ret)
@@ -1048,21 +1050,22 @@ static noinline int cow_file_range(struct inode *inode,
10481050
* Do set the Private2 bit so we know this page was properly
10491051
* setup for writepage
10501052
*/
1051-
op = unlock ? PAGE_UNLOCK : 0;
1052-
op |= PAGE_SET_PRIVATE2;
1053+
page_ops = unlock ? PAGE_UNLOCK : 0;
1054+
page_ops |= PAGE_SET_PRIVATE2;
10531055

10541056
extent_clear_unlock_delalloc(inode, start,
10551057
start + ram_size - 1,
10561058
delalloc_end, locked_page,
10571059
EXTENT_LOCKED | EXTENT_DELALLOC,
1058-
op);
1060+
page_ops);
10591061
if (disk_num_bytes < cur_alloc_size)
10601062
disk_num_bytes = 0;
10611063
else
10621064
disk_num_bytes -= cur_alloc_size;
10631065
num_bytes -= cur_alloc_size;
10641066
alloc_hint = ins.objectid + ins.offset;
10651067
start += cur_alloc_size;
1068+
extent_reserved = false;
10661069

10671070
/*
10681071
* btrfs_reloc_clone_csums() error, since start is increased
@@ -1081,12 +1084,35 @@ static noinline int cow_file_range(struct inode *inode,
10811084
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
10821085
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
10831086
out_unlock:
1087+
clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DEFRAG |
1088+
EXTENT_CLEAR_META_RESV;
1089+
page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
1090+
PAGE_END_WRITEBACK;
1091+
/*
1092+
* If we reserved an extent for our delalloc range (or a subrange) and
1093+
* failed to create the respective ordered extent, then it means that
1094+
* when we reserved the extent we decremented the extent's size from
1095+
* the data space_info's bytes_may_use counter and incremented the
1096+
* space_info's bytes_reserved counter by the same amount. We must make
1097+
* sure extent_clear_unlock_delalloc() does not try to decrement again
1098+
* the data space_info's bytes_may_use counter, therefore we do not pass
1099+
* it the flag EXTENT_CLEAR_DATA_RESV.
1100+
*/
1101+
if (extent_reserved) {
1102+
extent_clear_unlock_delalloc(inode, start,
1103+
start + cur_alloc_size,
1104+
start + cur_alloc_size,
1105+
locked_page,
1106+
clear_bits,
1107+
page_ops);
1108+
start += cur_alloc_size;
1109+
if (start >= end)
1110+
goto out;
1111+
}
10841112
extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
10851113
locked_page,
1086-
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
1087-
EXTENT_DELALLOC | EXTENT_DEFRAG,
1088-
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1089-
PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
1114+
clear_bits | EXTENT_CLEAR_DATA_RESV,
1115+
page_ops);
10901116
goto out;
10911117
}
10921118

@@ -1776,7 +1802,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
17761802

17771803
if (*bits & EXTENT_FIRST_DELALLOC) {
17781804
*bits &= ~EXTENT_FIRST_DELALLOC;
1779-
} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
1805+
} else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
17801806
spin_lock(&inode->lock);
17811807
inode->outstanding_extents -= num_extents;
17821808
spin_unlock(&inode->lock);
@@ -1787,18 +1813,17 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
17871813
* don't need to call dellalloc_release_metadata if there is an
17881814
* error.
17891815
*/
1790-
if (*bits & EXTENT_DO_ACCOUNTING &&
1816+
if (*bits & EXTENT_CLEAR_META_RESV &&
17911817
root != fs_info->tree_root)
17921818
btrfs_delalloc_release_metadata(inode, len);
17931819

17941820
/* For sanity tests. */
17951821
if (btrfs_is_testing(fs_info))
17961822
return;
17971823

1798-
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1799-
&& do_list && !(state->state & EXTENT_NORESERVE)
1800-
&& (*bits & (EXTENT_DO_ACCOUNTING |
1801-
EXTENT_CLEAR_DATA_RESV)))
1824+
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
1825+
do_list && !(state->state & EXTENT_NORESERVE) &&
1826+
(*bits & EXTENT_CLEAR_DATA_RESV))
18021827
btrfs_free_reserved_data_space_noquota(
18031828
&inode->vfs_inode,
18041829
state->start, len);

0 commit comments

Comments
 (0)