Skip to content

Commit a1ed835

Browse files
committed
Btrfs: Fix extent replacement race
Data COW means that whenever we write to a file, we replace any old extent pointers with new ones. There was a window where a readpage might find the old extent pointers on disk and cache them in the extent_map tree in ram in the middle of a given write replacing them. Even though both the readpage and the write had their respective bytes in the file locked, the extent readpage inserts may cover more bytes than it had locked down. This commit closes the race by keeping the new extent pinned in the extent map tree until after the on-disk btree is properly set up with the new extent pointers. Signed-off-by: Chris Mason <[email protected]>
1 parent 8b62b72 commit a1ed835

File tree

7 files changed

+80
-13
lines changed

7 files changed

+80
-13
lines changed

fs/btrfs/ctree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2292,7 +2292,7 @@ extern struct file_operations btrfs_file_operations;
22922292
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
22932293
struct btrfs_root *root, struct inode *inode,
22942294
u64 start, u64 end, u64 locked_end,
2295-
u64 inline_limit, u64 *hint_block);
2295+
u64 inline_limit, u64 *hint_block, int drop_cache);
22962296
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
22972297
struct btrfs_root *root,
22982298
struct inode *inode, u64 start, u64 end);

fs/btrfs/extent_map.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
198198
return 0;
199199
}
200200

201+
/**
 * unpin_extent_cache - clear the pinned flag on a cached extent mapping
 * @tree:	extent map tree to search
 * @start:	offset the mapping is expected to begin at
 * @len:	length passed to the lookup together with @start
 *
 * Under the tree's write lock, fetch the mapping that
 * lookup_extent_mapping() returns for @start/@len, clear
 * EXTENT_FLAG_PINNED on it, and then fold it together with its previous
 * and next neighbours in the rbtree whenever mergable_maps() accepts the
 * pair.  A neighbour that is merged is erased from the tree and released
 * with free_extent_map().
 *
 * A warning is emitted when the lookup finds nothing or when the mapping
 * found does not begin exactly at @start; in the latter case the mapping
 * is still unpinned and merged.
 *
 * Always returns 0.
 */
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
{
	int ret = 0;
	struct extent_map *merge = NULL;
	struct rb_node *rb;
	struct extent_map *em;

	write_lock(&tree->lock);
	em = lookup_extent_mapping(tree, start, len);

	/* test !em first so a failed lookup is never dereferenced */
	WARN_ON(!em || em->start != start);

	if (!em)
		goto out;

	clear_bit(EXTENT_FLAG_PINNED, &em->flags);

	/* a mapping that starts at offset 0 is not merged backwards */
	if (em->start != 0) {
		rb = rb_prev(&em->rb_node);
		if (rb)
			merge = rb_entry(rb, struct extent_map, rb_node);
		if (rb && mergable_maps(merge, em)) {
			/* absorb the previous mapping into em */
			em->start = merge->start;
			em->len += merge->len;
			em->block_len += merge->block_len;
			em->block_start = merge->block_start;
			merge->in_tree = 0;
			rb_erase(&merge->rb_node, &tree->map);
			free_extent_map(merge);
		}
	}

	rb = rb_next(&em->rb_node);
	if (rb)
		merge = rb_entry(rb, struct extent_map, rb_node);
	if (rb && mergable_maps(em, merge)) {
		/*
		 * absorb the next mapping into em; block_len must grow by
		 * the neighbour's block_len (not its len), matching the
		 * backward merge above
		 */
		em->len += merge->len;
		em->block_len += merge->block_len;
		rb_erase(&merge->rb_node, &tree->map);
		merge->in_tree = 0;
		free_extent_map(merge);
	}

	/*
	 * NOTE(review): presumably drops the reference taken by
	 * lookup_extent_mapping() -- confirm against its definition
	 */
	free_extent_map(em);
out:
	write_unlock(&tree->lock);
	return ret;
}
250+
201251
/**
202252
* add_extent_mapping - add new extent map to the extent tree
203253
* @tree: tree to insert new map in

fs/btrfs/extent_map.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,5 @@ struct extent_map *alloc_extent_map(gfp_t mask);
5959
void free_extent_map(struct extent_map *em);
6060
int __init extent_map_init(void);
6161
void extent_map_exit(void);
62+
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len);
6263
#endif

fs/btrfs/file.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,10 +177,10 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
177177
}
178178
flags = em->flags;
179179
if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
180-
write_unlock(&em_tree->lock);
181180
if (em->start <= start &&
182181
(!testend || em->start + em->len >= start + len)) {
183182
free_extent_map(em);
183+
write_unlock(&em_tree->lock);
184184
break;
185185
}
186186
if (start < em->start) {
@@ -190,6 +190,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
190190
start = em->start + em->len;
191191
}
192192
free_extent_map(em);
193+
write_unlock(&em_tree->lock);
193194
continue;
194195
}
195196
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -269,7 +270,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
269270
noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
270271
struct btrfs_root *root, struct inode *inode,
271272
u64 start, u64 end, u64 locked_end,
272-
u64 inline_limit, u64 *hint_byte)
273+
u64 inline_limit, u64 *hint_byte, int drop_cache)
273274
{
274275
u64 extent_end = 0;
275276
u64 search_start = start;
@@ -294,7 +295,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
294295
int ret;
295296

296297
inline_limit = 0;
297-
btrfs_drop_extent_cache(inode, start, end - 1, 0);
298+
if (drop_cache)
299+
btrfs_drop_extent_cache(inode, start, end - 1, 0);
298300

299301
path = btrfs_alloc_path();
300302
if (!path)

fs/btrfs/inode.c

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
232232
}
233233

234234
ret = btrfs_drop_extents(trans, root, inode, start,
235-
aligned_end, aligned_end, start, &hint_byte);
235+
aligned_end, aligned_end, start,
236+
&hint_byte, 1);
236237
BUG_ON(ret);
237238

238239
if (isize > actual_end)
@@ -241,7 +242,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
241242
inline_len, compressed_size,
242243
compressed_pages);
243244
BUG_ON(ret);
244-
btrfs_drop_extent_cache(inode, start, aligned_end, 0);
245+
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
245246
return 0;
246247
}
247248

@@ -1455,9 +1456,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
14551456
BUG_ON(!path);
14561457

14571458
path->leave_spinning = 1;
1459+
1460+
/*
1461+
* we may be replacing one extent in the tree with another.
1462+
* The new extent is pinned in the extent map, and we don't want
1463+
* to drop it from the cache until it is completely in the btree.
1464+
*
1465+
* So, tell btrfs_drop_extents to leave this extent in the cache.
1466+
* the caller is expected to unpin it and allow it to be merged
1467+
* with the others.
1468+
*/
14581469
ret = btrfs_drop_extents(trans, root, inode, file_pos,
14591470
file_pos + num_bytes, locked_end,
1460-
file_pos, &hint);
1471+
file_pos, &hint, 0);
14611472
BUG_ON(ret);
14621473

14631474
ins.objectid = inode->i_ino;
@@ -1485,7 +1496,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
14851496
btrfs_mark_buffer_dirty(leaf);
14861497

14871498
inode_add_bytes(inode, num_bytes);
1488-
btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
14891499

14901500
ins.objectid = disk_bytenr;
14911501
ins.offset = disk_num_bytes;
@@ -1596,6 +1606,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
15961606
ordered_extent->len,
15971607
compressed, 0, 0,
15981608
BTRFS_FILE_EXTENT_REG);
1609+
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1610+
ordered_extent->file_offset,
1611+
ordered_extent->len);
15991612
BUG_ON(ret);
16001613
}
16011614
unlock_extent(io_tree, ordered_extent->file_offset,
@@ -2940,7 +2953,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
29402953
cur_offset,
29412954
cur_offset + hole_size,
29422955
block_end,
2943-
cur_offset, &hint_byte);
2956+
cur_offset, &hint_byte, 1);
29442957
if (err)
29452958
break;
29462959
err = btrfs_insert_file_extent(trans, root,
@@ -5086,6 +5099,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
50865099
0, 0, 0,
50875100
BTRFS_FILE_EXTENT_PREALLOC);
50885101
BUG_ON(ret);
5102+
btrfs_drop_extent_cache(inode, cur_offset,
5103+
cur_offset + ins.offset -1, 0);
50895104
num_bytes -= ins.offset;
50905105
cur_offset += ins.offset;
50915106
alloc_hint = ins.objectid + ins.offset;

fs/btrfs/ioctl.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -597,9 +597,8 @@ static int btrfs_defrag_file(struct file *file)
597597
clear_page_dirty_for_io(page);
598598

599599
btrfs_set_extent_delalloc(inode, page_start, page_end);
600-
601-
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
602600
set_page_dirty(page);
601+
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
603602
unlock_page(page);
604603
page_cache_release(page);
605604
balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
@@ -977,7 +976,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
977976

978977
/* punch hole in destination first */
979978
btrfs_drop_extents(trans, root, inode, off, off + len,
980-
off + len, 0, &hint_byte);
979+
off + len, 0, &hint_byte, 1);
981980

982981
/* clone data */
983982
key.objectid = src->i_ino;

fs/btrfs/tree-log.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
534534
saved_nbytes = inode_get_bytes(inode);
535535
/* drop any overlapping extents */
536536
ret = btrfs_drop_extents(trans, root, inode,
537-
start, extent_end, extent_end, start, &alloc_hint);
537+
start, extent_end, extent_end, start, &alloc_hint, 1);
538538
BUG_ON(ret);
539539

540540
if (found_type == BTRFS_FILE_EXTENT_REG ||

0 commit comments

Comments
 (0)