Skip to content

Commit b2d5ad8

Browse files
committed
Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "The important part of this pull is Filipe's set of fixes for btrfs device replacement. Filipe fixed a few issues seen on the list and a number he found on his own" * 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: Btrfs: deal with duplicates during extent_map insertion in btrfs_get_extent Btrfs: fix race between device replace and read repair Btrfs: fix race between device replace and discard Btrfs: fix race between device replace and chunk allocation Btrfs: fix race setting block group back to RW mode during device replace Btrfs: fix unprotected assignment of the left cursor for device replace Btrfs: fix race setting block group readonly during device replace Btrfs: fix race between device replace and block group removal Btrfs: fix race between readahead and device replace/removal
2 parents a3021a5 + 8dff9c8 commit b2d5ad8

File tree

8 files changed

+103
-18
lines changed

8 files changed

+103
-18
lines changed

fs/btrfs/extent-tree.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,6 +2042,11 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
20422042
struct btrfs_bio *bbio = NULL;
20432043

20442044

2045+
/*
2046+
* Avoid races with device replace and make sure our bbio has devices
2047+
* associated to its stripes that don't go away while we are discarding.
2048+
*/
2049+
btrfs_bio_counter_inc_blocked(root->fs_info);
20452050
/* Tell the block device(s) that the sectors can be discarded */
20462051
ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
20472052
bytenr, &num_bytes, &bbio, 0);
@@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
20742079
}
20752080
btrfs_put_bbio(bbio);
20762081
}
2082+
btrfs_bio_counter_dec(root->fs_info);
20772083

20782084
if (actual_bytes)
20792085
*actual_bytes = discarded_bytes;

fs/btrfs/extent_io.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2025,9 +2025,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
20252025
bio->bi_iter.bi_size = 0;
20262026
map_length = length;
20272027

2028+
/*
2029+
* Avoid races with device replace and make sure our bbio has devices
2030+
* associated to its stripes that don't go away while we are doing the
2031+
* read repair operation.
2032+
*/
2033+
btrfs_bio_counter_inc_blocked(fs_info);
20282034
ret = btrfs_map_block(fs_info, WRITE, logical,
20292035
&map_length, &bbio, mirror_num);
20302036
if (ret) {
2037+
btrfs_bio_counter_dec(fs_info);
20312038
bio_put(bio);
20322039
return -EIO;
20332040
}
@@ -2037,6 +2044,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
20372044
dev = bbio->stripes[mirror_num-1].dev;
20382045
btrfs_put_bbio(bbio);
20392046
if (!dev || !dev->bdev || !dev->writeable) {
2047+
btrfs_bio_counter_dec(fs_info);
20402048
bio_put(bio);
20412049
return -EIO;
20422050
}
@@ -2045,6 +2053,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
20452053

20462054
if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
20472055
/* try to remap that extent elsewhere? */
2056+
btrfs_bio_counter_dec(fs_info);
20482057
bio_put(bio);
20492058
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
20502059
return -EIO;
@@ -2054,6 +2063,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
20542063
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
20552064
btrfs_ino(inode), start,
20562065
rcu_str_deref(dev->name), sector);
2066+
btrfs_bio_counter_dec(fs_info);
20572067
bio_put(bio);
20582068
return 0;
20592069
}

fs/btrfs/inode.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6979,7 +6979,18 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
69796979
* existing will always be non-NULL, since there must be
69806980
* an extent causing the -EEXIST.
69816981
*/
6982-
if (start >= extent_map_end(existing) ||
6982+
if (existing->start == em->start &&
6983+
extent_map_end(existing) == extent_map_end(em) &&
6984+
em->block_start == existing->block_start) {
6985+
/*
6986+
* these two extents are the same, it happens
6987+
* with inlines especially
6988+
*/
6989+
free_extent_map(em);
6990+
em = existing;
6991+
err = 0;
6992+
6993+
} else if (start >= extent_map_end(existing) ||
69836994
start <= existing->start) {
69846995
/*
69856996
* The existing extent map is the one nearest to

fs/btrfs/ordered-data.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
718718
return count;
719719
}
720720

721-
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
721+
int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
722722
const u64 range_start, const u64 range_len)
723723
{
724724
struct btrfs_root *root;
725725
struct list_head splice;
726726
int done;
727+
int total_done = 0;
727728

728729
INIT_LIST_HEAD(&splice);
729730

@@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
742743
done = btrfs_wait_ordered_extents(root, nr,
743744
range_start, range_len);
744745
btrfs_put_fs_root(root);
746+
total_done += done;
745747

746748
spin_lock(&fs_info->ordered_root_lock);
747749
if (nr != -1) {
@@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
752754
list_splice_tail(&splice, &fs_info->ordered_roots);
753755
spin_unlock(&fs_info->ordered_root_lock);
754756
mutex_unlock(&fs_info->ordered_operations_mutex);
757+
758+
return total_done;
755759
}
756760

757761
/*

fs/btrfs/ordered-data.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
199199
u32 *sum, int len);
200200
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
201201
const u64 range_start, const u64 range_len);
202-
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
202+
int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
203203
const u64 range_start, const u64 range_len);
204204
void btrfs_get_logged_extents(struct inode *inode,
205205
struct list_head *logged_list,

fs/btrfs/reada.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
761761

762762
do {
763763
enqueued = 0;
764+
mutex_lock(&fs_devices->device_list_mutex);
764765
list_for_each_entry(device, &fs_devices->devices, dev_list) {
765766
if (atomic_read(&device->reada_in_flight) <
766767
MAX_IN_FLIGHT)
767768
enqueued += reada_start_machine_dev(fs_info,
768769
device);
769770
}
771+
mutex_unlock(&fs_devices->device_list_mutex);
770772
total += enqueued;
771773
} while (enqueued && total < 10000);
772774

fs/btrfs/scrub.c

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3582,6 +3582,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
35823582
*/
35833583
scrub_pause_on(fs_info);
35843584
ret = btrfs_inc_block_group_ro(root, cache);
3585+
if (!ret && is_dev_replace) {
3586+
/*
3587+
* If we are doing a device replace wait for any tasks
3588+
* that started delalloc right before we set the block
3589+
* group to RO mode, as they might have just allocated
3590+
* an extent from it or decided they could do a nocow
3591+
* write. And if any such tasks did that, wait for their
3592+
* ordered extents to complete and then commit the
3593+
* current transaction, so that we can later see the new
3594+
* extent items in the extent tree - the ordered extents
3595+
* create delayed data references (for cow writes) when
3596+
* they complete, which will be run and insert the
3597+
* corresponding extent items into the extent tree when
3598+
* we commit the transaction they used when running
3599+
* inode.c:btrfs_finish_ordered_io(). We later use
3600+
* the commit root of the extent tree to find extents
3601+
* to copy from the srcdev into the tgtdev, and we don't
3602+
* want to miss any new extents.
3603+
*/
3604+
btrfs_wait_block_group_reservations(cache);
3605+
btrfs_wait_nocow_writers(cache);
3606+
ret = btrfs_wait_ordered_roots(fs_info, -1,
3607+
cache->key.objectid,
3608+
cache->key.offset);
3609+
if (ret > 0) {
3610+
struct btrfs_trans_handle *trans;
3611+
3612+
trans = btrfs_join_transaction(root);
3613+
if (IS_ERR(trans))
3614+
ret = PTR_ERR(trans);
3615+
else
3616+
ret = btrfs_commit_transaction(trans,
3617+
root);
3618+
if (ret) {
3619+
scrub_pause_off(fs_info);
3620+
btrfs_put_block_group(cache);
3621+
break;
3622+
}
3623+
}
3624+
}
35853625
scrub_pause_off(fs_info);
35863626

35873627
if (ret == 0) {
@@ -3602,9 +3642,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
36023642
break;
36033643
}
36043644

3645+
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
36053646
dev_replace->cursor_right = found_key.offset + length;
36063647
dev_replace->cursor_left = found_key.offset;
36073648
dev_replace->item_needs_writeback = 1;
3649+
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
36083650
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
36093651
found_key.offset, cache, is_dev_replace);
36103652

@@ -3640,6 +3682,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
36403682

36413683
scrub_pause_off(fs_info);
36423684

3685+
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
3686+
dev_replace->cursor_left = dev_replace->cursor_right;
3687+
dev_replace->item_needs_writeback = 1;
3688+
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
3689+
36433690
if (ro_set)
36443691
btrfs_dec_block_group_ro(root, cache);
36453692

@@ -3677,9 +3724,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
36773724
ret = -ENOMEM;
36783725
break;
36793726
}
3680-
3681-
dev_replace->cursor_left = dev_replace->cursor_right;
3682-
dev_replace->item_needs_writeback = 1;
36833727
skip:
36843728
key.offset = found_key.offset + length;
36853729
btrfs_release_path(path);

fs/btrfs/volumes.c

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
27612761
u64 dev_extent_len = 0;
27622762
u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
27632763
int i, ret = 0;
2764+
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
27642765

27652766
/* Just in case */
27662767
root = root->fs_info->chunk_root;
@@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
27872788
check_system_chunk(trans, extent_root, map->type);
27882789
unlock_chunks(root->fs_info->chunk_root);
27892790

2791+
/*
2792+
* Take the device list mutex to prevent races with the final phase of
2793+
* a device replace operation that replaces the device object associated
2794+
* with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
2795+
*/
2796+
mutex_lock(&fs_devices->device_list_mutex);
27902797
for (i = 0; i < map->num_stripes; i++) {
27912798
struct btrfs_device *device = map->stripes[i].dev;
27922799
ret = btrfs_free_dev_extent(trans, device,
27932800
map->stripes[i].physical,
27942801
&dev_extent_len);
27952802
if (ret) {
2803+
mutex_unlock(&fs_devices->device_list_mutex);
27962804
btrfs_abort_transaction(trans, root, ret);
27972805
goto out;
27982806
}
@@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
28112819
if (map->stripes[i].dev) {
28122820
ret = btrfs_update_device(trans, map->stripes[i].dev);
28132821
if (ret) {
2822+
mutex_unlock(&fs_devices->device_list_mutex);
28142823
btrfs_abort_transaction(trans, root, ret);
28152824
goto out;
28162825
}
28172826
}
28182827
}
2828+
mutex_unlock(&fs_devices->device_list_mutex);
2829+
28192830
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
28202831
if (ret) {
28212832
btrfs_abort_transaction(trans, root, ret);
@@ -5762,20 +5773,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
57625773
}
57635774
}
57645775
if (found) {
5765-
if (physical_of_found + map->stripe_len <=
5766-
dev_replace->cursor_left) {
5767-
struct btrfs_bio_stripe *tgtdev_stripe =
5768-
bbio->stripes + num_stripes;
5776+
struct btrfs_bio_stripe *tgtdev_stripe =
5777+
bbio->stripes + num_stripes;
57695778

5770-
tgtdev_stripe->physical = physical_of_found;
5771-
tgtdev_stripe->length =
5772-
bbio->stripes[index_srcdev].length;
5773-
tgtdev_stripe->dev = dev_replace->tgtdev;
5774-
bbio->tgtdev_map[index_srcdev] = num_stripes;
5779+
tgtdev_stripe->physical = physical_of_found;
5780+
tgtdev_stripe->length =
5781+
bbio->stripes[index_srcdev].length;
5782+
tgtdev_stripe->dev = dev_replace->tgtdev;
5783+
bbio->tgtdev_map[index_srcdev] = num_stripes;
57755784

5776-
tgtdev_indexes++;
5777-
num_stripes++;
5778-
}
5785+
tgtdev_indexes++;
5786+
num_stripes++;
57795787
}
57805788
}
57815789

0 commit comments

Comments
 (0)