Skip to content

Commit f7ef528

Browse files
naota authored and kdave committed
btrfs: zoned: relocate block group to repair IO failure in zoned filesystems
When a bad checksum is found and the filesystem has a mirror of the damaged data, we read the correct data from the mirror and write it to the damaged blocks. This, however, violates the sequential write constraints of a zoned block device. We can consider three methods to repair an IO failure in zoned filesystems: (1) Reset and rewrite the damaged zone (2) Allocate a new device extent and replace the damaged device extent with the new extent (3) Relocate the corresponding block group. Method (1) is most similar to the behavior on regular devices. However, it also wipes non-damaged data in the same device extent, and so it unnecessarily degrades non-damaged data. Method (2) is much like device replacing but done within the same device. It is safe because it keeps the device extent until the replacing finishes. However, extending device replacing is non-trivial. It assumes "src_dev->physical == dst_dev->physical". Also, the extent mapping replacing function would have to be extended to support replacing a device extent position within one device. Method (3) invokes relocation of the damaged block group and is straightforward to implement. It relocates all the mirrored device extents, so it is potentially a more costly operation than method (1) or (2). But it relocates only used extents, which reduces the total IO size. Let's apply method (3) for now. In the future, we can extend device-replace and apply method (2). To protect a block group from being relocated multiple times by multiple IO errors, this commit introduces the "relocating_repair" bit to show that it is now being relocated to repair IO failures. It also uses a new kthread, "btrfs-relocating-repair", so as not to block the IO path with the relocating process. This commit also supports repairing in the scrub process. Reviewed-by: Josef Bacik <[email protected]> Signed-off-by: Naohiro Aota <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 32430c6 commit f7ef528

File tree

5 files changed

+80
-0
lines changed

5 files changed

+80
-0
lines changed

fs/btrfs/block-group.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ struct btrfs_block_group {
9696
unsigned int has_caching_ctl:1;
9797
unsigned int removed:1;
9898
unsigned int to_copy:1;
99+
unsigned int relocating_repair:1;
99100

100101
int disk_cache_state;
101102

fs/btrfs/extent_io.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2260,6 +2260,9 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
22602260
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
22612261
BUG_ON(!mirror_num);
22622262

2263+
if (btrfs_is_zoned(fs_info))
2264+
return btrfs_repair_one_zone(fs_info, logical);
2265+
22632266
bio = btrfs_io_bio_alloc(1);
22642267
bio->bi_iter.bi_size = 0;
22652268
map_length = length;

fs/btrfs/scrub.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
857857
have_csum = sblock_to_check->pagev[0]->have_csum;
858858
dev = sblock_to_check->pagev[0]->dev;
859859

860+
if (btrfs_is_zoned(fs_info) && !sctx->is_dev_replace)
861+
return btrfs_repair_one_zone(fs_info, logical);
862+
860863
/*
861864
* We must use GFP_NOFS because the scrub task might be waiting for a
862865
* worker task executing this function and in turn a transaction commit

fs/btrfs/volumes.c

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7980,3 +7980,75 @@ bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
79807980
spin_unlock(&fs_info->swapfile_pins_lock);
79817981
return node != NULL;
79827982
}
7983+
7984+
static int relocating_repair_kthread(void *data)
7985+
{
7986+
struct btrfs_block_group *cache = (struct btrfs_block_group *)data;
7987+
struct btrfs_fs_info *fs_info = cache->fs_info;
7988+
u64 target;
7989+
int ret = 0;
7990+
7991+
target = cache->start;
7992+
btrfs_put_block_group(cache);
7993+
7994+
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
7995+
btrfs_info(fs_info,
7996+
"zoned: skip relocating block group %llu to repair: EBUSY",
7997+
target);
7998+
return -EBUSY;
7999+
}
8000+
8001+
mutex_lock(&fs_info->delete_unused_bgs_mutex);
8002+
8003+
/* Ensure block group still exists */
8004+
cache = btrfs_lookup_block_group(fs_info, target);
8005+
if (!cache)
8006+
goto out;
8007+
8008+
if (!cache->relocating_repair)
8009+
goto out;
8010+
8011+
ret = btrfs_may_alloc_data_chunk(fs_info, target);
8012+
if (ret < 0)
8013+
goto out;
8014+
8015+
btrfs_info(fs_info,
8016+
"zoned: relocating block group %llu to repair IO failure",
8017+
target);
8018+
ret = btrfs_relocate_chunk(fs_info, target);
8019+
8020+
out:
8021+
if (cache)
8022+
btrfs_put_block_group(cache);
8023+
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
8024+
btrfs_exclop_finish(fs_info);
8025+
8026+
return ret;
8027+
}
8028+
8029+
int btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
8030+
{
8031+
struct btrfs_block_group *cache;
8032+
8033+
/* Do not attempt to repair in degraded state */
8034+
if (btrfs_test_opt(fs_info, DEGRADED))
8035+
return 0;
8036+
8037+
cache = btrfs_lookup_block_group(fs_info, logical);
8038+
if (!cache)
8039+
return 0;
8040+
8041+
spin_lock(&cache->lock);
8042+
if (cache->relocating_repair) {
8043+
spin_unlock(&cache->lock);
8044+
btrfs_put_block_group(cache);
8045+
return 0;
8046+
}
8047+
cache->relocating_repair = 1;
8048+
spin_unlock(&cache->lock);
8049+
8050+
kthread_run(relocating_repair_kthread, cache,
8051+
"btrfs-relocating-repair");
8052+
8053+
return 0;
8054+
}

fs/btrfs/volumes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,5 +599,6 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
599599
int btrfs_bg_type_to_factor(u64 flags);
600600
const char *btrfs_bg_type_to_raid_name(u64 flags);
601601
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
602+
int btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
602603

603604
#endif

0 commit comments

Comments
 (0)