Skip to content

Commit 9ecb5ef

Browse files
adam900710 authored and kdave committed
btrfs: scrub: introduce the main read repair worker for scrub_stripe
The new helper, scrub_stripe_read_repair_worker(), handles the read-repair part:

- Wait for the previously submitted read IO to finish
- Verify the contents of the stripe
- Go through the remaining mirrors, using as large a blocksize as possible. At this stage, we just read out all the failed sectors from each mirror and re-verify. If there are no more failed sectors, we can exit.
- Go through all mirrors again, sector-by-sector. This time, we read sector by sector; this is to address cases where one bad sector mismatches the drive's internal checksum and causes the whole read range to fail. We put this recovery method as the last resort, as sector-by-sector reading is slow, and reading from other mirrors may have already fixed the errors.

Signed-off-by: Qu Wenruo <[email protected]>
Reviewed-by: David Sterba <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 97cf8f3 commit 9ecb5ef

File tree

2 files changed

+204
-4
lines changed

2 files changed

+204
-4
lines changed

fs/btrfs/scrub.c

Lines changed: 203 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ struct scrub_stripe {
121121

122122
atomic_t pending_io;
123123
wait_queue_head_t io_wait;
124+
wait_queue_head_t repair_wait;
124125

125126
/*
126127
* Indicate the states of the stripe. Bits are defined in
@@ -156,6 +157,8 @@ struct scrub_stripe {
156157
* group.
157158
*/
158159
u8 *csums;
160+
161+
struct work_struct work;
159162
};
160163

161164
struct scrub_recover {
@@ -381,6 +384,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe
381384
stripe->state = 0;
382385

383386
init_waitqueue_head(&stripe->io_wait);
387+
init_waitqueue_head(&stripe->repair_wait);
384388
atomic_set(&stripe->pending_io, 0);
385389

386390
ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages);
@@ -403,7 +407,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe
403407
return -ENOMEM;
404408
}
405409

406-
void wait_scrub_stripe_io(struct scrub_stripe *stripe)
410+
static void wait_scrub_stripe_io(struct scrub_stripe *stripe)
407411
{
408412
wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0);
409413
}
@@ -2327,7 +2331,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
23272331
}
23282332

23292333
/* Verify specified sectors of a stripe. */
2330-
void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap)
2334+
static void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap)
23312335
{
23322336
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
23332337
const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
@@ -2340,6 +2344,203 @@ void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap)
23402344
}
23412345
}
23422346

2347+
static int calc_sector_number(struct scrub_stripe *stripe, struct bio_vec *first_bvec)
2348+
{
2349+
int i;
2350+
2351+
for (i = 0; i < stripe->nr_sectors; i++) {
2352+
if (scrub_stripe_get_page(stripe, i) == first_bvec->bv_page &&
2353+
scrub_stripe_get_page_offset(stripe, i) == first_bvec->bv_offset)
2354+
break;
2355+
}
2356+
ASSERT(i < stripe->nr_sectors);
2357+
return i;
2358+
}
2359+
2360+
/*
2361+
* Repair read is different to the regular read:
2362+
*
2363+
* - Only reads the failed sectors
2364+
* - May have extra blocksize limits
2365+
*/
2366+
static void scrub_repair_read_endio(struct btrfs_bio *bbio)
2367+
{
2368+
struct scrub_stripe *stripe = bbio->private;
2369+
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
2370+
struct bio_vec *bvec;
2371+
int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio));
2372+
u32 bio_size = 0;
2373+
int i;
2374+
2375+
ASSERT(sector_nr < stripe->nr_sectors);
2376+
2377+
bio_for_each_bvec_all(bvec, &bbio->bio, i)
2378+
bio_size += bvec->bv_len;
2379+
2380+
if (bbio->bio.bi_status) {
2381+
bitmap_set(&stripe->io_error_bitmap, sector_nr,
2382+
bio_size >> fs_info->sectorsize_bits);
2383+
bitmap_set(&stripe->error_bitmap, sector_nr,
2384+
bio_size >> fs_info->sectorsize_bits);
2385+
} else {
2386+
bitmap_clear(&stripe->io_error_bitmap, sector_nr,
2387+
bio_size >> fs_info->sectorsize_bits);
2388+
}
2389+
bio_put(&bbio->bio);
2390+
if (atomic_dec_and_test(&stripe->pending_io))
2391+
wake_up(&stripe->io_wait);
2392+
}
2393+
2394+
/*
 * Return the mirror number following @mirror, wrapping back to 1 once
 * @num_copies would be exceeded.  @mirror is ASSERT()ed to be no larger than
 * @num_copies.
 */
static int calc_next_mirror(int mirror, int num_copies)
{
	int next;

	ASSERT(mirror <= num_copies);

	next = mirror + 1;
	if (next > num_copies)
		return 1;
	return next;
}
2399+
2400+
/*
 * Re-read the sectors flagged in stripe->error_bitmap from @mirror.
 *
 * The set of sectors to read is a snapshot of error_bitmap taken on entry.
 * Adjacent failed sectors are packed into the same bio; the current bio is
 * submitted when the preceding sector is not flagged in error_bitmap or when
 * the bio already holds at least @blocksize bytes.  Every bio completes
 * through scrub_repair_read_endio().
 *
 * If @wait is true, each bio is waited for right after it is submitted.
 * The caller must enter with no pending IO on the stripe (ASSERT()ed).
 */
static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
					    int mirror, int blocksize, bool wait)
{
	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
	const unsigned long sectors_to_read = stripe->error_bitmap;
	struct btrfs_bio *cur = NULL;
	int sector_nr;

	ASSERT(stripe->mirror_num >= 1);
	ASSERT(atomic_read(&stripe->pending_io) == 0);

	for_each_set_bit(sector_nr, &sectors_to_read, stripe->nr_sectors) {
		struct page *page = scrub_stripe_get_page(stripe, sector_nr);
		int pgoff = scrub_stripe_get_page_offset(stripe, sector_nr);
		int added;

		if (cur) {
			const bool gap_before = sector_nr > 0 &&
				!test_bit(sector_nr - 1, &stripe->error_bitmap);

			/* The current sector cannot be merged, submit the bio. */
			if (gap_before || cur->bio.bi_iter.bi_size >= blocksize) {
				ASSERT(cur->bio.bi_iter.bi_size);
				atomic_inc(&stripe->pending_io);
				btrfs_submit_bio(cur, mirror);
				if (wait)
					wait_scrub_stripe_io(stripe);
				cur = NULL;
			}
		}

		/* Start a new bio at the logical address of this sector. */
		if (!cur) {
			cur = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
					      fs_info, scrub_repair_read_endio, stripe);
			cur->bio.bi_iter.bi_sector = (stripe->logical +
				(sector_nr << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
		}

		added = bio_add_page(&cur->bio, page, fs_info->sectorsize, pgoff);
		ASSERT(added == fs_info->sectorsize);
	}

	/* Nothing left half-built: either no bio at all, or submit the last one. */
	if (!cur)
		return;

	ASSERT(cur->bio.bi_iter.bi_size);
	atomic_inc(&stripe->pending_io);
	btrfs_submit_bio(cur, mirror);
	if (wait)
		wait_scrub_stripe_io(stripe);
}
2448+
2449+
/*
2450+
* The main entrance for all read related scrub work, including:
2451+
*
2452+
* - Wait for the initial read to finish
2453+
* - Verify and locate any bad sectors
2454+
* - Go through the remaining mirrors and try to read as large blocksize as
2455+
* possible
2456+
* - Go through all mirrors (including the failed mirror) sector-by-sector
2457+
*
2458+
* Writeback does not happen here, it needs extra synchronization.
2459+
*/
2460+
static void scrub_stripe_read_repair_worker(struct work_struct *work)
2461+
{
2462+
struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work);
2463+
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
2464+
int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
2465+
stripe->bg->length);
2466+
int mirror;
2467+
int i;
2468+
2469+
ASSERT(stripe->mirror_num > 0);
2470+
2471+
wait_scrub_stripe_io(stripe);
2472+
scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap);
2473+
/* Save the initial failed bitmap for later repair and report usage. */
2474+
stripe->init_error_bitmap = stripe->error_bitmap;
2475+
2476+
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
2477+
goto out;
2478+
2479+
/*
2480+
* Try all remaining mirrors.
2481+
*
2482+
* Here we still try to read as large block as possible, as this is
2483+
* faster and we have extra safety nets to rely on.
2484+
*/
2485+
for (mirror = calc_next_mirror(stripe->mirror_num, num_copies);
2486+
mirror != stripe->mirror_num;
2487+
mirror = calc_next_mirror(mirror, num_copies)) {
2488+
const unsigned long old_error_bitmap = stripe->error_bitmap;
2489+
2490+
scrub_stripe_submit_repair_read(stripe, mirror,
2491+
BTRFS_STRIPE_LEN, false);
2492+
wait_scrub_stripe_io(stripe);
2493+
scrub_verify_one_stripe(stripe, old_error_bitmap);
2494+
if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
2495+
goto out;
2496+
}
2497+
2498+
/*
2499+
* Last safety net, try re-checking all mirrors, including the failed
2500+
* one, sector-by-sector.
2501+
*
2502+
* As if one sector failed the drive's internal csum, the whole read
2503+
* containing the offending sector would be marked as error.
2504+
* Thus here we do sector-by-sector read.
2505+
*
2506+
* This can be slow, thus we only try it as the last resort.
2507+
*/
2508+
2509+
for (i = 0, mirror = stripe->mirror_num;
2510+
i < num_copies;
2511+
i++, mirror = calc_next_mirror(mirror, num_copies)) {
2512+
const unsigned long old_error_bitmap = stripe->error_bitmap;
2513+
2514+
scrub_stripe_submit_repair_read(stripe, mirror,
2515+
fs_info->sectorsize, true);
2516+
wait_scrub_stripe_io(stripe);
2517+
scrub_verify_one_stripe(stripe, old_error_bitmap);
2518+
if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
2519+
goto out;
2520+
}
2521+
out:
2522+
set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
2523+
wake_up(&stripe->repair_wait);
2524+
}
2525+
2526+
void scrub_read_endio(struct btrfs_bio *bbio)
2527+
{
2528+
struct scrub_stripe *stripe = bbio->private;
2529+
2530+
if (bbio->bio.bi_status) {
2531+
bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
2532+
bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors);
2533+
} else {
2534+
bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
2535+
}
2536+
bio_put(&bbio->bio);
2537+
if (atomic_dec_and_test(&stripe->pending_io)) {
2538+
wake_up(&stripe->io_wait);
2539+
INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker);
2540+
queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work);
2541+
}
2542+
}
2543+
23432544
static int scrub_checksum_tree_block(struct scrub_block *sblock)
23442545
{
23452546
struct scrub_ctx *sctx = sblock->sctx;

fs/btrfs/scrub.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,10 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
1919
*/
2020
struct scrub_stripe;
2121
int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe);
22-
void wait_scrub_stripe_io(struct scrub_stripe *stripe);
2322
int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
2423
struct btrfs_device *dev, u64 physical,
2524
int mirror_num, u64 logical_start,
2625
u32 logical_len, struct scrub_stripe *stripe);
27-
void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap);
26+
void scrub_read_endio(struct btrfs_bio *bbio);
2827

2928
#endif

0 commit comments

Comments
 (0)