@@ -121,6 +121,7 @@ struct scrub_stripe {
121
121
122
122
atomic_t pending_io ;
123
123
wait_queue_head_t io_wait ;
124
+ wait_queue_head_t repair_wait ;
124
125
125
126
/*
126
127
* Indicate the states of the stripe. Bits are defined in
@@ -156,6 +157,8 @@ struct scrub_stripe {
156
157
* group.
157
158
*/
158
159
u8 * csums ;
160
+
161
+ struct work_struct work ;
159
162
};
160
163
161
164
struct scrub_recover {
@@ -381,6 +384,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe
381
384
stripe -> state = 0 ;
382
385
383
386
init_waitqueue_head (& stripe -> io_wait );
387
+ init_waitqueue_head (& stripe -> repair_wait );
384
388
atomic_set (& stripe -> pending_io , 0 );
385
389
386
390
ret = btrfs_alloc_page_array (SCRUB_STRIPE_PAGES , stripe -> pages );
@@ -403,7 +407,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe
403
407
return - ENOMEM ;
404
408
}
405
409
406
- void wait_scrub_stripe_io (struct scrub_stripe * stripe )
410
+ static void wait_scrub_stripe_io (struct scrub_stripe * stripe )
407
411
{
408
412
wait_event (stripe -> io_wait , atomic_read (& stripe -> pending_io ) == 0 );
409
413
}
@@ -2327,7 +2331,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
2327
2331
}
2328
2332
2329
2333
/* Verify specified sectors of a stripe. */
2330
- void scrub_verify_one_stripe (struct scrub_stripe * stripe , unsigned long bitmap )
2334
+ static void scrub_verify_one_stripe (struct scrub_stripe * stripe , unsigned long bitmap )
2331
2335
{
2332
2336
struct btrfs_fs_info * fs_info = stripe -> bg -> fs_info ;
2333
2337
const u32 sectors_per_tree = fs_info -> nodesize >> fs_info -> sectorsize_bits ;
@@ -2340,6 +2344,203 @@ void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap)
2340
2344
}
2341
2345
}
2342
2346
2347
+ static int calc_sector_number (struct scrub_stripe * stripe , struct bio_vec * first_bvec )
2348
+ {
2349
+ int i ;
2350
+
2351
+ for (i = 0 ; i < stripe -> nr_sectors ; i ++ ) {
2352
+ if (scrub_stripe_get_page (stripe , i ) == first_bvec -> bv_page &&
2353
+ scrub_stripe_get_page_offset (stripe , i ) == first_bvec -> bv_offset )
2354
+ break ;
2355
+ }
2356
+ ASSERT (i < stripe -> nr_sectors );
2357
+ return i ;
2358
+ }
2359
+
2360
+ /*
2361
+ * Repair read is different to the regular read:
2362
+ *
2363
+ * - Only reads the failed sectors
2364
+ * - May have extra blocksize limits
2365
+ */
2366
+ static void scrub_repair_read_endio (struct btrfs_bio * bbio )
2367
+ {
2368
+ struct scrub_stripe * stripe = bbio -> private ;
2369
+ struct btrfs_fs_info * fs_info = stripe -> bg -> fs_info ;
2370
+ struct bio_vec * bvec ;
2371
+ int sector_nr = calc_sector_number (stripe , bio_first_bvec_all (& bbio -> bio ));
2372
+ u32 bio_size = 0 ;
2373
+ int i ;
2374
+
2375
+ ASSERT (sector_nr < stripe -> nr_sectors );
2376
+
2377
+ bio_for_each_bvec_all (bvec , & bbio -> bio , i )
2378
+ bio_size += bvec -> bv_len ;
2379
+
2380
+ if (bbio -> bio .bi_status ) {
2381
+ bitmap_set (& stripe -> io_error_bitmap , sector_nr ,
2382
+ bio_size >> fs_info -> sectorsize_bits );
2383
+ bitmap_set (& stripe -> error_bitmap , sector_nr ,
2384
+ bio_size >> fs_info -> sectorsize_bits );
2385
+ } else {
2386
+ bitmap_clear (& stripe -> io_error_bitmap , sector_nr ,
2387
+ bio_size >> fs_info -> sectorsize_bits );
2388
+ }
2389
+ bio_put (& bbio -> bio );
2390
+ if (atomic_dec_and_test (& stripe -> pending_io ))
2391
+ wake_up (& stripe -> io_wait );
2392
+ }
2393
+
2394
/*
 * Return the mirror number following @mirror, wrapping around to 1 once
 * @num_copies has been reached.
 */
static int calc_next_mirror(int mirror, int num_copies)
{
	ASSERT(mirror <= num_copies);
	if (mirror >= num_copies)
		return 1;
	return mirror + 1;
}
2399
+
2400
+ static void scrub_stripe_submit_repair_read (struct scrub_stripe * stripe ,
2401
+ int mirror , int blocksize , bool wait )
2402
+ {
2403
+ struct btrfs_fs_info * fs_info = stripe -> bg -> fs_info ;
2404
+ struct btrfs_bio * bbio = NULL ;
2405
+ const unsigned long old_error_bitmap = stripe -> error_bitmap ;
2406
+ int i ;
2407
+
2408
+ ASSERT (stripe -> mirror_num >= 1 );
2409
+ ASSERT (atomic_read (& stripe -> pending_io ) == 0 );
2410
+
2411
+ for_each_set_bit (i , & old_error_bitmap , stripe -> nr_sectors ) {
2412
+ struct page * page ;
2413
+ int pgoff ;
2414
+ int ret ;
2415
+
2416
+ page = scrub_stripe_get_page (stripe , i );
2417
+ pgoff = scrub_stripe_get_page_offset (stripe , i );
2418
+
2419
+ /* The current sector cannot be merged, submit the bio. */
2420
+ if (bbio && ((i > 0 && !test_bit (i - 1 , & stripe -> error_bitmap )) ||
2421
+ bbio -> bio .bi_iter .bi_size >= blocksize )) {
2422
+ ASSERT (bbio -> bio .bi_iter .bi_size );
2423
+ atomic_inc (& stripe -> pending_io );
2424
+ btrfs_submit_bio (bbio , mirror );
2425
+ if (wait )
2426
+ wait_scrub_stripe_io (stripe );
2427
+ bbio = NULL ;
2428
+ }
2429
+
2430
+ if (!bbio ) {
2431
+ bbio = btrfs_bio_alloc (stripe -> nr_sectors , REQ_OP_READ ,
2432
+ fs_info , scrub_repair_read_endio , stripe );
2433
+ bbio -> bio .bi_iter .bi_sector = (stripe -> logical +
2434
+ (i << fs_info -> sectorsize_bits )) >> SECTOR_SHIFT ;
2435
+ }
2436
+
2437
+ ret = bio_add_page (& bbio -> bio , page , fs_info -> sectorsize , pgoff );
2438
+ ASSERT (ret == fs_info -> sectorsize );
2439
+ }
2440
+ if (bbio ) {
2441
+ ASSERT (bbio -> bio .bi_iter .bi_size );
2442
+ atomic_inc (& stripe -> pending_io );
2443
+ btrfs_submit_bio (bbio , mirror );
2444
+ if (wait )
2445
+ wait_scrub_stripe_io (stripe );
2446
+ }
2447
+ }
2448
+
2449
+ /*
2450
+ * The main entrance for all read related scrub work, including:
2451
+ *
2452
+ * - Wait for the initial read to finish
2453
+ * - Verify and locate any bad sectors
2454
+ * - Go through the remaining mirrors and try to read as large blocksize as
2455
+ * possible
2456
+ * - Go through all mirrors (including the failed mirror) sector-by-sector
2457
+ *
2458
+ * Writeback does not happen here, it needs extra synchronization.
2459
+ */
2460
+ static void scrub_stripe_read_repair_worker (struct work_struct * work )
2461
+ {
2462
+ struct scrub_stripe * stripe = container_of (work , struct scrub_stripe , work );
2463
+ struct btrfs_fs_info * fs_info = stripe -> bg -> fs_info ;
2464
+ int num_copies = btrfs_num_copies (fs_info , stripe -> bg -> start ,
2465
+ stripe -> bg -> length );
2466
+ int mirror ;
2467
+ int i ;
2468
+
2469
+ ASSERT (stripe -> mirror_num > 0 );
2470
+
2471
+ wait_scrub_stripe_io (stripe );
2472
+ scrub_verify_one_stripe (stripe , stripe -> extent_sector_bitmap );
2473
+ /* Save the initial failed bitmap for later repair and report usage. */
2474
+ stripe -> init_error_bitmap = stripe -> error_bitmap ;
2475
+
2476
+ if (bitmap_empty (& stripe -> init_error_bitmap , stripe -> nr_sectors ))
2477
+ goto out ;
2478
+
2479
+ /*
2480
+ * Try all remaining mirrors.
2481
+ *
2482
+ * Here we still try to read as large block as possible, as this is
2483
+ * faster and we have extra safety nets to rely on.
2484
+ */
2485
+ for (mirror = calc_next_mirror (stripe -> mirror_num , num_copies );
2486
+ mirror != stripe -> mirror_num ;
2487
+ mirror = calc_next_mirror (mirror , num_copies )) {
2488
+ const unsigned long old_error_bitmap = stripe -> error_bitmap ;
2489
+
2490
+ scrub_stripe_submit_repair_read (stripe , mirror ,
2491
+ BTRFS_STRIPE_LEN , false);
2492
+ wait_scrub_stripe_io (stripe );
2493
+ scrub_verify_one_stripe (stripe , old_error_bitmap );
2494
+ if (bitmap_empty (& stripe -> error_bitmap , stripe -> nr_sectors ))
2495
+ goto out ;
2496
+ }
2497
+
2498
+ /*
2499
+ * Last safety net, try re-checking all mirrors, including the failed
2500
+ * one, sector-by-sector.
2501
+ *
2502
+ * As if one sector failed the drive's internal csum, the whole read
2503
+ * containing the offending sector would be marked as error.
2504
+ * Thus here we do sector-by-sector read.
2505
+ *
2506
+ * This can be slow, thus we only try it as the last resort.
2507
+ */
2508
+
2509
+ for (i = 0 , mirror = stripe -> mirror_num ;
2510
+ i < num_copies ;
2511
+ i ++ , mirror = calc_next_mirror (mirror , num_copies )) {
2512
+ const unsigned long old_error_bitmap = stripe -> error_bitmap ;
2513
+
2514
+ scrub_stripe_submit_repair_read (stripe , mirror ,
2515
+ fs_info -> sectorsize , true);
2516
+ wait_scrub_stripe_io (stripe );
2517
+ scrub_verify_one_stripe (stripe , old_error_bitmap );
2518
+ if (bitmap_empty (& stripe -> error_bitmap , stripe -> nr_sectors ))
2519
+ goto out ;
2520
+ }
2521
+ out :
2522
+ set_bit (SCRUB_STRIPE_FLAG_REPAIR_DONE , & stripe -> state );
2523
+ wake_up (& stripe -> repair_wait );
2524
+ }
2525
+
2526
+ void scrub_read_endio (struct btrfs_bio * bbio )
2527
+ {
2528
+ struct scrub_stripe * stripe = bbio -> private ;
2529
+
2530
+ if (bbio -> bio .bi_status ) {
2531
+ bitmap_set (& stripe -> io_error_bitmap , 0 , stripe -> nr_sectors );
2532
+ bitmap_set (& stripe -> error_bitmap , 0 , stripe -> nr_sectors );
2533
+ } else {
2534
+ bitmap_clear (& stripe -> io_error_bitmap , 0 , stripe -> nr_sectors );
2535
+ }
2536
+ bio_put (& bbio -> bio );
2537
+ if (atomic_dec_and_test (& stripe -> pending_io )) {
2538
+ wake_up (& stripe -> io_wait );
2539
+ INIT_WORK (& stripe -> work , scrub_stripe_read_repair_worker );
2540
+ queue_work (stripe -> bg -> fs_info -> scrub_workers , & stripe -> work );
2541
+ }
2542
+ }
2543
+
2343
2544
static int scrub_checksum_tree_block (struct scrub_block * sblock )
2344
2545
{
2345
2546
struct scrub_ctx * sctx = sblock -> sctx ;
0 commit comments