@@ -50,6 +50,7 @@ struct scrub_ctx;
  */
 #define SCRUB_SECTORS_PER_BIO	32	/* 128KiB per bio for 4KiB pages */
 #define SCRUB_BIOS_PER_SCTX	64	/* 8MiB per device in flight for 4KiB pages */
+#define SCRUB_STRIPES_PER_SCTX	8	/* That would be 8 64K stripes per device. */
 
 /*
  * The following value times PAGE_SIZE needs to be large enough to match the
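With BTRFS_STRIPE_LEN fixed at 64KiB, the new constant caps the stripe payload at 8 × 64KiB = 512KiB in flight per device, alongside the existing 8MiB cap implied by SCRUB_BIOS_PER_SCTX.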
@@ -277,9 +278,11 @@ struct scrub_parity {
 
 struct scrub_ctx {
 	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
+	struct scrub_stripe	stripes[SCRUB_STRIPES_PER_SCTX];
 	struct btrfs_fs_info	*fs_info;
 	int			first_free;
 	int			curr;
+	int			cur_stripe;
 	atomic_t		bios_in_flight;
 	atomic_t		workers_pending;
 	spinlock_t		list_lock;
@@ -389,7 +392,8 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
 	stripe->state = 0;
 }
 
-int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe)
+static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
+			     struct scrub_stripe *stripe)
 {
 	int ret;
 
@@ -895,6 +899,9 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
 		kfree(sbio);
 	}
 
+	for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
+		release_scrub_stripe(&sctx->stripes[i]);
+
 	kfree(sctx->wr_curr_bio);
 	scrub_free_csums(sctx);
 	kfree(sctx);
@@ -939,6 +946,14 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 		else
 			sctx->bios[i]->next_free = -1;
 	}
+	for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
+		int ret;
+
+		ret = init_scrub_stripe(fs_info, &sctx->stripes[i]);
+		if (ret < 0)
+			goto nomem;
+		sctx->stripes[i].sctx = sctx;
+	}
 	sctx->first_free = 0;
 	atomic_set(&sctx->bios_in_flight, 0);
 	atomic_set(&sctx->workers_pending, 0);
@@ -2668,7 +2683,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
 	wake_up(&stripe->repair_wait);
 }
 
-void scrub_read_endio(struct btrfs_bio *bbio)
+static void scrub_read_endio(struct btrfs_bio *bbio)
 {
 	struct scrub_stripe *stripe = bbio->private;
 
@@ -2725,8 +2740,8 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
  *
  * - Handle dev-replace and read-repair writeback differently
  */
-void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe,
-			 unsigned long write_bitmap, bool dev_replace)
+static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe,
+				unsigned long write_bitmap, bool dev_replace)
 {
 	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
 	struct btrfs_bio *bbio = NULL;
@@ -4294,10 +4309,11 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
  * Return >0 if there is no such stripe in the specified range.
  * Return <0 for error.
  */
-int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
-				 struct btrfs_device *dev, u64 physical,
-				 int mirror_num, u64 logical_start,
-				 u32 logical_len, struct scrub_stripe *stripe)
+static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
+					struct btrfs_device *dev, u64 physical,
+					int mirror_num, u64 logical_start,
+					u32 logical_len,
+					struct scrub_stripe *stripe)
 {
 	struct btrfs_fs_info *fs_info = bg->fs_info;
 	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start);
@@ -4406,6 +4422,159 @@ int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
 	return ret;
 }
 
+static void scrub_reset_stripe(struct scrub_stripe *stripe)
+{
+	scrub_stripe_reset_bitmaps(stripe);
+
+	stripe->nr_meta_extents = 0;
+	stripe->nr_data_extents = 0;
+	stripe->state = 0;
+
+	for (int i = 0; i < stripe->nr_sectors; i++) {
+		stripe->sectors[i].is_metadata = false;
+		stripe->sectors[i].csum = NULL;
+		stripe->sectors[i].generation = 0;
+	}
+}
+
+static void scrub_submit_initial_read(struct scrub_ctx *sctx,
+				      struct scrub_stripe *stripe)
+{
+	struct btrfs_fs_info *fs_info = sctx->fs_info;
+	struct btrfs_bio *bbio;
+	int mirror = stripe->mirror_num;
+
+	ASSERT(stripe->bg);
+	ASSERT(stripe->mirror_num > 0);
+	ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));
+
+	bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
+			       scrub_read_endio, stripe);
+
+	/* Read the whole stripe. */
+	bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
+	for (int i = 0; i < BTRFS_STRIPE_LEN >> PAGE_SHIFT; i++) {
+		int ret;
+
+		ret = bio_add_page(&bbio->bio, stripe->pages[i], PAGE_SIZE, 0);
+		/* We should have allocated enough bio vectors. */
+		ASSERT(ret == PAGE_SIZE);
+	}
+	atomic_inc(&stripe->pending_io);
+
+	/*
+	 * For dev-replace, when the user asks to avoid the source dev or
+	 * the device is missing, we try the next mirror instead.
+	 */
+	if (sctx->is_dev_replace &&
+	    (fs_info->dev_replace.cont_reading_from_srcdev_mode ==
+	     BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID ||
+	     !stripe->dev->bdev)) {
+		int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
+						  stripe->bg->length);
+
+		mirror = calc_next_mirror(mirror, num_copies);
+	}
+	btrfs_submit_bio(bbio, mirror);
+}
+
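The mirror switch at the bottom relies on calc_next_mirror(), which is not shown in this diff. As a reference only, a minimal sketch of the 1-based rotation it is expected to perform (this is an assumption for illustration; the authoritative definition lives elsewhere in scrub.c):

	/* Illustrative sketch: advance to the next mirror, wrapping from num_copies back to 1. */
	static int calc_next_mirror(int mirror, int num_copies)
	{
		return (mirror + 1 > num_copies) ? 1 : mirror + 1;
	}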
+static void flush_scrub_stripes(struct scrub_ctx *sctx)
+{
+	struct btrfs_fs_info *fs_info = sctx->fs_info;
+	struct scrub_stripe *stripe;
+	const int nr_stripes = sctx->cur_stripe;
+
+	if (!nr_stripes)
+		return;
+
+	ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state));
+	for (int i = 0; i < nr_stripes; i++) {
+		stripe = &sctx->stripes[i];
+		scrub_submit_initial_read(sctx, stripe);
+	}
+
+	for (int i = 0; i < nr_stripes; i++) {
+		stripe = &sctx->stripes[i];
+
+		wait_event(stripe->repair_wait,
+			   test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state));
+	}
+
+	/*
+	 * Submit the repaired sectors.  For the zoned case, we cannot do
+	 * the repair in-place, but queue the bg to be relocated instead.
+	 */
+	if (btrfs_is_zoned(fs_info)) {
+		for (int i = 0; i < nr_stripes; i++) {
+			stripe = &sctx->stripes[i];
+
+			if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) {
+				btrfs_repair_one_zone(fs_info,
+						      sctx->stripes[0].bg->start);
+				break;
+			}
+		}
+	} else {
+		for (int i = 0; i < nr_stripes; i++) {
+			unsigned long repaired;
+
+			stripe = &sctx->stripes[i];
+
+			bitmap_andnot(&repaired, &stripe->init_error_bitmap,
+				      &stripe->error_bitmap, stripe->nr_sectors);
+			scrub_write_sectors(sctx, stripe, repaired, false);
+		}
+	}
+
+	/* Submit for dev-replace. */
+	if (sctx->is_dev_replace) {
+		for (int i = 0; i < nr_stripes; i++) {
+			unsigned long good;
+
+			stripe = &sctx->stripes[i];
+
+			ASSERT(stripe->dev == fs_info->dev_replace.srcdev);
+
+			bitmap_andnot(&good, &stripe->extent_sector_bitmap,
+				      &stripe->error_bitmap, stripe->nr_sectors);
+			scrub_write_sectors(sctx, stripe, good, true);
+		}
+	}
+
+	/* Wait for the above writebacks to finish. */
+	for (int i = 0; i < nr_stripes; i++) {
+		stripe = &sctx->stripes[i];
+
+		wait_scrub_stripe_io(stripe);
+		scrub_reset_stripe(stripe);
+	}
+	sctx->cur_stripe = 0;
+}
+
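To make the writeback selection in the non-zoned branch concrete: init_error_bitmap records which sectors failed the initial read, error_bitmap records which ones are still bad after repair, so the andnot keeps exactly the sectors that got fixed. A self-contained sketch with hypothetical values:

	/*
	 * Hypothetical 16-sector stripe: sectors 1 and 2 failed the first
	 * read (bits 0x6); repair recovered sector 1 but not sector 2
	 * (remaining errors 0x4), so only sector 1 is written back.
	 */
	unsigned long init_error_bitmap = 0x6;
	unsigned long error_bitmap = 0x4;
	unsigned long repaired;

	bitmap_andnot(&repaired, &init_error_bitmap, &error_bitmap, 16);
	/* repaired == 0x2, i.e. only the successfully repaired sector. */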
+int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg,
+		       struct btrfs_device *dev, int mirror_num,
+		       u64 logical, u32 length, u64 physical)
+{
+	struct scrub_stripe *stripe;
+	int ret;
+
+	/* No available slot, submit all stripes and wait for them. */
+	if (sctx->cur_stripe >= SCRUB_STRIPES_PER_SCTX)
+		flush_scrub_stripes(sctx);
+
+	stripe = &sctx->stripes[sctx->cur_stripe];
+
+	/* We can queue one stripe using the remaining slot. */
+	scrub_reset_stripe(stripe);
+	ret = scrub_find_fill_first_stripe(bg, dev, physical, mirror_num,
+					   logical, length, stripe);
+	/* Either >0 for no more extents, or <0 for error. */
+	if (ret)
+		return ret;
+	sctx->cur_stripe++;
+	return 0;
+}
+
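No caller of these two helpers appears in this diff; presumably a later patch in the series wires them up. As a hedged sketch of the intended calling pattern (loop bounds and variables here are hypothetical):

	/* Queue every 64K stripe of a block group, then drain the queue. */
	for (u64 cur_logical = bg->start;
	     cur_logical < bg->start + bg->length;
	     cur_logical += BTRFS_STRIPE_LEN) {
		ret = queue_scrub_stripe(sctx, bg, dev, mirror_num,
					 cur_logical, BTRFS_STRIPE_LEN,
					 physical + (cur_logical - bg->start));
		if (ret > 0)	/* No more extents in this range. */
			break;
		if (ret < 0)
			goto out;
	}
	flush_scrub_stripes(sctx);

queue_scrub_stripe() flushes on its own once all SCRUB_STRIPES_PER_SCTX slots fill up, so the explicit flush_scrub_stripes() only has to pick up the tail.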
 /*
  * Scrub one range which can only have a simple mirror based profile.
  * (Including all ranges in SINGLE/DUP/RAID1/RAID1C*, and each stripe in