Skip to content

Commit 2942a50

Browse files
adam900710 authored and kdave committed
btrfs: raid56: introduce btrfs_raid_bio::error_bitmap
Currently btrfs raid56 uses btrfs_raid_bio::faila and failb to indicate
which stripe(s) had IO errors.

But that has some problems:

- If one sector fails the csum check, the whole stripe where the
  corruption is will be marked as error.
  This can reduce the chance that we can recover, like this:

          0  4K 8K
  Data 1  |XX|  |
  Data 2  |  |XX|
  Parity  |  |  |

  In the above case, 0~4K in data 1 should be recovered using data 2 and
  parity, while 4K~8K in data 2 should be recovered using data 1 and
  parity.

  Currently if we trigger a read on 0~4K of data 1, we will also recover
  4K~8K of data 1 using corrupted data 2 and parity, causing a wrong
  result in the rbio cache.

- Harder to expand for a future M-N scheme
  As we're limited to just faila/b, i.e. two corruptions.

- Harder to expand to handle extra csum errors
  This can be problematic if we start to do csum verification.

This patch will introduce an extra @error_bitmap, where one bit
represents an error that happened for that sector.

The choice to introduce a new error bitmap rather than reusing
sector_ptr is to avoid an extra search between rbio::stripe_sectors[]
and rbio::bio_sectors[].

Since we can submit a bio using sectors from both arrays, doing a proper
search on both arrays will be more complex.

Although the new bitmap will take extra memory, later we can remove
things like @error and faila/b to save some memory.

Currently the new error bitmap and the faila/b mechanism coexist; the
error bitmap is only updated at endio time and at recover entrance.

Signed-off-by: Qu Wenruo <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent e55cf7c commit 2942a50

File tree

2 files changed

+103
-7
lines changed

2 files changed

+103
-7
lines changed

fs/btrfs/raid56.c

Lines changed: 92 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ static void scrub_rbio_work_locked(struct work_struct *work);
7676

7777
static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
7878
{
79+
bitmap_free(rbio->error_bitmap);
7980
kfree(rbio->stripe_pages);
8081
kfree(rbio->bio_sectors);
8182
kfree(rbio->stripe_sectors);
@@ -950,9 +951,10 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
950951
rbio->stripe_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
951952
GFP_NOFS);
952953
rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS);
954+
rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);
953955

954956
if (!rbio->stripe_pages || !rbio->bio_sectors || !rbio->stripe_sectors ||
955-
!rbio->finish_pointers) {
957+
!rbio->finish_pointers || !rbio->error_bitmap) {
956958
free_raid_bio_pointers(rbio);
957959
kfree(rbio);
958960
return ERR_PTR(-ENOMEM);
@@ -1044,8 +1046,11 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
10441046
disk_start = stripe->physical + sector_nr * sectorsize;
10451047

10461048
/* if the device is missing, just fail this stripe */
1047-
if (!stripe->dev->bdev)
1049+
if (!stripe->dev->bdev) {
1050+
set_bit(stripe_nr * rbio->stripe_nsectors + sector_nr,
1051+
rbio->error_bitmap);
10481052
return fail_rbio_index(rbio, stripe_nr);
1053+
}
10491054

10501055
/* see if we can add this page onto our existing bio */
10511056
if (last) {
@@ -1209,6 +1214,7 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
12091214
* write.
12101215
*/
12111216
atomic_set(&rbio->error, 0);
1217+
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
12121218
rbio->faila = -1;
12131219
rbio->failb = -1;
12141220

@@ -1332,6 +1338,40 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
13321338
return -1;
13331339
}
13341340

1341+
static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
1342+
{
1343+
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
1344+
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
1345+
rbio->bioc->raid_map[0];
1346+
int total_nr_sector = offset >> fs_info->sectorsize_bits;
1347+
1348+
ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors);
1349+
1350+
bitmap_set(rbio->error_bitmap, total_nr_sector,
1351+
bio->bi_iter.bi_size >> fs_info->sectorsize_bits);
1352+
1353+
/*
1354+
* Special handling for raid56_alloc_missing_rbio() used by
1355+
* scrub/replace. Unlike call path in raid56_parity_recover(), they
1356+
* pass an empty bio here. Thus we have to find out the missing device
1357+
* and mark the stripe error instead.
1358+
*/
1359+
if (bio->bi_iter.bi_size == 0) {
1360+
bool found_missing = false;
1361+
int stripe_nr;
1362+
1363+
for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) {
1364+
if (!rbio->bioc->stripes[stripe_nr].dev->bdev) {
1365+
found_missing = true;
1366+
bitmap_set(rbio->error_bitmap,
1367+
stripe_nr * rbio->stripe_nsectors,
1368+
rbio->stripe_nsectors);
1369+
}
1370+
}
1371+
ASSERT(found_missing);
1372+
}
1373+
}
1374+
13351375
/*
13361376
* returns -EIO if we had too many failures
13371377
*/
@@ -1423,14 +1463,49 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
14231463
}
14241464
}
14251465

1466+
static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio)
1467+
{
1468+
struct bio_vec *bv = bio_first_bvec_all(bio);
1469+
int i;
1470+
1471+
for (i = 0; i < rbio->nr_sectors; i++) {
1472+
struct sector_ptr *sector;
1473+
1474+
sector = &rbio->stripe_sectors[i];
1475+
if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
1476+
break;
1477+
sector = &rbio->bio_sectors[i];
1478+
if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
1479+
break;
1480+
}
1481+
ASSERT(i < rbio->nr_sectors);
1482+
return i;
1483+
}
1484+
1485+
static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bio)
1486+
{
1487+
int total_sector_nr = get_bio_sector_nr(rbio, bio);
1488+
u32 bio_size = 0;
1489+
struct bio_vec *bvec;
1490+
struct bvec_iter_all iter_all;
1491+
1492+
bio_for_each_segment_all(bvec, bio, iter_all)
1493+
bio_size += bvec->bv_len;
1494+
1495+
bitmap_set(rbio->error_bitmap, total_sector_nr,
1496+
bio_size >> rbio->bioc->fs_info->sectorsize_bits);
1497+
}
1498+
14261499
static void raid_wait_read_end_io(struct bio *bio)
14271500
{
14281501
struct btrfs_raid_bio *rbio = bio->bi_private;
14291502

1430-
if (bio->bi_status)
1503+
if (bio->bi_status) {
14311504
fail_bio_stripe(rbio, bio);
1432-
else
1505+
rbio_update_error_bitmap(rbio, bio);
1506+
} else {
14331507
set_bio_pages_uptodate(rbio, bio);
1508+
}
14341509

14351510
bio_put(bio);
14361511
if (atomic_dec_and_test(&rbio->stripes_pending))
@@ -1863,10 +1938,10 @@ static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
18631938
struct sector_ptr *sector;
18641939

18651940
if (rbio->faila == stripe || rbio->failb == stripe) {
1866-
atomic_inc(&rbio->error);
18671941
/* Skip the current stripe. */
18681942
ASSERT(sectornr == 0);
18691943
total_sector_nr += rbio->stripe_nsectors - 1;
1944+
atomic_inc(&rbio->error);
18701945
continue;
18711946
}
18721947
sector = rbio_stripe_sector(rbio, stripe, sectornr);
@@ -1891,9 +1966,10 @@ static int recover_rbio(struct btrfs_raid_bio *rbio)
18911966

18921967
/*
18931968
* Either we're doing recover for a read failure or degraded write,
1894-
* caller should have set faila/b correctly.
1969+
* caller should have set faila/b and error bitmap correctly.
18951970
*/
18961971
ASSERT(rbio->faila >= 0 || rbio->failb >= 0);
1972+
ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors));
18971973
bio_list_init(&bio_list);
18981974

18991975
/*
@@ -1978,6 +2054,8 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
19782054
rbio->operation = BTRFS_RBIO_READ_REBUILD;
19792055
rbio_add_bio(rbio, bio);
19802056

2057+
set_rbio_range_error(rbio, bio);
2058+
19812059
rbio->faila = find_logical_bio_stripe(rbio, bio);
19822060
if (rbio->faila == -1) {
19832061
btrfs_warn(fs_info,
@@ -2038,8 +2116,10 @@ static void raid_wait_write_end_io(struct bio *bio)
20382116
struct btrfs_raid_bio *rbio = bio->bi_private;
20392117
blk_status_t err = bio->bi_status;
20402118

2041-
if (err)
2119+
if (err) {
20422120
fail_bio_stripe(rbio, bio);
2121+
rbio_update_error_bitmap(rbio, bio);
2122+
}
20432123
bio_put(bio);
20442124
if (atomic_dec_and_test(&rbio->stripes_pending))
20452125
wake_up(&rbio->io_wait);
@@ -2117,6 +2197,7 @@ static int rmw_rbio(struct btrfs_raid_bio *rbio)
21172197
spin_unlock_irq(&rbio->bio_list_lock);
21182198

21192199
atomic_set(&rbio->error, 0);
2200+
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
21202201

21212202
index_rbio_pages(rbio);
21222203

@@ -2328,6 +2409,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
23282409
}
23292410

23302411
atomic_set(&rbio->error, 0);
2412+
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
23312413

23322414
/* Map the parity stripe just once */
23332415
pointers[nr_data] = kmap_local_page(p_sector.page);
@@ -2533,6 +2615,8 @@ static int scrub_rbio(struct btrfs_raid_bio *rbio)
25332615
goto cleanup;
25342616

25352617
atomic_set(&rbio->error, 0);
2618+
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
2619+
25362620
ret = scrub_assemble_read_bios(rbio, &bio_list);
25372621
if (ret < 0)
25382622
goto cleanup;
@@ -2612,6 +2696,7 @@ raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc)
26122696
*/
26132697
ASSERT(!bio->bi_iter.bi_size);
26142698

2699+
set_rbio_range_error(rbio, bio);
26152700
rbio->faila = find_logical_bio_stripe(rbio, bio);
26162701
if (rbio->faila == -1) {
26172702
btrfs_warn_rl(fs_info,

fs/btrfs/raid56.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,17 @@ struct btrfs_raid_bio {
126126

127127
/* Allocated with real_stripes-many pointers for finish_*() calls */
128128
void **finish_pointers;
129+
130+
/*
131+
* The bitmap recording where IO errors happened.
132+
* Each bit is corresponding to one sector in either bio_sectors[] or
133+
* stripe_sectors[] array.
134+
*
135+
* The reason we don't use another bit in sector_ptr is, we have two
136+
* arrays of sectors, and a lot of IO can use sectors in both arrays.
137+
* Thus making it much harder to iterate.
138+
*/
139+
unsigned long *error_bitmap;
129140
};
130141

131142
/*

0 commit comments

Comments
 (0)