Skip to content

Commit 163a203

Browse files
djwongtytso
authored and committed
ext4: mark block group as corrupt on block bitmap error
When we notice a block-bitmap corruption (because of device failure or something else), we should mark this group as corrupt and prevent further block allocations/deallocations from it. Currently, we end up generating one error message for every block in the bitmap. This potentially could make the system unstable as noticed in some bugs. With this patch, the error is printed only the first time, and the entire block group is marked as corrupted. This prevents future allocations/deallocations from it. Also tested by corrupting the block bitmap and forcefully introducing the mb_free_blocks error: (1) create a largefile (2GB) $ dd if=/dev/zero of=largefile oflag=direct bs=10485760 count=200 (2) umount filesystem. use dumpe2fs to see which block-bitmaps are in use by largefile and note their block numbers (3) use dd to zero-out the used block bitmaps $ dd if=/dev/zero of=/dev/hdc4 bs=4096 seek=14 count=8 oflag=direct (4) mount the FS and delete the largefile. (5) recreate the largefile. verify that the new largefile does not get any blocks from the groups marked as bad. Without the patch, we will see mb_free_blocks error for each bit in each zeroed-out bitmap at (4). With the patch, we only see the error once per blockgroup: [ 309.706803] EXT4-fs error (device sdb4): ext4_mb_generate_buddy:735: group 15: 32768 clusters in bitmap, 0 in gd. blk grp corrupted. [ 309.720824] EXT4-fs error (device sdb4): ext4_mb_generate_buddy:735: group 14: 32768 clusters in bitmap, 0 in gd. blk grp corrupted. [ 309.732858] EXT4-fs error (device sdb4) in ext4_free_blocks:4802: IO failure [ 309.748321] EXT4-fs error (device sdb4): ext4_mb_generate_buddy:735: group 13: 32768 clusters in bitmap, 0 in gd. blk grp corrupted. [ 309.760331] EXT4-fs error (device sdb4) in ext4_free_blocks:4802: IO failure [ 309.769695] EXT4-fs error (device sdb4): ext4_mb_generate_buddy:735: group 12: 32768 clusters in bitmap, 0 in gd. blk grp corrupted.
[ 309.781721] EXT4-fs error (device sdb4) in ext4_free_blocks:4802: IO failure [ 309.798166] EXT4-fs error (device sdb4): ext4_mb_generate_buddy:735: group 11: 32768 clusters in bitmap, 0 in gd. blk grp corrupted. [ 309.810184] EXT4-fs error (device sdb4) in ext4_free_blocks:4802: IO failure [ 309.819532] EXT4-fs error (device sdb4): ext4_mb_generate_buddy:735: group 10: 32768 clusters in bitmap, 0 in gd. blk grp corrupted. Google-Bug-Id: 7258357 [[email protected]] Further modifications (by Darrick) to make more obvious that this corruption bit applies to blocks only. Set the corruption flag if the block group bitmap verification fails. Original-author: Aditya Kali <[email protected]> Signed-off-by: Darrick J. Wong <[email protected]> Signed-off-by: "Theodore Ts'o" <[email protected]>
1 parent dbde0ab commit 163a203

File tree

3 files changed

+31
-3
lines changed

3 files changed

+31
-3
lines changed

fs/ext4/balloc.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -356,6 +356,7 @@ void ext4_validate_block_bitmap(struct super_block *sb,
356356
struct buffer_head *bh)
357357
{
358358
ext4_fsblk_t blk;
359+
struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
359360

360361
if (buffer_verified(bh))
361362
return;
@@ -366,12 +367,14 @@ void ext4_validate_block_bitmap(struct super_block *sb,
366367
ext4_unlock_group(sb, block_group);
367368
ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
368369
block_group, blk);
370+
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
369371
return;
370372
}
371373
if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
372374
desc, bh))) {
373375
ext4_unlock_group(sb, block_group);
374376
ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
377+
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
375378
return;
376379
}
377380
set_buffer_verified(bh);

fs/ext4/ext4.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2480,9 +2480,12 @@ struct ext4_group_info {
24802480

24812481
#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
24822482
#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1
2483+
#define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT 2
24832484

24842485
#define EXT4_MB_GRP_NEED_INIT(grp) \
24852486
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
2487+
#define EXT4_MB_GRP_BBITMAP_CORRUPT(grp) \
2488+
(test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
24862489

24872490
#define EXT4_MB_GRP_WAS_TRIMMED(grp) \
24882491
(test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))

fs/ext4/mballoc.c

Lines changed: 25 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -751,13 +751,15 @@ void ext4_mb_generate_buddy(struct super_block *sb,
751751

752752
if (free != grp->bb_free) {
753753
ext4_grp_locked_error(sb, group, 0, 0,
754-
"%u clusters in bitmap, %u in gd",
754+
"%u clusters in bitmap, %u in gd; "
755+
"block bitmap corrupt.",
755756
free, grp->bb_free);
756757
/*
757-
* If we intent to continue, we consider group descritor
758+
* If we intend to continue, we consider group descriptor
758759
* corrupt and update bb_free using bitmap value
759760
*/
760761
grp->bb_free = free;
762+
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
761763
}
762764
mb_set_largest_free_order(sb, grp);
763765

@@ -1398,6 +1400,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
13981400

13991401
BUG_ON(last >= (sb->s_blocksize << 3));
14001402
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1403+
/* Don't bother if the block group is corrupt. */
1404+
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
1405+
return;
1406+
14011407
mb_check_buddy(e4b);
14021408
mb_free_blocks_double(inode, e4b, first, count);
14031409

@@ -1423,7 +1429,11 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
14231429
inode ? inode->i_ino : 0,
14241430
blocknr,
14251431
"freeing already freed block "
1426-
"(bit %u)", block);
1432+
"(bit %u); block bitmap corrupt.",
1433+
block);
1434+
/* Mark the block group as corrupt. */
1435+
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1436+
&e4b->bd_info->bb_state);
14271437
mb_regenerate_buddy(e4b);
14281438
goto done;
14291439
}
@@ -1790,6 +1800,11 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
17901800
if (err)
17911801
return err;
17921802

1803+
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
1804+
ext4_mb_unload_buddy(e4b);
1805+
return 0;
1806+
}
1807+
17931808
ext4_lock_group(ac->ac_sb, group);
17941809
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
17951810
ac->ac_g_ex.fe_len, &ex);
@@ -1987,6 +2002,9 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
19872002
if (cr <= 2 && free < ac->ac_g_ex.fe_len)
19882003
return 0;
19892004

2005+
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2006+
return 0;
2007+
19902008
/* We only do this if the grp has never been initialized */
19912009
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
19922010
int ret = ext4_mb_init_group(ac->ac_sb, group);
@@ -4674,6 +4692,10 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
46744692
overflow = 0;
46754693
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
46764694

4695+
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4696+
ext4_get_group_info(sb, block_group))))
4697+
return;
4698+
46774699
/*
46784700
* Check to see if we are freeing blocks across a group
46794701
* boundary.

0 commit comments

Comments
 (0)