Skip to content

Commit 62f7b19

Browse files
Guilherme G. Piccoli, liu-song-6
authored and committed
md raid0/linear: Mark array as 'broken' and fail BIOs if a member is gone
Currently md raid0/linear are not provided with any mechanism to validate if an array member got removed or failed. The driver keeps sending BIOs regardless of the state of array members, and the kernel shows state 'clean' in the 'array_state' sysfs attribute. This leads to the following situation: if a raid0/linear array member is removed and the array is mounted, a user writing to this array won't realize that errors are happening unless they check dmesg or perform one fsync per written file. Despite udev signaling the member device is gone, 'mdadm' cannot issue the STOP_ARRAY ioctl successfully, given the array is mounted. In other words, no -EIO is returned and writes (except direct ones) appear normal. This means the user might think the written data is correctly stored in the array, but instead garbage was written, given that raid0 does striping (and so it requires all its members to be working in order to not corrupt data). For md/linear, writes to the available members will work fine, but if the writes go to the missing member(s), it'll cause a file corruption situation, since the portion of the writes directed to the missing devices isn't actually written. This patch changes this behavior: we check if the block device's gendisk is UP when submitting the BIO to the array member, and if it isn't, we flag the md device as MD_BROKEN and fail subsequent I/Os to that device; a read request to the array requiring data from a valid member is still completed. While flagging the device as MD_BROKEN, we also show a rate-limited warning in the kernel log. A new array state 'broken' was added too: it mimics the state 'clean' in every aspect, being useful only to distinguish if the array has some member missing. We rely on the MD_BROKEN flag to put the array in the 'broken' state. This state cannot be written to 'array_state': since it just shows that one or more members of the array are missing but otherwise acts like 'clean', it wouldn't make sense to write it. 
With this patch, the filesystem reacts much faster to the event of a missing array member: after some I/O errors, ext4 for instance aborts the journal and prevents corruption. Without this change, we're able to keep writing to the disk and after a machine reboot, e2fsck shows some severe fs errors that demand fixing. This patch was tested on ext4 and xfs filesystems, and requires a 'mdadm' counterpart to handle the 'broken' state. Cc: Song Liu <[email protected]> Reviewed-by: NeilBrown <[email protected]> Signed-off-by: Guilherme G. Piccoli <[email protected]> Signed-off-by: Song Liu <[email protected]>
1 parent a22a960 commit 62f7b19

File tree

4 files changed

+45
-4
lines changed

4 files changed

+45
-4
lines changed

drivers/md/md-linear.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,11 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
258258
bio_sector < start_sector))
259259
goto out_of_bounds;
260260

261+
if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
262+
bio_io_error(bio);
263+
return true;
264+
}
265+
261266
if (unlikely(bio_end_sector(bio) > end_sector)) {
262267
/* This bio crosses a device boundary, so we have to split it */
263268
struct bio *split = bio_split(bio, end_sector - bio_sector,

drivers/md/md.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,11 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
376376
struct mddev *mddev = q->queuedata;
377377
unsigned int sectors;
378378

379+
if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
380+
bio_io_error(bio);
381+
return BLK_QC_T_NONE;
382+
}
383+
379384
blk_queue_split(q, &bio);
380385

381386
if (mddev == NULL || mddev->pers == NULL) {
@@ -4158,12 +4163,17 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
41584163
* active-idle
41594164
* like active, but no writes have been seen for a while (100msec).
41604165
*
4166+
* broken
4167+
* RAID0/LINEAR-only: same as clean, but array is missing a member.
4168+
* It's useful because RAID0/LINEAR mounted-arrays aren't stopped
4169+
* when a member is gone, so this state will at least alert the
4170+
* user that something is wrong.
41614171
*/
41624172
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4163-
write_pending, active_idle, bad_word};
4173+
write_pending, active_idle, broken, bad_word};
41644174
static char *array_states[] = {
41654175
"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4166-
"write-pending", "active-idle", NULL };
4176+
"write-pending", "active-idle", "broken", NULL };
41674177

41684178
static int match_word(const char *word, char **list)
41694179
{
@@ -4179,7 +4189,7 @@ array_state_show(struct mddev *mddev, char *page)
41794189
{
41804190
enum array_state st = inactive;
41814191

4182-
if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags))
4192+
if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
41834193
switch(mddev->ro) {
41844194
case 1:
41854195
st = readonly;
@@ -4199,7 +4209,10 @@ array_state_show(struct mddev *mddev, char *page)
41994209
st = active;
42004210
spin_unlock(&mddev->lock);
42014211
}
4202-
else {
4212+
4213+
if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4214+
st = broken;
4215+
} else {
42034216
if (list_empty(&mddev->disks) &&
42044217
mddev->raid_disks == 0 &&
42054218
mddev->dev_sectors == 0)
@@ -4313,6 +4326,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
43134326
break;
43144327
case write_pending:
43154328
case active_idle:
4329+
case broken:
43164330
/* these cannot be set */
43174331
break;
43184332
}

drivers/md/md.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,9 @@ enum mddev_flags {
251251
MD_NOT_READY, /* do_md_run() is active, so 'array_state'
252252
* must not report that array is ready yet
253253
*/
254+
MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
255+
* I/O in case an array member is gone/failed.
256+
*/
254257
};
255258

256259
enum mddev_sb_flags {
@@ -739,6 +742,19 @@ extern void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
739742
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
740743
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
741744

745+
/*
 * is_mddev_broken() - check whether the member @rdev's underlying disk is
 * still up, and mark the owning array as broken if it is not.
 *
 * @rdev:    array member whose block device's gendisk flags are inspected.
 * @md_type: personality name used only in the warning text (callers in this
 *           patch pass "linear" or "raid0").
 *
 * Returns true when GENHD_FL_UP is not set on the member's gendisk, false
 * otherwise. As a side effect, in the true case MD_BROKEN is set in
 * rdev->mddev->flags.
 */
static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
746+
{
747+
int flags = rdev->bdev->bd_disk->flags;
748+
749+
if (!(flags & GENHD_FL_UP)) {
750+
/*
 * Set MD_BROKEN and warn only when test_and_set_bit() reports the bit
 * was previously clear, so the message is printed on the transition
 * rather than on every failed BIO.
 */
if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
751+
pr_warn("md: %s: %s array has a missing/failed member\n",
752+
mdname(rdev->mddev), md_type);
753+
return true;
754+
}
755+
return false;
756+
}
757+
742758
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
743759
{
744760
int faulty = test_bit(Faulty, &rdev->flags);

drivers/md/raid0.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,12 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
586586

587587
zone = find_zone(mddev->private, &sector);
588588
tmp_dev = map_sector(mddev, zone, sector, &sector);
589+
590+
if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
591+
bio_io_error(bio);
592+
return true;
593+
}
594+
589595
bio_set_dev(bio, tmp_dev->bdev);
590596
bio->bi_iter.bi_sector = sector + zone->dev_start +
591597
tmp_dev->data_offset;

0 commit comments

Comments
 (0)