Skip to content

Commit 35bfc52

Browse files
NeilBrown authored and shligit committed
md: allow metadata update while suspending.
There are various deadlocks that can occur when a thread holds reconfig_mutex and calls ->quiesce(mddev, 1). Some write requests block waiting for metadata to be updated (e.g. to record a device failure), and the md thread updates the metadata while the reconfig mutex is held, so holding the mutex can stop write requests from completing, and this prevents ->quiesce(mddev, 1) from completing.

->quiesce() is now usually called from mddev_suspend(), and it is always called with reconfig_mutex held. So at this time it is safe for the thread to update metadata without explicitly taking the lock.

So add 2 new flags, one which says that unlocked updates are allowed, and one which says that one is happening. Then allow it while the quiesce completes, and then wait for it to finish.

Reported-and-tested-by: Xiao Ni <[email protected]>
Signed-off-by: NeilBrown <[email protected]>
Signed-off-by: Shaohua Li <[email protected]>
1 parent 9e1cc0a commit 35bfc52

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

drivers/md/md.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,8 +364,12 @@ void mddev_suspend(struct mddev *mddev)
364364
return;
365365
synchronize_rcu();
366366
wake_up(&mddev->sb_wait);
367+
set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
368+
smp_mb__after_atomic();
367369
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
368370
mddev->pers->quiesce(mddev, 1);
371+
clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
372+
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
369373

370374
del_timer_sync(&mddev->safemode_timer);
371375
}
@@ -8838,6 +8842,16 @@ void md_check_recovery(struct mddev *mddev)
88388842
unlock:
88398843
wake_up(&mddev->sb_wait);
88408844
mddev_unlock(mddev);
8845+
} else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
8846+
/* Write superblock - thread that called mddev_suspend()
8847+
* holds reconfig_mutex for us.
8848+
*/
8849+
set_bit(MD_UPDATING_SB, &mddev->flags);
8850+
smp_mb__after_atomic();
8851+
if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
8852+
md_update_sb(mddev, 0);
8853+
clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
8854+
wake_up(&mddev->sb_wait);
88418855
}
88428856
}
88438857
EXPORT_SYMBOL(md_check_recovery);

drivers/md/md.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,12 @@ enum mddev_flags {
237237
*/
238238
MD_HAS_PPL, /* The raid array has PPL feature set */
239239
MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
240+
MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
241+
* the metadata without taking reconfig_mutex.
242+
*/
243+
MD_UPDATING_SB, /* md_check_recovery is updating the metadata
244+
* without explicitly holding reconfig_mutex.
245+
*/
240246
};
241247

242248
enum mddev_sb_flags {

0 commit comments

Comments
 (0)