Skip to content

Commit 040639b

Browse files
committed
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li:
 "Some small fixes for MD:

  - fix raid5-cache potential problems if raid5 cache isn't fully recovered

  - fix a wait-within-wait warning in raid1/10

  - make raid5-PPL support disks with writeback cache enabled"

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid5-ppl: PPL support for disks with write-back cache enabled
  md/r5cache: print more info of log recovery
  md/raid1,raid10: silence warning about wait-within-wait
  md: introduce new personality funciton start()
2 parents 20c59c7 + 1532d9e commit 040639b

File tree

10 files changed

+285
-38
lines changed

10 files changed

+285
-38
lines changed

Documentation/md/raid5-ppl.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ case the behavior is the same as in plain raid5.
3939
PPL is available for md version-1 metadata and external (specifically IMSM)
4040
metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl.
4141

42-
Currently, volatile write-back cache should be disabled on all member drives
43-
when using PPL. Otherwise it cannot guarantee consistency in case of power
44-
failure.
42+
There is a limitation of maximum 64 disks in the array for PPL. It allows to
43+
keep data structures and implementation simple. RAID5 arrays with so many disks
44+
are not likely due to high risk of multiple disks failure. Such restriction
45+
should not be a real life limitation.

drivers/md/dm-raid.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3151,6 +3151,14 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
31513151
goto bad;
31523152
}
31533153

3154+
r = md_start(&rs->md);
3155+
3156+
if (r) {
3157+
ti->error = "Failed to start raid array";
3158+
mddev_unlock(&rs->md);
3159+
goto bad_md_start;
3160+
}
3161+
31543162
rs->callbacks.congested_fn = raid_is_congested;
31553163
dm_table_add_target_callbacks(ti->table, &rs->callbacks);
31563164

@@ -3198,6 +3206,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
31983206
mddev_unlock(&rs->md);
31993207
return 0;
32003208

3209+
bad_md_start:
32013210
bad_journal_mode_set:
32023211
bad_stripe_cache:
32033212
bad_check_reshape:

drivers/md/md.c

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
711711
return NULL;
712712
}
713713

714-
static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
714+
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
715715
{
716716
struct md_rdev *rdev;
717717

@@ -721,6 +721,7 @@ static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
721721

722722
return NULL;
723723
}
724+
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
724725

725726
static struct md_personality *find_pers(int level, char *clevel)
726727
{
@@ -5560,11 +5561,6 @@ int md_run(struct mddev *mddev)
55605561
if (start_readonly && mddev->ro == 0)
55615562
mddev->ro = 2; /* read-only, but switch on first write */
55625563

5563-
/*
5564-
* NOTE: some pers->run(), for example r5l_recovery_log(), wakes
5565-
* up mddev->thread. It is important to initialize critical
5566-
* resources for mddev->thread BEFORE calling pers->run().
5567-
*/
55685564
err = pers->run(mddev);
55695565
if (err)
55705566
pr_warn("md: pers->run() failed ...\n");
@@ -5678,6 +5674,9 @@ static int do_md_run(struct mddev *mddev)
56785674
if (mddev_is_clustered(mddev))
56795675
md_allow_write(mddev);
56805676

5677+
/* run start up tasks that require md_thread */
5678+
md_start(mddev);
5679+
56815680
md_wakeup_thread(mddev->thread);
56825681
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
56835682

@@ -5689,6 +5688,21 @@ static int do_md_run(struct mddev *mddev)
56895688
return err;
56905689
}
56915690

5691+
int md_start(struct mddev *mddev)
5692+
{
5693+
int ret = 0;
5694+
5695+
if (mddev->pers->start) {
5696+
set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5697+
md_wakeup_thread(mddev->thread);
5698+
ret = mddev->pers->start(mddev);
5699+
clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5700+
md_wakeup_thread(mddev->sync_thread);
5701+
}
5702+
return ret;
5703+
}
5704+
EXPORT_SYMBOL_GPL(md_start);
5705+
56925706
static int restart_array(struct mddev *mddev)
56935707
{
56945708
struct gendisk *disk = mddev->gendisk;
@@ -6997,7 +7011,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
69977011
return -ENODEV;
69987012

69997013
rcu_read_lock();
7000-
rdev = find_rdev_rcu(mddev, dev);
7014+
rdev = md_find_rdev_rcu(mddev, dev);
70017015
if (!rdev)
70027016
err = -ENODEV;
70037017
else {
@@ -8169,7 +8183,8 @@ void md_do_sync(struct md_thread *thread)
81698183
int ret;
81708184

81718185
/* just incase thread restarts... */
8172-
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
8186+
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8187+
test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
81738188
return;
81748189
if (mddev->ro) {/* never try to sync a read-only array */
81758190
set_bit(MD_RECOVERY_INTR, &mddev->recovery);

drivers/md/md.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,7 @@ enum recovery_flags {
485485
MD_RECOVERY_RESHAPE, /* A reshape is happening */
486486
MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */
487487
MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */
488+
MD_RECOVERY_WAIT, /* waiting for pers->start() to finish */
488489
};
489490

490491
static inline int __must_check mddev_lock(struct mddev *mddev)
@@ -523,7 +524,13 @@ struct md_personality
523524
struct list_head list;
524525
struct module *owner;
525526
bool (*make_request)(struct mddev *mddev, struct bio *bio);
527+
/*
528+
* start up works that do NOT require md_thread. tasks that
529+
* requires md_thread should go into start()
530+
*/
526531
int (*run)(struct mddev *mddev);
532+
/* start up works that require md threads */
533+
int (*start)(struct mddev *mddev);
527534
void (*free)(struct mddev *mddev, void *priv);
528535
void (*status)(struct seq_file *seq, struct mddev *mddev);
529536
/* error_handler must set ->faulty and clear ->in_sync
@@ -687,6 +694,7 @@ extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
687694

688695
extern void mddev_init(struct mddev *mddev);
689696
extern int md_run(struct mddev *mddev);
697+
extern int md_start(struct mddev *mddev);
690698
extern void md_stop(struct mddev *mddev);
691699
extern void md_stop_writes(struct mddev *mddev);
692700
extern int md_rdev_init(struct md_rdev *rdev);
@@ -702,6 +710,7 @@ extern void md_reload_sb(struct mddev *mddev, int raid_disk);
702710
extern void md_update_sb(struct mddev *mddev, int force);
703711
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
704712
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
713+
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
705714

706715
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
707716
{

drivers/md/raid1.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,17 @@ static void flush_pending_writes(struct r1conf *conf)
815815
bio = bio_list_get(&conf->pending_bio_list);
816816
conf->pending_count = 0;
817817
spin_unlock_irq(&conf->device_lock);
818+
819+
/*
820+
* As this is called in a wait_event() loop (see freeze_array),
821+
* current->state might be TASK_UNINTERRUPTIBLE which will
822+
* cause a warning when we prepare to wait again. As it is
823+
* rare that this path is taken, it is perfectly safe to force
824+
* us to go around the wait_event() loop again, so the warning
825+
* is a false-positive. Silence the warning by resetting
826+
* thread state
827+
*/
828+
__set_current_state(TASK_RUNNING);
818829
blk_start_plug(&plug);
819830
flush_bio_list(conf, bio);
820831
blk_finish_plug(&plug);

drivers/md/raid10.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,18 @@ static void flush_pending_writes(struct r10conf *conf)
900900
bio = bio_list_get(&conf->pending_bio_list);
901901
conf->pending_count = 0;
902902
spin_unlock_irq(&conf->device_lock);
903+
904+
/*
905+
* As this is called in a wait_event() loop (see freeze_array),
906+
* current->state might be TASK_UNINTERRUPTIBLE which will
907+
* cause a warning when we prepare to wait again. As it is
908+
* rare that this path is taken, it is perfectly safe to force
909+
* us to go around the wait_event() loop again, so the warning
910+
* is a false-positive. Silence the warning by resetting
911+
* thread state
912+
*/
913+
__set_current_state(TASK_RUNNING);
914+
903915
blk_start_plug(&plug);
904916
/* flush any pending bitmap writes to disk
905917
* before proceeding w/ I/O */

drivers/md/raid5-cache.c

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,9 +1111,6 @@ void r5l_write_stripe_run(struct r5l_log *log)
11111111

11121112
int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio)
11131113
{
1114-
if (!log)
1115-
return -ENODEV;
1116-
11171114
if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) {
11181115
/*
11191116
* in write through (journal only)
@@ -1592,8 +1589,6 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
15921589
void r5l_quiesce(struct r5l_log *log, int quiesce)
15931590
{
15941591
struct mddev *mddev;
1595-
if (!log)
1596-
return;
15971592

15981593
if (quiesce) {
15991594
/* make sure r5l_write_super_and_discard_space exits */
@@ -2448,7 +2443,6 @@ static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
24482443
raid5_release_stripe(sh);
24492444
}
24502445

2451-
md_wakeup_thread(conf->mddev->thread);
24522446
/* reuse conf->wait_for_quiescent in recovery */
24532447
wait_event(conf->wait_for_quiescent,
24542448
atomic_read(&conf->active_stripes) == 0);
@@ -2491,10 +2485,10 @@ static int r5l_recovery_log(struct r5l_log *log)
24912485
ctx->seq += 10000;
24922486

24932487
if ((ctx->data_only_stripes == 0) && (ctx->data_parity_stripes == 0))
2494-
pr_debug("md/raid:%s: starting from clean shutdown\n",
2488+
pr_info("md/raid:%s: starting from clean shutdown\n",
24952489
mdname(mddev));
24962490
else
2497-
pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
2491+
pr_info("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
24982492
mdname(mddev), ctx->data_only_stripes,
24992493
ctx->data_parity_stripes);
25002494

@@ -3036,6 +3030,23 @@ static int r5l_load_log(struct r5l_log *log)
30363030
return ret;
30373031
}
30383032

3033+
int r5l_start(struct r5l_log *log)
3034+
{
3035+
int ret;
3036+
3037+
if (!log)
3038+
return 0;
3039+
3040+
ret = r5l_load_log(log);
3041+
if (ret) {
3042+
struct mddev *mddev = log->rdev->mddev;
3043+
struct r5conf *conf = mddev->private;
3044+
3045+
r5l_exit_log(conf);
3046+
}
3047+
return ret;
3048+
}
3049+
30393050
void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
30403051
{
30413052
struct r5conf *conf = mddev->private;
@@ -3138,13 +3149,9 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
31383149

31393150
rcu_assign_pointer(conf->log, log);
31403151

3141-
if (r5l_load_log(log))
3142-
goto error;
3143-
31443152
set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
31453153
return 0;
31463154

3147-
error:
31483155
rcu_assign_pointer(conf->log, NULL);
31493156
md_unregister_thread(&log->reclaim_thread);
31503157
reclaim_thread:

drivers/md/raid5-log.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ extern struct md_sysfs_entry r5c_journal_mode;
3232
extern void r5c_update_on_rdev_error(struct mddev *mddev,
3333
struct md_rdev *rdev);
3434
extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
35+
extern int r5l_start(struct r5l_log *log);
3536

3637
extern struct dma_async_tx_descriptor *
3738
ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
@@ -42,6 +43,7 @@ extern int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh);
4243
extern void ppl_write_stripe_run(struct r5conf *conf);
4344
extern void ppl_stripe_write_finished(struct stripe_head *sh);
4445
extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
46+
extern void ppl_quiesce(struct r5conf *conf, int quiesce);
4547

4648
static inline bool raid5_has_ppl(struct r5conf *conf)
4749
{
@@ -87,6 +89,34 @@ static inline void log_write_stripe_run(struct r5conf *conf)
8789
ppl_write_stripe_run(conf);
8890
}
8991

92+
static inline void log_flush_stripe_to_raid(struct r5conf *conf)
93+
{
94+
if (conf->log)
95+
r5l_flush_stripe_to_raid(conf->log);
96+
else if (raid5_has_ppl(conf))
97+
ppl_write_stripe_run(conf);
98+
}
99+
100+
static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
101+
{
102+
int ret = -ENODEV;
103+
104+
if (conf->log)
105+
ret = r5l_handle_flush_request(conf->log, bio);
106+
else if (raid5_has_ppl(conf))
107+
ret = 0;
108+
109+
return ret;
110+
}
111+
112+
static inline void log_quiesce(struct r5conf *conf, int quiesce)
113+
{
114+
if (conf->log)
115+
r5l_quiesce(conf->log, quiesce);
116+
else if (raid5_has_ppl(conf))
117+
ppl_quiesce(conf, quiesce);
118+
}
119+
90120
static inline void log_exit(struct r5conf *conf)
91121
{
92122
if (conf->log)

0 commit comments

Comments
 (0)