Skip to content

Commit c9b39e5

Browse files
committed
Merge branch 'md-6.14-bitmap' into md-6.14
Move bitmap_{start, end}write calls to the md layer. These changes help address hangs in bitmap_startwrite() ([1], [2]).
[1] https://lore.kernel.org/all/CAJpMwyjmHQLvm6zg1cmQErttNNQPDAAXPKM3xgTjMhbfts986Q@mail.gmail.com/
[2] https://lore.kernel.org/all/[email protected]/
* md-6.14-bitmap:
md/md-bitmap: move bitmap_{start, end}write to md upper layer
md/raid5: implement pers->bitmap_sector()
md: add a new callback pers->bitmap_sector()
md/md-bitmap: remove the last parameter for bitmap_ops->endwrite()
md/md-bitmap: factor behind write counters out from bitmap_{start/end}write()
2 parents 4fa9161 + cd5fc65 commit c9b39e5

File tree

11 files changed

+149
-147
lines changed

11 files changed

+149
-147
lines changed

drivers/md/md-bitmap.c

Lines changed: 45 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,24 +1671,13 @@ __acquires(bitmap->lock)
16711671
}
16721672

16731673
static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
1674-
unsigned long sectors, bool behind)
1674+
unsigned long sectors)
16751675
{
16761676
struct bitmap *bitmap = mddev->bitmap;
16771677

16781678
if (!bitmap)
16791679
return 0;
16801680

1681-
if (behind) {
1682-
int bw;
1683-
atomic_inc(&bitmap->behind_writes);
1684-
bw = atomic_read(&bitmap->behind_writes);
1685-
if (bw > bitmap->behind_writes_used)
1686-
bitmap->behind_writes_used = bw;
1687-
1688-
pr_debug("inc write-behind count %d/%lu\n",
1689-
bw, bitmap->mddev->bitmap_info.max_write_behind);
1690-
}
1691-
16921681
while (sectors) {
16931682
sector_t blocks;
16941683
bitmap_counter_t *bmc;
@@ -1737,21 +1726,13 @@ static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
17371726
}
17381727

17391728
static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
1740-
unsigned long sectors, bool success, bool behind)
1729+
unsigned long sectors)
17411730
{
17421731
struct bitmap *bitmap = mddev->bitmap;
17431732

17441733
if (!bitmap)
17451734
return;
17461735

1747-
if (behind) {
1748-
if (atomic_dec_and_test(&bitmap->behind_writes))
1749-
wake_up(&bitmap->behind_wait);
1750-
pr_debug("dec write-behind count %d/%lu\n",
1751-
atomic_read(&bitmap->behind_writes),
1752-
bitmap->mddev->bitmap_info.max_write_behind);
1753-
}
1754-
17551736
while (sectors) {
17561737
sector_t blocks;
17571738
unsigned long flags;
@@ -1764,15 +1745,16 @@ static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
17641745
return;
17651746
}
17661747

1767-
if (success && !bitmap->mddev->degraded &&
1768-
bitmap->events_cleared < bitmap->mddev->events) {
1769-
bitmap->events_cleared = bitmap->mddev->events;
1770-
bitmap->need_sync = 1;
1771-
sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
1772-
}
1773-
1774-
if (!success && !NEEDED(*bmc))
1748+
if (!bitmap->mddev->degraded) {
1749+
if (bitmap->events_cleared < bitmap->mddev->events) {
1750+
bitmap->events_cleared = bitmap->mddev->events;
1751+
bitmap->need_sync = 1;
1752+
sysfs_notify_dirent_safe(
1753+
bitmap->sysfs_can_clear);
1754+
}
1755+
} else if (!NEEDED(*bmc)) {
17751756
*bmc |= NEEDED_MASK;
1757+
}
17761758

17771759
if (COUNTER(*bmc) == COUNTER_MAX)
17781760
wake_up(&bitmap->overflow_wait);
@@ -2062,6 +2044,37 @@ static void md_bitmap_free(void *data)
20622044
kfree(bitmap);
20632045
}
20642046

2047+
/*
 * Account for one more write-behind request on @mddev's bitmap.
 *
 * Returns immediately when the array has no bitmap (mddev->bitmap is NULL).
 * Otherwise increments bitmap->behind_writes and, if the resulting count is
 * larger than bitmap->behind_writes_used, stores it there, so that field
 * tracks the largest count seen by this function. The count and the
 * configured max_write_behind are reported via pr_debug().
 */
static void bitmap_start_behind_write(struct mddev *mddev)
2048+
{
2049+
struct bitmap *bitmap = mddev->bitmap;
2050+
int bw;
2051+
2052+
if (!bitmap)
2053+
return;
2054+
2055+
atomic_inc(&bitmap->behind_writes);
2056+
/*
 * NOTE(review): the counter is read back in a separate step after the
 * increment, so bw may also include concurrent updates - confirm this is
 * acceptable for the behind_writes_used bookkeeping.
 */
bw = atomic_read(&bitmap->behind_writes);
2057+
if (bw > bitmap->behind_writes_used)
2058+
bitmap->behind_writes_used = bw;
2059+
2060+
pr_debug("inc write-behind count %d/%lu\n",
2061+
bw, bitmap->mddev->bitmap_info.max_write_behind);
2062+
}
2063+
2064+
/*
 * Drop one write-behind request from @mddev's bitmap accounting.
 *
 * Returns immediately when the array has no bitmap (mddev->bitmap is NULL).
 * Otherwise decrements bitmap->behind_writes; when the counter reaches zero
 * the waiters on bitmap->behind_wait are woken up. The current count and the
 * configured max_write_behind are reported via pr_debug().
 */
static void bitmap_end_behind_write(struct mddev *mddev)
2065+
{
2066+
struct bitmap *bitmap = mddev->bitmap;
2067+
2068+
if (!bitmap)
2069+
return;
2070+
2071+
if (atomic_dec_and_test(&bitmap->behind_writes))
2072+
wake_up(&bitmap->behind_wait);
2073+
/* The counter is re-read here purely for the debug message. */
pr_debug("dec write-behind count %d/%lu\n",
2074+
atomic_read(&bitmap->behind_writes),
2075+
bitmap->mddev->bitmap_info.max_write_behind);
2076+
}
2077+
20652078
static void bitmap_wait_behind_writes(struct mddev *mddev)
20662079
{
20672080
struct bitmap *bitmap = mddev->bitmap;
@@ -2981,6 +2994,9 @@ static struct bitmap_operations bitmap_ops = {
29812994
.dirty_bits = bitmap_dirty_bits,
29822995
.unplug = bitmap_unplug,
29832996
.daemon_work = bitmap_daemon_work,
2997+
2998+
.start_behind_write = bitmap_start_behind_write,
2999+
.end_behind_write = bitmap_end_behind_write,
29843000
.wait_behind_writes = bitmap_wait_behind_writes,
29853001

29863002
.startwrite = bitmap_startwrite,

drivers/md/md-bitmap.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,15 @@ struct bitmap_operations {
8484
unsigned long e);
8585
void (*unplug)(struct mddev *mddev, bool sync);
8686
void (*daemon_work)(struct mddev *mddev);
87+
88+
void (*start_behind_write)(struct mddev *mddev);
89+
void (*end_behind_write)(struct mddev *mddev);
8790
void (*wait_behind_writes)(struct mddev *mddev);
8891

8992
int (*startwrite)(struct mddev *mddev, sector_t offset,
90-
unsigned long sectors, bool behind);
93+
unsigned long sectors);
9194
void (*endwrite)(struct mddev *mddev, sector_t offset,
92-
unsigned long sectors, bool success, bool behind);
95+
unsigned long sectors);
9396
bool (*start_sync)(struct mddev *mddev, sector_t offset,
9497
sector_t *blocks, bool degraded);
9598
void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks);

drivers/md/md.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8745,12 +8745,32 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
87458745
}
87468746
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
87478747

8748+
/*
 * Hand the range stored in @md_io_clone to the bitmap's ->startwrite().
 *
 * If the personality provides ->bitmap_sector(), it is called first with
 * pointers to md_io_clone->offset and md_io_clone->sectors, so it can
 * rewrite both values in place before they are passed on. The value
 * returned by ->startwrite() is not checked.
 *
 * This function does not test mddev->bitmap or mddev->bitmap_ops itself.
 */
static void md_bitmap_start(struct mddev *mddev,
8749+
struct md_io_clone *md_io_clone)
8750+
{
8751+
if (mddev->pers->bitmap_sector)
8752+
mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
8753+
&md_io_clone->sectors);
8754+
8755+
mddev->bitmap_ops->startwrite(mddev, md_io_clone->offset,
8756+
md_io_clone->sectors);
8757+
}
8758+
8759+
/*
 * Hand the range stored in @md_io_clone to the bitmap's ->endwrite().
 *
 * The offset and sectors are taken from @md_io_clone as they are; no
 * ->bitmap_sector() conversion is applied here. This function does not test
 * mddev->bitmap or mddev->bitmap_ops itself.
 */
static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
8760+
{
8761+
mddev->bitmap_ops->endwrite(mddev, md_io_clone->offset,
8762+
md_io_clone->sectors);
8763+
}
8764+
87488765
static void md_end_clone_io(struct bio *bio)
87498766
{
87508767
struct md_io_clone *md_io_clone = bio->bi_private;
87518768
struct bio *orig_bio = md_io_clone->orig_bio;
87528769
struct mddev *mddev = md_io_clone->mddev;
87538770

8771+
if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
8772+
md_bitmap_end(mddev, md_io_clone);
8773+
87548774
if (bio->bi_status && !orig_bio->bi_status)
87558775
orig_bio->bi_status = bio->bi_status;
87568776

@@ -8775,6 +8795,12 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio)
87758795
if (blk_queue_io_stat(bdev->bd_disk->queue))
87768796
md_io_clone->start_time = bio_start_io_acct(*bio);
87778797

8798+
if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
8799+
md_io_clone->offset = (*bio)->bi_iter.bi_sector;
8800+
md_io_clone->sectors = bio_sectors(*bio);
8801+
md_bitmap_start(mddev, md_io_clone);
8802+
}
8803+
87788804
clone->bi_end_io = md_end_clone_io;
87798805
clone->bi_private = md_io_clone;
87808806
*bio = clone;
@@ -8793,6 +8819,9 @@ void md_free_cloned_bio(struct bio *bio)
87938819
struct bio *orig_bio = md_io_clone->orig_bio;
87948820
struct mddev *mddev = md_io_clone->mddev;
87958821

8822+
if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
8823+
md_bitmap_end(mddev, md_io_clone);
8824+
87968825
if (bio->bi_status && !orig_bio->bi_status)
87978826
orig_bio->bi_status = bio->bi_status;
87988827

drivers/md/md.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,9 @@ struct md_personality
746746
void *(*takeover) (struct mddev *mddev);
747747
/* Changes the consistency policy of an active array. */
748748
int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
749+
/* convert io ranges from array to bitmap */
750+
void (*bitmap_sector)(struct mddev *mddev, sector_t *offset,
751+
unsigned long *sectors);
749752
};
750753

751754
struct md_sysfs_entry {
@@ -828,6 +831,8 @@ struct md_io_clone {
828831
struct mddev *mddev;
829832
struct bio *orig_bio;
830833
unsigned long start_time;
834+
sector_t offset;
835+
unsigned long sectors;
831836
struct bio bio_clone;
832837
};
833838

drivers/md/raid1.c

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -420,10 +420,8 @@ static void close_write(struct r1bio *r1_bio)
420420
r1_bio->behind_master_bio = NULL;
421421
}
422422

423-
/* clear the bitmap if all writes complete successfully */
424-
mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors,
425-
!test_bit(R1BIO_Degraded, &r1_bio->state),
426-
test_bit(R1BIO_BehindIO, &r1_bio->state));
423+
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
424+
mddev->bitmap_ops->end_behind_write(mddev);
427425
md_write_end(mddev);
428426
}
429427

@@ -480,8 +478,6 @@ static void raid1_end_write_request(struct bio *bio)
480478
if (!test_bit(Faulty, &rdev->flags))
481479
set_bit(R1BIO_WriteError, &r1_bio->state);
482480
else {
483-
/* Fail the request */
484-
set_bit(R1BIO_Degraded, &r1_bio->state);
485481
/* Finished with this branch */
486482
r1_bio->bios[mirror] = NULL;
487483
to_put = bio;
@@ -1535,11 +1531,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
15351531
write_behind = true;
15361532

15371533
r1_bio->bios[i] = NULL;
1538-
if (!rdev || test_bit(Faulty, &rdev->flags)) {
1539-
if (i < conf->raid_disks)
1540-
set_bit(R1BIO_Degraded, &r1_bio->state);
1534+
if (!rdev || test_bit(Faulty, &rdev->flags))
15411535
continue;
1542-
}
15431536

15441537
atomic_inc(&rdev->nr_pending);
15451538
if (test_bit(WriteErrorSeen, &rdev->flags)) {
@@ -1558,16 +1551,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
15581551
*/
15591552
max_sectors = bad_sectors;
15601553
rdev_dec_pending(rdev, mddev);
1561-
/* We don't set R1BIO_Degraded as that
1562-
* only applies if the disk is
1563-
* missing, so it might be re-added,
1564-
* and we want to know to recover this
1565-
* chunk.
1566-
* In this case the device is here,
1567-
* and the fact that this chunk is not
1568-
* in-sync is recorded in the bad
1569-
* block log
1570-
*/
15711554
continue;
15721555
}
15731556
if (is_bad) {
@@ -1645,9 +1628,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
16451628
stats.behind_writes < max_write_behind)
16461629
alloc_behind_master_bio(r1_bio, bio);
16471630

1648-
mddev->bitmap_ops->startwrite(
1649-
mddev, r1_bio->sector, r1_bio->sectors,
1650-
test_bit(R1BIO_BehindIO, &r1_bio->state));
1631+
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
1632+
mddev->bitmap_ops->start_behind_write(mddev);
16511633
first_clone = 0;
16521634
}
16531635

@@ -2614,12 +2596,10 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
26142596
* errors.
26152597
*/
26162598
fail = true;
2617-
if (!narrow_write_error(r1_bio, m)) {
2599+
if (!narrow_write_error(r1_bio, m))
26182600
md_error(conf->mddev,
26192601
conf->mirrors[m].rdev);
26202602
/* an I/O failed, we can't clear the bitmap */
2621-
set_bit(R1BIO_Degraded, &r1_bio->state);
2622-
}
26232603
rdev_dec_pending(conf->mirrors[m].rdev,
26242604
conf->mddev);
26252605
}
@@ -2710,8 +2690,6 @@ static void raid1d(struct md_thread *thread)
27102690
list_del(&r1_bio->retry_list);
27112691
idx = sector_to_idx(r1_bio->sector);
27122692
atomic_dec(&conf->nr_queued[idx]);
2713-
if (mddev->degraded)
2714-
set_bit(R1BIO_Degraded, &r1_bio->state);
27152693
if (test_bit(R1BIO_WriteError, &r1_bio->state))
27162694
close_write(r1_bio);
27172695
raid_end_bio_io(r1_bio);

drivers/md/raid1.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,6 @@ struct r1bio {
188188
enum r1bio_state {
189189
R1BIO_Uptodate,
190190
R1BIO_IsSync,
191-
R1BIO_Degraded,
192191
R1BIO_BehindIO,
193192
/* Set ReadError on bios that experience a readerror so that
194193
* raid1d knows what to do with them.

drivers/md/raid10.c

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,6 @@ static void close_write(struct r10bio *r10_bio)
428428
{
429429
struct mddev *mddev = r10_bio->mddev;
430430

431-
/* clear the bitmap if all writes complete successfully */
432-
mddev->bitmap_ops->endwrite(mddev, r10_bio->sector, r10_bio->sectors,
433-
!test_bit(R10BIO_Degraded, &r10_bio->state),
434-
false);
435431
md_write_end(mddev);
436432
}
437433

@@ -501,7 +497,6 @@ static void raid10_end_write_request(struct bio *bio)
501497
set_bit(R10BIO_WriteError, &r10_bio->state);
502498
else {
503499
/* Fail the request */
504-
set_bit(R10BIO_Degraded, &r10_bio->state);
505500
r10_bio->devs[slot].bio = NULL;
506501
to_put = bio;
507502
dec_rdev = 1;
@@ -1438,10 +1433,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
14381433
r10_bio->devs[i].bio = NULL;
14391434
r10_bio->devs[i].repl_bio = NULL;
14401435

1441-
if (!rdev && !rrdev) {
1442-
set_bit(R10BIO_Degraded, &r10_bio->state);
1436+
if (!rdev && !rrdev)
14431437
continue;
1444-
}
14451438
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
14461439
sector_t first_bad;
14471440
sector_t dev_sector = r10_bio->devs[i].addr;
@@ -1458,14 +1451,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
14581451
* to other devices yet
14591452
*/
14601453
max_sectors = bad_sectors;
1461-
/* We don't set R10BIO_Degraded as that
1462-
* only applies if the disk is missing,
1463-
* so it might be re-added, and we want to
1464-
* know to recover this chunk.
1465-
* In this case the device is here, and the
1466-
* fact that this chunk is not in-sync is
1467-
* recorded in the bad block log.
1468-
*/
14691454
continue;
14701455
}
14711456
if (is_bad) {
@@ -1519,8 +1504,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
15191504
md_account_bio(mddev, &bio);
15201505
r10_bio->master_bio = bio;
15211506
atomic_set(&r10_bio->remaining, 1);
1522-
mddev->bitmap_ops->startwrite(mddev, r10_bio->sector, r10_bio->sectors,
1523-
false);
15241507

15251508
for (i = 0; i < conf->copies; i++) {
15261509
if (r10_bio->devs[i].bio)
@@ -2966,11 +2949,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
29662949
rdev_dec_pending(rdev, conf->mddev);
29672950
} else if (bio != NULL && bio->bi_status) {
29682951
fail = true;
2969-
if (!narrow_write_error(r10_bio, m)) {
2952+
if (!narrow_write_error(r10_bio, m))
29702953
md_error(conf->mddev, rdev);
2971-
set_bit(R10BIO_Degraded,
2972-
&r10_bio->state);
2973-
}
29742954
rdev_dec_pending(rdev, conf->mddev);
29752955
}
29762956
bio = r10_bio->devs[m].repl_bio;
@@ -3029,8 +3009,6 @@ static void raid10d(struct md_thread *thread)
30293009
r10_bio = list_first_entry(&tmp, struct r10bio,
30303010
retry_list);
30313011
list_del(&r10_bio->retry_list);
3032-
if (mddev->degraded)
3033-
set_bit(R10BIO_Degraded, &r10_bio->state);
30343012

30353013
if (test_bit(R10BIO_WriteError,
30363014
&r10_bio->state))

drivers/md/raid10.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,6 @@ enum r10bio_state {
161161
R10BIO_IsSync,
162162
R10BIO_IsRecover,
163163
R10BIO_IsReshape,
164-
R10BIO_Degraded,
165164
/* Set ReadError on bios that experience a read error
166165
* so that raid10d knows what to do with them.
167166
*/

0 commit comments

Comments
 (0)