Skip to content

Commit 1731a47

Browse files
committed
Merge tag 'for-4.13/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper fixes from Mike Snitzer:

- a few DM integrity fixes that improve performance. One addresses inefficiencies in the on-disk journal device layout. Another makes use of the block layer's on-stack plugging when writing the journal.

- a dm-bufio fix for the blk_status_t conversion that went in during the merge window.

- a few DM raid fixes that address correctness when suspending the device, and a fix for the validation that occurs during device activation.

- a couple of DM zoned target fixes. The important one is the fix to not use GFP_KERNEL in the I/O path, due to concerns about deadlock in low-memory conditions (e.g. swap over a DM zoned device).

- a DM DAX device fix to make sure dm_dax_flush() is called if the underlying DAX device is operating as a write cache.

* tag 'for-4.13/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm, dax: Make sure dm_dax_flush() is called if device supports it
  dm verity fec: fix GFP flags used with mempool_alloc()
  dm zoned: use GFP_NOIO in I/O path
  dm zoned: remove test for impossible REQ_OP_FLUSH conditions
  dm raid: bump target version
  dm raid: avoid mddev->suspended access
  dm raid: fix activation check in validate_raid_redundancy()
  dm raid: remove WARN_ON() in raid10_md_layout_to_format()
  dm bufio: fix error code in dm_bufio_write_dirty_buffers()
  dm integrity: test for corrupted disk format during table load
  dm integrity: WARN_ON if variables representing journal usage get out of sync
  dm integrity: use plugging when writing the journal
  dm integrity: fix inefficient allocation of journal space
2 parents 0fa8dc4 + 273752c commit 1731a47

File tree

11 files changed

+94
-46
lines changed

11 files changed

+94
-46
lines changed

Documentation/device-mapper/dm-raid.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,3 +343,4 @@ Version History
343343
1.11.0 Fix table line argument order
344344
(wrong raid10_copies/raid10_format sequence)
345345
1.11.1 Add raid4/5/6 journal write-back support via journal_mode option
346+
1.12.1 fix for MD deadlock between mddev_suspend() and md_write_start() available

drivers/dax/super.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,12 @@ void dax_write_cache(struct dax_device *dax_dev, bool wc)
278278
}
279279
EXPORT_SYMBOL_GPL(dax_write_cache);
280280

281+
bool dax_write_cache_enabled(struct dax_device *dax_dev)
282+
{
283+
return test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
284+
}
285+
EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
286+
281287
bool dax_alive(struct dax_device *dax_dev)
282288
{
283289
lockdep_assert_held(&dax_srcu);

drivers/md/dm-bufio.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,8 +1258,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
12581258
*/
12591259
int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
12601260
{
1261-
blk_status_t a;
1262-
int f;
1261+
int a, f;
12631262
unsigned long buffers_processed = 0;
12641263
struct dm_buffer *b, *tmp;
12651264

drivers/md/dm-integrity.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,16 +1587,18 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
15871587
if (likely(ic->mode == 'J')) {
15881588
if (dio->write) {
15891589
unsigned next_entry, i, pos;
1590-
unsigned ws, we;
1590+
unsigned ws, we, range_sectors;
15911591

1592-
dio->range.n_sectors = min(dio->range.n_sectors, ic->free_sectors);
1592+
dio->range.n_sectors = min(dio->range.n_sectors,
1593+
ic->free_sectors << ic->sb->log2_sectors_per_block);
15931594
if (unlikely(!dio->range.n_sectors))
15941595
goto sleep;
1595-
ic->free_sectors -= dio->range.n_sectors;
1596+
range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block;
1597+
ic->free_sectors -= range_sectors;
15961598
journal_section = ic->free_section;
15971599
journal_entry = ic->free_section_entry;
15981600

1599-
next_entry = ic->free_section_entry + dio->range.n_sectors;
1601+
next_entry = ic->free_section_entry + range_sectors;
16001602
ic->free_section_entry = next_entry % ic->journal_section_entries;
16011603
ic->free_section += next_entry / ic->journal_section_entries;
16021604
ic->n_uncommitted_sections += next_entry / ic->journal_section_entries;
@@ -1727,6 +1729,8 @@ static void pad_uncommitted(struct dm_integrity_c *ic)
17271729
wraparound_section(ic, &ic->free_section);
17281730
ic->n_uncommitted_sections++;
17291731
}
1732+
WARN_ON(ic->journal_sections * ic->journal_section_entries !=
1733+
(ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors);
17301734
}
17311735

17321736
static void integrity_commit(struct work_struct *w)
@@ -1821,6 +1825,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
18211825
{
18221826
unsigned i, j, n;
18231827
struct journal_completion comp;
1828+
struct blk_plug plug;
1829+
1830+
blk_start_plug(&plug);
18241831

18251832
comp.ic = ic;
18261833
comp.in_flight = (atomic_t)ATOMIC_INIT(1);
@@ -1945,6 +1952,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
19451952

19461953
dm_bufio_write_dirty_buffers_async(ic->bufio);
19471954

1955+
blk_finish_plug(&plug);
1956+
19481957
complete_journal_op(&comp);
19491958
wait_for_completion_io(&comp.comp);
19501959

@@ -3019,6 +3028,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
30193028
ti->error = "Block size doesn't match the information in superblock";
30203029
goto bad;
30213030
}
3031+
if (!le32_to_cpu(ic->sb->journal_sections)) {
3032+
r = -EINVAL;
3033+
ti->error = "Corrupted superblock, journal_sections is 0";
3034+
goto bad;
3035+
}
30223036
/* make sure that ti->max_io_len doesn't overflow */
30233037
if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
30243038
ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {

drivers/md/dm-raid.c

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ struct raid_dev {
208208
#define RT_FLAG_RS_BITMAP_LOADED 2
209209
#define RT_FLAG_UPDATE_SBS 3
210210
#define RT_FLAG_RESHAPE_RS 4
211+
#define RT_FLAG_RS_SUSPENDED 5
211212

212213
/* Array elements of 64 bit needed for rebuild/failed disk bits */
213214
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -564,9 +565,10 @@ static const char *raid10_md_layout_to_format(int layout)
564565
if (__raid10_near_copies(layout) > 1)
565566
return "near";
566567

567-
WARN_ON(__raid10_far_copies(layout) < 2);
568+
if (__raid10_far_copies(layout) > 1)
569+
return "far";
568570

569-
return "far";
571+
return "unknown";
570572
}
571573

572574
/* Return md raid10 algorithm for @name */
@@ -2540,11 +2542,6 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
25402542
if (!freshest)
25412543
return 0;
25422544

2543-
if (validate_raid_redundancy(rs)) {
2544-
rs->ti->error = "Insufficient redundancy to activate array";
2545-
return -EINVAL;
2546-
}
2547-
25482545
/*
25492546
* Validation of the freshest device provides the source of
25502547
* validation for the remaining devices.
@@ -2553,6 +2550,11 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
25532550
if (super_validate(rs, freshest))
25542551
return -EINVAL;
25552552

2553+
if (validate_raid_redundancy(rs)) {
2554+
rs->ti->error = "Insufficient redundancy to activate array";
2555+
return -EINVAL;
2556+
}
2557+
25562558
rdev_for_each(rdev, mddev)
25572559
if (!test_bit(Journal, &rdev->flags) &&
25582560
rdev != freshest &&
@@ -3168,6 +3170,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
31683170
}
31693171

31703172
mddev_suspend(&rs->md);
3173+
set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags);
31713174

31723175
/* Try to adjust the raid4/5/6 stripe cache size to the stripe size */
31733176
if (rs_is_raid456(rs)) {
@@ -3625,7 +3628,7 @@ static void raid_postsuspend(struct dm_target *ti)
36253628
{
36263629
struct raid_set *rs = ti->private;
36273630

3628-
if (!rs->md.suspended)
3631+
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
36293632
mddev_suspend(&rs->md);
36303633

36313634
rs->md.ro = 1;
@@ -3759,7 +3762,7 @@ static int rs_start_reshape(struct raid_set *rs)
37593762
return r;
37603763

37613764
/* Need to be resumed to be able to start reshape, recovery is frozen until raid_resume() though */
3762-
if (mddev->suspended)
3765+
if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
37633766
mddev_resume(mddev);
37643767

37653768
/*
@@ -3786,8 +3789,8 @@ static int rs_start_reshape(struct raid_set *rs)
37863789
}
37873790

37883791
/* Suspend because a resume will happen in raid_resume() */
3789-
if (!mddev->suspended)
3790-
mddev_suspend(mddev);
3792+
set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags);
3793+
mddev_suspend(mddev);
37913794

37923795
/*
37933796
* Now reshape got set up, update superblocks to
@@ -3883,13 +3886,13 @@ static void raid_resume(struct dm_target *ti)
38833886
if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
38843887
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
38853888

3886-
if (mddev->suspended)
3889+
if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
38873890
mddev_resume(mddev);
38883891
}
38893892

38903893
static struct target_type raid_target = {
38913894
.name = "raid",
3892-
.version = {1, 11, 1},
3895+
.version = {1, 12, 1},
38933896
.module = THIS_MODULE,
38943897
.ctr = raid_ctr,
38953898
.dtr = raid_dtr,

drivers/md/dm-table.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <linux/atomic.h>
2121
#include <linux/blk-mq.h>
2222
#include <linux/mount.h>
23+
#include <linux/dax.h>
2324

2425
#define DM_MSG_PREFIX "table"
2526

@@ -1630,6 +1631,37 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
16301631
return false;
16311632
}
16321633

1634+
static int device_dax_write_cache_enabled(struct dm_target *ti,
1635+
struct dm_dev *dev, sector_t start,
1636+
sector_t len, void *data)
1637+
{
1638+
struct dax_device *dax_dev = dev->dax_dev;
1639+
1640+
if (!dax_dev)
1641+
return false;
1642+
1643+
if (dax_write_cache_enabled(dax_dev))
1644+
return true;
1645+
return false;
1646+
}
1647+
1648+
static int dm_table_supports_dax_write_cache(struct dm_table *t)
1649+
{
1650+
struct dm_target *ti;
1651+
unsigned i;
1652+
1653+
for (i = 0; i < dm_table_get_num_targets(t); i++) {
1654+
ti = dm_table_get_target(t, i);
1655+
1656+
if (ti->type->iterate_devices &&
1657+
ti->type->iterate_devices(ti,
1658+
device_dax_write_cache_enabled, NULL))
1659+
return true;
1660+
}
1661+
1662+
return false;
1663+
}
1664+
16331665
static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
16341666
sector_t start, sector_t len, void *data)
16351667
{
@@ -1785,6 +1817,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
17851817
}
17861818
blk_queue_write_cache(q, wc, fua);
17871819

1820+
if (dm_table_supports_dax_write_cache(t))
1821+
dax_write_cache(t->md->dax_dev, true);
1822+
17881823
/* Ensure that all underlying devices are non-rotational. */
17891824
if (dm_table_all_devices_attribute(t, device_is_nonrot))
17901825
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);

drivers/md/dm-verity-fec.c

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -308,19 +308,14 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
308308
{
309309
unsigned n;
310310

311-
if (!fio->rs) {
312-
fio->rs = mempool_alloc(v->fec->rs_pool, 0);
313-
if (unlikely(!fio->rs)) {
314-
DMERR("failed to allocate RS");
315-
return -ENOMEM;
316-
}
317-
}
311+
if (!fio->rs)
312+
fio->rs = mempool_alloc(v->fec->rs_pool, GFP_NOIO);
318313

319314
fec_for_each_prealloc_buffer(n) {
320315
if (fio->bufs[n])
321316
continue;
322317

323-
fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO);
318+
fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOWAIT);
324319
if (unlikely(!fio->bufs[n])) {
325320
DMERR("failed to allocate FEC buffer");
326321
return -ENOMEM;
@@ -332,22 +327,16 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
332327
if (fio->bufs[n])
333328
continue;
334329

335-
fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOIO);
330+
fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOWAIT);
336331
/* we can manage with even one buffer if necessary */
337332
if (unlikely(!fio->bufs[n]))
338333
break;
339334
}
340335
fio->nbufs = n;
341336

342-
if (!fio->output) {
337+
if (!fio->output)
343338
fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO);
344339

345-
if (!fio->output) {
346-
DMERR("failed to allocate FEC page");
347-
return -ENOMEM;
348-
}
349-
}
350-
351340
return 0;
352341
}
353342

drivers/md/dm-zoned-metadata.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
624624

625625
ret = dmz_rdwr_block(zmd, REQ_OP_WRITE, block, mblk->page);
626626
if (ret == 0)
627-
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_KERNEL, NULL);
627+
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
628628

629629
return ret;
630630
}
@@ -658,7 +658,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
658658

659659
/* Flush drive cache (this will also sync data) */
660660
if (ret == 0)
661-
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_KERNEL, NULL);
661+
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
662662

663663
return ret;
664664
}
@@ -722,7 +722,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
722722

723723
/* If there are no dirty metadata blocks, just flush the device cache */
724724
if (list_empty(&write_list)) {
725-
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_KERNEL, NULL);
725+
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
726726
goto out;
727727
}
728728

@@ -927,7 +927,7 @@ static int dmz_recover_mblocks(struct dmz_metadata *zmd, unsigned int dst_set)
927927
(zmd->nr_meta_zones << zmd->dev->zone_nr_blocks_shift);
928928
}
929929

930-
page = alloc_page(GFP_KERNEL);
930+
page = alloc_page(GFP_NOIO);
931931
if (!page)
932932
return -ENOMEM;
933933

@@ -1183,7 +1183,7 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
11831183

11841184
/* Get zone information from disk */
11851185
ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone),
1186-
&blkz, &nr_blkz, GFP_KERNEL);
1186+
&blkz, &nr_blkz, GFP_NOIO);
11871187
if (ret) {
11881188
dmz_dev_err(zmd->dev, "Get zone %u report failed",
11891189
dmz_id(zmd, zone));
@@ -1257,7 +1257,7 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
12571257

12581258
ret = blkdev_reset_zones(dev->bdev,
12591259
dmz_start_sect(zmd, zone),
1260-
dev->zone_nr_sectors, GFP_KERNEL);
1260+
dev->zone_nr_sectors, GFP_NOIO);
12611261
if (ret) {
12621262
dmz_dev_err(dev, "Reset zone %u failed %d",
12631263
dmz_id(zmd, zone), ret);

drivers/md/dm-zoned-reclaim.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
7575
nr_blocks = block - wp_block;
7676
ret = blkdev_issue_zeroout(zrc->dev->bdev,
7777
dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
78-
dmz_blk2sect(nr_blocks), GFP_NOFS, false);
78+
dmz_blk2sect(nr_blocks), GFP_NOIO, 0);
7979
if (ret) {
8080
dmz_dev_err(zrc->dev,
8181
"Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",

drivers/md/dm-zoned-target.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
541541
int ret;
542542

543543
/* Create a new chunk work */
544-
cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOFS);
544+
cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
545545
if (!cw)
546546
goto out;
547547

@@ -588,7 +588,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
588588

589589
bio->bi_bdev = dev->bdev;
590590

591-
if (!nr_sectors && (bio_op(bio) != REQ_OP_FLUSH) && (bio_op(bio) != REQ_OP_WRITE))
591+
if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE)
592592
return DM_MAPIO_REMAPPED;
593593

594594
/* The BIO should be block aligned */
@@ -603,7 +603,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
603603
bioctx->status = BLK_STS_OK;
604604

605605
/* Set the BIO pending in the flush list */
606-
if (bio_op(bio) == REQ_OP_FLUSH || (!nr_sectors && bio_op(bio) == REQ_OP_WRITE)) {
606+
if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
607607
spin_lock(&dmz->flush_lock);
608608
bio_list_add(&dmz->flush_list, bio);
609609
spin_unlock(&dmz->flush_lock);
@@ -785,7 +785,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
785785

786786
/* Chunk BIO work */
787787
mutex_init(&dmz->chunk_lock);
788-
INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOFS);
788+
INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_KERNEL);
789789
dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s", WQ_MEM_RECLAIM | WQ_UNBOUND,
790790
0, dev->name);
791791
if (!dmz->chunk_wq) {

include/linux/dax.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
8787
void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
8888
size_t size);
8989
void dax_write_cache(struct dax_device *dax_dev, bool wc);
90+
bool dax_write_cache_enabled(struct dax_device *dax_dev);
9091

9192
/*
9293
* We use lowest available bit in exceptional entry for locking, one bit for

0 commit comments

Comments
 (0)