
Commit 0223334

dm: optimize dm_mq_queue_rq to _not_ use kthread if using pure blk-mq
dm_mq_queue_rq() is in atomic context, so care must be taken not to sleep; as such, GFP_ATOMIC is used for the md->bs bioset allocations and for dm-mpath's call to blk_get_request(). In the future the bioset allocations will hopefully go away (by removing support for partial completions of bios in a cloned request).

Also prepare for supporting DM blk-mq on top of old-style request_fn device(s) if a new dm-mod 'use_blk_mq' parameter is set. The kthread will still be used to queue work if blk-mq is used on top of old-style request_fn device(s).

Signed-off-by: Mike Snitzer <[email protected]>
1 parent bfebd1c commit 0223334
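
For readers unfamiliar with the blk-mq pdu trick this commit relies on: when stacking on old-style request_fn devices, the clone request's memory is reserved as part of each request's per-driver data unit (pdu) at tag-set setup time, then recovered with pointer arithmetic in .queue_rq. Below is a minimal userspace sketch of that layout technique; the struct names and sizes are simplified stand-ins, not the kernel's real dm_rq_target_io or struct request.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins: the real struct dm_rq_target_io and
 * struct request are kernel structures and far larger. */
struct mock_request { int tag; };
struct mock_tio { struct mock_request *clone; };

int main(void)
{
	/* Mirrors md->tag_set.cmd_size for DM_TYPE_REQUEST_BASED:
	 * the clone's memory is appended to the pdu up front, so no
	 * separate allocation is needed in atomic context later. */
	size_t cmd_size = sizeof(struct mock_tio) + sizeof(struct mock_request);
	void *pdu = calloc(1, cmd_size);

	if (!pdu)
		return 1;

	struct mock_tio *tio = pdu;
	/* Same arithmetic as dm_mq_queue_rq(): the clone request
	 * lives at the end of the pdu, right after the tio. */
	tio->clone = (struct mock_request *)((char *)pdu + sizeof(struct mock_tio));
	tio->clone->tag = 42;

	printf("clone tag = %d\n", tio->clone->tag);
	free(pdu);
	return 0;
}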

File tree: 2 files changed (+50, -16 lines)


drivers/md/dm-mpath.c

Lines changed: 1 addition & 1 deletion
@@ -428,7 +428,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 	} else {
 		/* blk-mq request-based interface */
 		*__clone = blk_get_request(bdev_get_queue(bdev),
-				rq_data_dir(rq), GFP_KERNEL);
+				rq_data_dir(rq), GFP_ATOMIC);
 		if (IS_ERR(*__clone))
 			/* ENOMEM, requeue */
 			return r;
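
The GFP_KERNEL to GFP_ATOMIC switch above follows from the commit message: __multipath_map() can now be reached from dm_mq_queue_rq()'s atomic context, where an allocation must not sleep. A minimal userspace sketch of the resulting error-handling pattern, with illustrative names rather than the kernel API:

#include <stdio.h>
#include <stdlib.h>

#define DM_MAPIO_REQUEUE  1	/* ask the caller to retry later */
#define DM_MAPIO_REMAPPED 2

/* Stands in for blk_get_request(..., GFP_ATOMIC): may fail
 * immediately, but never sleeps waiting for memory. */
static void *try_get_request_nowait(void)
{
	return malloc(64);
}

static int multipath_map_sketch(void)
{
	void *clone = try_get_request_nowait();

	if (!clone)
		return DM_MAPIO_REQUEUE;	/* ENOMEM: requeue, don't block */
	free(clone);
	return DM_MAPIO_REMAPPED;
}

int main(void)
{
	printf("map result: %d\n", multipath_map_sketch());
	return 0;
}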

drivers/md/dm.c

Lines changed: 49 additions & 15 deletions
@@ -1077,9 +1077,10 @@ static void free_rq_clone(struct request *clone)
 
 	blk_rq_unprep_clone(clone);
 
-	if (clone->q && clone->q->mq_ops)
+	if (clone->q->mq_ops)
 		tio->ti->type->release_clone_rq(clone);
-	else
+	else if (!md->queue->mq_ops)
+		/* request_fn queue stacked on request_fn queue(s) */
 		free_clone_request(md, clone);
 
 	if (!md->queue->mq_ops)
@@ -1838,15 +1839,25 @@ static int setup_clone(struct request *clone, struct request *rq,
 static struct request *clone_rq(struct request *rq, struct mapped_device *md,
 				struct dm_rq_target_io *tio, gfp_t gfp_mask)
 {
-	struct request *clone = alloc_clone_request(md, gfp_mask);
+	/*
+	 * Do not allocate a clone if tio->clone was already set
+	 * (see: dm_mq_queue_rq).
+	 */
+	bool alloc_clone = !tio->clone;
+	struct request *clone;
 
-	if (!clone)
-		return NULL;
+	if (alloc_clone) {
+		clone = alloc_clone_request(md, gfp_mask);
+		if (!clone)
+			return NULL;
+	} else
+		clone = tio->clone;
 
 	blk_rq_init(NULL, clone);
 	if (setup_clone(clone, rq, tio, gfp_mask)) {
 		/* -ENOMEM */
-		free_clone_request(md, clone);
+		if (alloc_clone)
+			free_clone_request(md, clone);
 		return NULL;
 	}
 
@@ -1864,7 +1875,8 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
 	tio->orig = rq;
 	tio->error = 0;
 	memset(&tio->info, 0, sizeof(tio->info));
-	init_kthread_work(&tio->work, map_tio_request);
+	if (md->kworker_task)
+		init_kthread_work(&tio->work, map_tio_request);
 }
 
 static struct dm_rq_target_io *prep_tio(struct request *rq,
@@ -1941,7 +1953,7 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 	}
 	if (IS_ERR(clone))
 		return DM_MAPIO_REQUEUE;
-	if (setup_clone(clone, rq, tio, GFP_NOIO)) {
+	if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
 		/* -ENOMEM */
 		ti->type->release_clone_rq(clone);
 		return DM_MAPIO_REQUEUE;
@@ -2408,7 +2420,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
 	p->bs = NULL;
 
 out:
-	/* mempool bind completed, now no need any mempools in the table */
+	/* mempool bind completed, no longer need any mempools in the table */
 	dm_table_free_md_mempools(t);
 }
 
@@ -2713,9 +2725,24 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	/* Init tio using md established in .init_request */
 	init_tio(tio, rq, md);
 
-	/* Establish tio->ti before queuing work (map_tio_request) */
+	/*
+	 * Establish tio->ti before queuing work (map_tio_request)
+	 * or making direct call to map_request().
+	 */
 	tio->ti = ti;
-	queue_kthread_work(&md->kworker, &tio->work);
+
+	/* Clone the request if underlying devices aren't blk-mq */
+	if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
+		/* clone request is allocated at the end of the pdu */
+		tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
+		if (!clone_rq(rq, md, tio, GFP_ATOMIC))
+			return BLK_MQ_RQ_QUEUE_BUSY;
+		queue_kthread_work(&md->kworker, &tio->work);
+	} else {
+		/* Direct call is fine since .queue_rq allows allocations */
+		if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
+			dm_requeue_unmapped_original_request(md, rq);
+	}
 
 	return BLK_MQ_RQ_QUEUE_OK;
 }
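
The hunk above is the core of the optimization: pure blk-mq stacks map the request inline in .queue_rq, while request_fn-stacked tables still bounce through the kthread. A toy userspace sketch of that dispatch split, with all names illustrative rather than the kernel API:

#include <stdbool.h>
#include <stdio.h>

enum mapio { MAPIO_OK, MAPIO_REQUEUE };

/* Stands in for a direct map_request() call on the blk-mq path. */
static enum mapio map_inline(int rq)
{
	printf("rq %d mapped inline (pure blk-mq)\n", rq);
	return MAPIO_OK;
}

/* Stands in for queue_kthread_work(&md->kworker, &tio->work). */
static void hand_to_worker(int rq)
{
	printf("rq %d queued to worker (request_fn stack)\n", rq);
}

static int queue_rq_sketch(int rq, bool stacked_on_request_fn)
{
	if (stacked_on_request_fn)
		hand_to_worker(rq);	/* mapping may sleep, so defer it */
	else if (map_inline(rq) == MAPIO_REQUEUE)
		printf("rq %d requeued\n", rq);
	return 0;
}

int main(void)
{
	queue_rq_sketch(1, true);
	queue_rq_sketch(2, false);
	return 0;
}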
@@ -2729,6 +2756,7 @@ static struct blk_mq_ops dm_mq_ops = {
 
 static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
 {
+	unsigned md_type = dm_get_md_type(md);
 	struct request_queue *q;
 	int err;
 
@@ -2738,7 +2766,11 @@ static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
 	md->tag_set.numa_node = NUMA_NO_NODE;
 	md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
 	md->tag_set.nr_hw_queues = 1;
-	md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
+	if (md_type == DM_TYPE_REQUEST_BASED) {
+		/* make the memory for non-blk-mq clone part of the pdu */
+		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request);
+	} else
+		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
 	md->tag_set.driver_data = md;
 
 	err = blk_mq_alloc_tag_set(&md->tag_set);
@@ -2756,7 +2788,8 @@ static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
 	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
 	blk_mq_register_disk(md->disk);
 
-	init_rq_based_worker_thread(md);
+	if (md_type == DM_TYPE_REQUEST_BASED)
+		init_rq_based_worker_thread(md);
 
 	return 0;
 
@@ -2876,7 +2909,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 	set_bit(DMF_FREEING, &md->flags);
 	spin_unlock(&_minor_lock);
 
-	if (dm_request_based(md))
+	if (dm_request_based(md) && md->kworker_task)
 		flush_kthread_worker(&md->kworker);
 
 	/*
@@ -3130,7 +3163,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	 */
 	if (dm_request_based(md)) {
 		stop_queue(md->queue);
-		flush_kthread_worker(&md->kworker);
+		if (md->kworker_task)
+			flush_kthread_worker(&md->kworker);
 	}
 
 	flush_workqueue(md->wq);
