Commit f70ced0

Ming Lei authored and axboe committed

blk-mq: support per-dispatch_queue flush machinery
This patch runs one flush machinery per blk-mq dispatch queue, so that:

- the existing init_request and exit_request callbacks now cover the flush
  request too, which fixes the buggy copy-based initialization of the flush
  request's pdu
- flush performance improves in the multi hw-queue case

In a fio sync write test over virtio-blk (4 hw queues, ioengine=sync,
iodepth=64, numjobs=4, bs=4K), throughput increased substantially in my
test environment:

- throughput: +70% in case of virtio-blk over null_blk
- throughput: +30% in case of virtio-blk over an SSD image

The multi-virtqueue feature isn't merged into QEMU yet; patches for it can
be found in the tree below:

        git://kernel.ubuntu.com/ming/qemu.git  v2.1.0-mq.4

Simply passing 'num_queues=4 vectors=5' is enough to enable the multi-queue
(quad queue) feature for QEMU virtio-blk.

Suggested-by: Christoph Hellwig <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
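The first point is worth a concrete illustration. Below is a minimal,
hypothetical driver-side sketch (the mydrv_* names and pdu fields are
invented, not part of this commit): with this patch, each hw queue's flush
request is passed through the same init_request/exit_request callbacks as
every other request, using the reserved tag set->queue_depth + hctx_idx, so
the driver can initialize the flush request's pdu directly instead of
relying on the old memcpy-based cloning.

/* Hypothetical driver callback, sketch only: with this commit it is also
 * called for each hw queue's flush request (request index
 * set->queue_depth + hctx_idx), so the pdu is initialized here once
 * instead of being memcpy'd from the request that triggered the flush.
 */
static int mydrv_init_request(void *data, struct request *rq,
                              unsigned int hctx_idx, unsigned int rq_idx,
                              unsigned int numa_node)
{
        struct mydrv_cmd *cmd = blk_mq_rq_to_pdu(rq);   /* driver pdu */

        memset(cmd, 0, sizeof(*cmd));
        cmd->dev = data;        /* hypothetical back-pointer */
        return 0;
}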
1 parent e97c293 commit f70ced0

File tree: 6 files changed (+60 / -39 lines)

block/blk-core.c
Lines changed: 1 addition & 1 deletion

@@ -704,7 +704,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
        if (!q)
                return NULL;

-       q->fq = blk_alloc_flush_queue(q);
+       q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
        if (!q->fq)
                return NULL;

block/blk-flush.c
Lines changed: 14 additions & 7 deletions

@@ -305,8 +305,15 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
        fq->flush_pending_idx ^= 1;

        blk_rq_init(q, flush_rq);
-       if (q->mq_ops)
-               blk_mq_clone_flush_request(flush_rq, first_rq);
+
+       /*
+        * Borrow tag from the first request since they can't
+        * be in flight at the same time.
+        */
+       if (q->mq_ops) {
+               flush_rq->mq_ctx = first_rq->mq_ctx;
+               flush_rq->tag = first_rq->tag;
+       }

        flush_rq->cmd_type = REQ_TYPE_FS;
        flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
@@ -480,22 +487,22 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);

-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q)
+struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
+               int node, int cmd_size)
 {
        struct blk_flush_queue *fq;
        int rq_sz = sizeof(struct request);

-       fq = kzalloc(sizeof(*fq), GFP_KERNEL);
+       fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
        if (!fq)
                goto fail;

        if (q->mq_ops) {
                spin_lock_init(&fq->mq_flush_lock);
-               rq_sz = round_up(rq_sz + q->tag_set->cmd_size,
-                               cache_line_size());
+               rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
        }

-       fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL);
+       fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
        if (!fq->flush_rq)
                goto fail_rq;
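For orientation, the widened allocator now has two callers, and only the
blk-mq side requests NUMA placement and pdu space; both lines are taken
verbatim from the hunks in this commit:

/* Legacy request_fn path (block/blk-core.c): one queue-wide flush queue. */
q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);

/* blk-mq path (block/blk-mq.c): one flush queue per hw queue, allocated
 * on that queue's NUMA node with room for the driver's per-command pdu. */
hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);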

block/blk-mq.c
Lines changed: 24 additions & 26 deletions

@@ -281,26 +281,6 @@ void blk_mq_free_request(struct request *rq)
        __blk_mq_free_request(hctx, ctx, rq);
 }

-/*
- * Clone all relevant state from a request that has been put on hold in
- * the flush state machine into the preallocated flush request that hangs
- * off the request queue.
- *
- * For a driver the flush request should be invisible, that's why we are
- * impersonating the original request here.
- */
-void blk_mq_clone_flush_request(struct request *flush_rq,
-               struct request *orig_rq)
-{
-       struct blk_mq_hw_ctx *hctx =
-               orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
-
-       flush_rq->mq_ctx = orig_rq->mq_ctx;
-       flush_rq->tag = orig_rq->tag;
-       memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
-               hctx->cmd_size);
-}
-
 inline void __blk_mq_end_request(struct request *rq, int error)
 {
        blk_account_io_done(rq);
@@ -1516,12 +1496,20 @@ static void blk_mq_exit_hctx(struct request_queue *q,
                struct blk_mq_tag_set *set,
                struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+       unsigned flush_start_tag = set->queue_depth;
+
        blk_mq_tag_idle(hctx);

+       if (set->ops->exit_request)
+               set->ops->exit_request(set->driver_data,
+                                      hctx->fq->flush_rq, hctx_idx,
+                                      flush_start_tag + hctx_idx);
+
        if (set->ops->exit_hctx)
                set->ops->exit_hctx(hctx, hctx_idx);

        blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
+       blk_free_flush_queue(hctx->fq);
        kfree(hctx->ctxs);
        blk_mq_free_bitmap(&hctx->ctx_map);
 }
@@ -1556,6 +1544,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
                struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
 {
        int node;
+       unsigned flush_start_tag = set->queue_depth;

        node = hctx->numa_node;
        if (node == NUMA_NO_NODE)
@@ -1594,8 +1583,23 @@ static int blk_mq_init_hctx(struct request_queue *q,
            set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
                goto free_bitmap;

+       hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
+       if (!hctx->fq)
+               goto exit_hctx;
+
+       if (set->ops->init_request &&
+           set->ops->init_request(set->driver_data,
+                                  hctx->fq->flush_rq, hctx_idx,
+                                  flush_start_tag + hctx_idx, node))
+               goto free_fq;
+
        return 0;

+free_fq:
+       kfree(hctx->fq);
+exit_hctx:
+       if (set->ops->exit_hctx)
+               set->ops->exit_hctx(hctx, hctx_idx);
 free_bitmap:
        blk_mq_free_bitmap(&hctx->ctx_map);
 free_ctxs:
@@ -1862,16 +1866,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)

        blk_mq_add_queue_tag_set(set, q);

-       q->fq = blk_alloc_flush_queue(q);
-       if (!q->fq)
-               goto err_hw_queues;
-
        blk_mq_map_swqueue(q);

        return q;

-err_hw_queues:
-       blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 err_hw:
        blk_cleanup_queue(q);
 err_hctxs:

block/blk-sysfs.c
Lines changed: 2 additions & 2 deletions

@@ -517,10 +517,10 @@ static void blk_release_queue(struct kobject *kobj)
        if (q->queue_tags)
                __blk_queue_free_tags(q);

-       blk_free_flush_queue(q->fq);
-
        if (q->mq_ops)
                blk_mq_free_queue(q);
+       else
+               blk_free_flush_queue(q->fq);

        blk_trace_shutdown(q);

block/blk.h
Lines changed: 13 additions & 3 deletions

@@ -2,6 +2,8 @@
 #define BLK_INTERNAL_H

 #include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include "blk-mq.h"

 /* Amount of time in which a process may batch requests */
 #define BLK_BATCH_TIME (HZ/50UL)
@@ -31,16 +33,24 @@ extern struct ida blk_queue_ida;
 static inline struct blk_flush_queue *blk_get_flush_queue(
        struct request_queue *q, struct blk_mq_ctx *ctx)
 {
-       return q->fq;
+       struct blk_mq_hw_ctx *hctx;
+
+       if (!q->mq_ops)
+               return q->fq;
+
+       hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+       return hctx->fq;
 }

 static inline void __blk_get_queue(struct request_queue *q)
 {
        kobject_get(&q->kobj);
 }

-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q);
-void blk_free_flush_queue(struct blk_flush_queue *fq);
+struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
+               int node, int cmd_size);
+void blk_free_flush_queue(struct blk_flush_queue *q);

 int blk_init_rl(struct request_list *rl, struct request_queue *q,
                gfp_t gfp_mask);

include/linux/blk-mq.h
Lines changed: 6 additions & 0 deletions

@@ -4,6 +4,7 @@
 #include <linux/blkdev.h>

 struct blk_mq_tags;
+struct blk_flush_queue;

 struct blk_mq_cpu_notifier {
        struct list_head list;
@@ -34,6 +35,7 @@ struct blk_mq_hw_ctx {

        struct request_queue *queue;
        unsigned int queue_num;
+       struct blk_flush_queue *fq;

        void *driver_data;

@@ -119,6 +121,10 @@ struct blk_mq_ops {
        /*
         * Called for every command allocated by the block layer to allow
         * the driver to set up driver specific data.
+        *
+        * Tag greater than or equal to queue_depth is for setting up
+        * flush request.
+        *
         * Ditto for exit/teardown.
         */
        init_request_fn *init_request;
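If a driver needs to special-case the flush request in these callbacks, the
tag convention documented above gives it a cheap test. A sketch with a
hypothetical helper name:

/* Sketch only: under the convention above, a request index greater than
 * or equal to the tag set's queue_depth identifies a per-hw-queue flush
 * request inside init_request/exit_request. */
static inline bool mydrv_is_flush_rq(struct blk_mq_tag_set *set,
                                     unsigned int rq_idx)
{
        return rq_idx >= set->queue_depth;
}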
