Skip to content

Commit 04f3eaf

Browse files
committed
nvme: implement mq_ops->commit_rqs() hook
Split the command submission and the SQ doorbell ring, and add the doorbell ring as our ->commit_rqs() hook. This allows a list of requests to be issued, with nvme only writing the SQ update when it's necessary. This is more efficient if we have lists of requests to issue, particularly on virtualized hardware, where writing the SQ doorbell is more expensive than on real hardware. For those cases, performance increases of 2-3x have been observed. The use case for this is plugged IO, where blk-mq flushes a batch of requests at a time. Reviewed-by: Christoph Hellwig <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent d666ba9 commit 04f3eaf

File tree

1 file changed

+39
-8
lines changed

1 file changed

+39
-8
lines changed

drivers/nvme/host/pci.c

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ struct nvme_queue {
203203
u16 q_depth;
204204
s16 cq_vector;
205205
u16 sq_tail;
206+
u16 last_sq_tail;
206207
u16 cq_head;
207208
u16 last_cq_head;
208209
u16 qid;
@@ -522,22 +523,50 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
522523
return 0;
523524
}
524525

526+
/*
527+
* Write sq tail if we are asked to, or if the next command would wrap.
528+
*
* When @write_sq is false, the doorbell is skipped unless advancing sq_tail
* by one more slot (wrapping to 0 at q_depth) would land on last_sq_tail,
* the tail value recorded the last time this function got past that check.
* Both callers visible in this diff hold nvmeq->sq_lock around the call.
*/
529+
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
530+
{
531+
if (!write_sq) {
532+
u16 next_tail = nvmeq->sq_tail + 1;
533+
534+
if (next_tail == nvmeq->q_depth)
535+
next_tail = 0;
536+
/* Defer the doorbell as long as one more slot can be used without reaching last_sq_tail. */
if (next_tail != nvmeq->last_sq_tail)
537+
return;
538+
}
539+
540+
/* The MMIO write is issued only when the dbbuf helper returns non-zero. */
if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
541+
nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
542+
writel(nvmeq->sq_tail, nvmeq->q_db);
543+
/* Recorded whether or not writel() was issued above. */
nvmeq->last_sq_tail = nvmeq->sq_tail;
544+
}
545+
525546
/**
526547
* nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
527548
* @nvmeq: The queue to use
528549
* @cmd: The command to send
550+
* @write_sq: whether to write to the SQ doorbell; passed straight through to
*            nvme_write_sq_db(), which may still write the doorbell when this
*            is false (see that function's tail check)
529551
*
* The copy, the sq_tail advance and the doorbell decision all happen under
* nvmeq->sq_lock.
*/
530-
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
552+
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
553+
bool write_sq)
531554
{
532555
spin_lock(&nvmeq->sq_lock);
533-
534556
memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
535-
536557
/* Advance the tail, wrapping to slot 0 at q_depth. */
if (++nvmeq->sq_tail == nvmeq->q_depth)
537558
nvmeq->sq_tail = 0;
538-
/* Lines marked '-' below: the unconditional doorbell sequence this commit removes. */
if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
539-
nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
540-
writel(nvmeq->sq_tail, nvmeq->q_db);
559+
/* Replacement: the doorbell decision is delegated to nvme_write_sq_db(). */
nvme_write_sq_db(nvmeq, write_sq);
560+
spin_unlock(&nvmeq->sq_lock);
561+
}
562+
563+
/*
* blk-mq ->commit_rqs() hook (wired up through NVME_SHARED_MQ_OPS): ring the
* SQ doorbell for commands that were queued without a doorbell write.
* Does nothing when sq_tail already equals last_sq_tail, i.e. no new tail
* value is pending. The queue is taken from hctx->driver_data.
*/
static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
564+
{
565+
struct nvme_queue *nvmeq = hctx->driver_data;
566+
567+
spin_lock(&nvmeq->sq_lock);
568+
/* Only force a doorbell write if the tail moved since the last recorded value. */
if (nvmeq->sq_tail != nvmeq->last_sq_tail)
569+
nvme_write_sq_db(nvmeq, true);
541570
spin_unlock(&nvmeq->sq_lock);
542571
}
543572

@@ -923,7 +952,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
923952
}
924953

925954
blk_mq_start_request(req);
926-
nvme_submit_cmd(nvmeq, &cmnd);
955+
nvme_submit_cmd(nvmeq, &cmnd, bd->last);
927956
return BLK_STS_OK;
928957
out_cleanup_iod:
929958
nvme_free_iod(dev, req);
@@ -1108,7 +1137,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
11081137
memset(&c, 0, sizeof(c));
11091138
c.common.opcode = nvme_admin_async_event;
11101139
c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
1111-
nvme_submit_cmd(nvmeq, &c);
1140+
nvme_submit_cmd(nvmeq, &c, true);
11121141
}
11131142

11141143
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1531,6 +1560,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
15311560

15321561
spin_lock_irq(&nvmeq->cq_lock);
15331562
nvmeq->sq_tail = 0;
1563+
nvmeq->last_sq_tail = 0;
15341564
nvmeq->cq_head = 0;
15351565
nvmeq->cq_phase = 1;
15361566
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
@@ -1603,6 +1633,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
16031633

16041634
#define NVME_SHARED_MQ_OPS \
16051635
.queue_rq = nvme_queue_rq, \
1636+
.commit_rqs = nvme_commit_rqs, \
16061637
.rq_flags_to_type = nvme_rq_flags_to_type, \
16071638
.complete = nvme_pci_complete_rq, \
16081639
.init_hctx = nvme_init_hctx, \

0 commit comments

Comments
 (0)