
Commit 5cb525c

axboe authored and Keith Busch committed
nvme-pci: handle completions outside of the queue lock
Split the completion of events into a two part process:

1) Reap the events inside the queue lock

2) Complete the events outside the queue lock

Since we never wrap the queue, we can access it locklessly after we've updated the completion queue head. This patch started off with batching events on the stack, but with this trick we don't have to. Keith Busch <[email protected]> came up with that idea.

Note that this kills the ->cqe_seen as well. I haven't been able to trigger any ill effects of this. If we do race with polling every so often, it should be rare enough NOT to trigger any issues.

Signed-off-by: Jens Axboe <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
[hch: refactored, restored poll early exit optimization]
Signed-off-by: Christoph Hellwig <[email protected]>
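In caller terms, the two-part split described above reduces to the pattern sketched here. This is an illustrative, annotated rendering of the new nvme_irq() body from the diff below (the function name nvme_irq_sketch is hypothetical); the real implementations of nvme_process_cq() and nvme_complete_cqes() appear further down.

    /*
     * Illustrative sketch only: the two-phase completion pattern,
     * mirroring the new nvme_irq() in the diff below. The name
     * nvme_irq_sketch is hypothetical.
     */
    static irqreturn_t nvme_irq_sketch(int irq, void *data)
    {
    	struct nvme_queue *nvmeq = data;
    	u16 start, end;

    	/*
    	 * Phase 1, under the queue lock: reap. Advance cq_head past every
    	 * pending CQE and ring the CQ doorbell; [start, end) records the
    	 * reaped window. A tag of -1 means "not polling for any command".
    	 */
    	spin_lock(&nvmeq->q_lock);
    	nvme_process_cq(nvmeq, &start, &end, -1);
    	spin_unlock(&nvmeq->q_lock);

    	/*
    	 * Phase 2, without the lock: complete. Since the queue never wraps
    	 * past unreaped entries, the CQEs in [start, end) stay stable and
    	 * can be read locklessly once cq_head has been updated.
    	 */
    	if (start == end)
    		return IRQ_NONE;
    	nvme_complete_cqes(nvmeq, start, end);
    	return IRQ_HANDLED;
    }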
1 parent d1f06f4 commit 5cb525c

File tree

1 file changed (+45, -42 lines)


drivers/nvme/host/pci.c

Lines changed: 45 additions & 42 deletions
@@ -161,7 +161,6 @@ struct nvme_queue {
 	u16 cq_head;
 	u16 qid;
 	u8 cq_phase;
-	u8 cqe_seen;
 	u32 *dbbuf_sq_db;
 	u32 *dbbuf_cq_db;
 	u32 *dbbuf_sq_ei;
@@ -932,9 +931,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
 	}
 }
 
-static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
-		struct nvme_completion *cqe)
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 {
+	volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
 	struct request *req;
 
 	if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
@@ -957,50 +956,58 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 		return;
 	}
 
-	nvmeq->cqe_seen = 1;
 	req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
 	nvme_end_request(req, cqe->status, cqe->result);
 }
 
-static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
-		struct nvme_completion *cqe)
+static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
 {
-	if (nvme_cqe_pending(nvmeq)) {
-		*cqe = nvmeq->cqes[nvmeq->cq_head];
+	while (start != end) {
+		nvme_handle_cqe(nvmeq, start);
+		if (++start == nvmeq->q_depth)
+			start = 0;
+	}
+}
 
-		if (++nvmeq->cq_head == nvmeq->q_depth) {
-			nvmeq->cq_head = 0;
-			nvmeq->cq_phase = !nvmeq->cq_phase;
-		}
-		return true;
+static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
+{
+	if (++nvmeq->cq_head == nvmeq->q_depth) {
+		nvmeq->cq_head = 0;
+		nvmeq->cq_phase = !nvmeq->cq_phase;
 	}
-	return false;
 }
 
-static void nvme_process_cq(struct nvme_queue *nvmeq)
+static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+		u16 *end, int tag)
 {
-	struct nvme_completion cqe;
-	int consumed = 0;
+	bool found = false;
 
-	while (nvme_read_cqe(nvmeq, &cqe)) {
-		nvme_handle_cqe(nvmeq, &cqe);
-		consumed++;
+	*start = nvmeq->cq_head;
+	while (!found && nvme_cqe_pending(nvmeq)) {
+		if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+			found = true;
+		nvme_update_cq_head(nvmeq);
 	}
+	*end = nvmeq->cq_head;
 
-	if (consumed)
+	if (*start != *end)
 		nvme_ring_cq_doorbell(nvmeq);
+	return found;
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)
 {
-	irqreturn_t result;
 	struct nvme_queue *nvmeq = data;
+	u16 start, end;
+
 	spin_lock(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
-	result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
-	nvmeq->cqe_seen = 0;
+	nvme_process_cq(nvmeq, &start, &end, -1);
 	spin_unlock(&nvmeq->q_lock);
-	return result;
+
+	if (start == end)
+		return IRQ_NONE;
+	nvme_complete_cqes(nvmeq, start, end);
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t nvme_irq_check(int irq, void *data)
@@ -1013,27 +1020,17 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 
 static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
-	struct nvme_completion cqe;
-	int found = 0, consumed = 0;
+	u16 start, end;
+	bool found;
 
 	if (!nvme_cqe_pending(nvmeq))
 		return 0;
 
 	spin_lock_irq(&nvmeq->q_lock);
-	while (nvme_read_cqe(nvmeq, &cqe)) {
-		nvme_handle_cqe(nvmeq, &cqe);
-		consumed++;
-
-		if (tag == cqe.command_id) {
-			found = 1;
-			break;
-		}
-	}
-
-	if (consumed)
-		nvme_ring_cq_doorbell(nvmeq);
+	found = nvme_process_cq(nvmeq, &start, &end, tag);
 	spin_unlock_irq(&nvmeq->q_lock);
 
+	nvme_complete_cqes(nvmeq, start, end);
 	return found;
 }
 
@@ -1340,15 +1337,18 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 {
 	struct nvme_queue *nvmeq = &dev->queues[0];
+	u16 start, end;
 
 	if (shutdown)
 		nvme_shutdown_ctrl(&dev->ctrl);
 	else
 		nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 
 	spin_lock_irq(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
+	nvme_process_cq(nvmeq, &start, &end, -1);
 	spin_unlock_irq(&nvmeq->q_lock);
+
+	nvme_complete_cqes(nvmeq, start, end);
 }
 
 static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
@@ -1995,6 +1995,7 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error)
 static void nvme_del_cq_end(struct request *req, blk_status_t error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
+	u16 start, end;
 
 	if (!error) {
 		unsigned long flags;
@@ -2006,8 +2007,10 @@ static void nvme_del_cq_end(struct request *req, blk_status_t error)
 		 */
 		spin_lock_irqsave_nested(&nvmeq->q_lock, flags,
 					SINGLE_DEPTH_NESTING);
-		nvme_process_cq(nvmeq);
+		nvme_process_cq(nvmeq, &start, &end, -1);
 		spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+
+		nvme_complete_cqes(nvmeq, start, end);
 	}
 
 	nvme_del_queue_end(req, error);
