Skip to content

Commit ed23a72

Browse files
author
Roland Dreier
committed
IB: Return "maybe missed event" hint from ib_req_notify_cq()
The semantics defined by the InfiniBand specification say that completion events are only generated when a completions is added to a completion queue (CQ) after completion notification is requested. In other words, this means that the following race is possible: while (CQ is not empty) ib_poll_cq(CQ); // new completion is added after while loop is exited ib_req_notify_cq(CQ); // no event is generated for the existing completion To close this race, the IB spec recommends doing another poll of the CQ after requesting notification. However, it is not always possible to arrange code this way (for example, we have found that NAPI for IPoIB cannot poll after requesting notification). Also, some hardware (eg Mellanox HCAs) actually will generate an event for completions added before the call to ib_req_notify_cq() -- which is allowed by the spec, since there's no way for any upper-layer consumer to know exactly when a completion was really added -- so the extra poll of the CQ is just a waste. Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for ib_req_notify_cq() so that it can return a hint about whether the a completion may have been added before the request for notification. The return value of ib_req_notify_cq() is extended so: < 0 means an error occurred while requesting notification == 0 means notification was requested successfully, and if IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events were missed and it is safe to wait for another event. > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed in. It means that the consumer must poll the CQ again to make sure it is empty to avoid the race described above. We add a flag to enable this behavior rather than turning it on unconditionally, because checking for missed events may incur significant overhead for some low-level drivers, and consumers that don't care about the results of this test shouldn't be forced to pay for the test. Signed-off-by: Roland Dreier <[email protected]>
1 parent f4fd0b2 commit ed23a72

File tree

12 files changed

+93
-33
lines changed

12 files changed

+93
-33
lines changed

drivers/infiniband/hw/amso1100/c2.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
519519
extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
520520
extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
521521
extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
522-
extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
522+
extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
523523

524524
/* CM */
525525
extern int c2_llp_connect(struct iw_cm_id *cm_id,

drivers/infiniband/hw/amso1100/c2_cq.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -217,17 +217,19 @@ int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
217217
return npolled;
218218
}
219219

220-
int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
220+
int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
221221
{
222222
struct c2_mq_shared __iomem *shared;
223223
struct c2_cq *cq;
224+
unsigned long flags;
225+
int ret = 0;
224226

225227
cq = to_c2cq(ibcq);
226228
shared = cq->mq.peer;
227229

228-
if (notify == IB_CQ_NEXT_COMP)
230+
if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
229231
writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
230-
else if (notify == IB_CQ_SOLICITED)
232+
else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
231233
writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
232234
else
233235
return -EINVAL;
@@ -241,7 +243,13 @@ int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
241243
*/
242244
readb(&shared->armed);
243245

244-
return 0;
246+
if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
247+
spin_lock_irqsave(&cq->lock, flags);
248+
ret = !c2_mq_empty(&cq->mq);
249+
spin_unlock_irqrestore(&cq->lock, flags);
250+
}
251+
252+
return ret;
245253
}
246254

247255
static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)

drivers/infiniband/hw/cxgb3/cxio_hal.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,10 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
114114
return -EIO;
115115
}
116116
}
117+
118+
return 1;
117119
}
120+
118121
return 0;
119122
}
120123

drivers/infiniband/hw/cxgb3/iwch_provider.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
292292
#endif
293293
}
294294

295-
static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
295+
static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
296296
{
297297
struct iwch_dev *rhp;
298298
struct iwch_cq *chp;
@@ -303,7 +303,7 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
303303

304304
chp = to_iwch_cq(ibcq);
305305
rhp = chp->rhp;
306-
if (notify == IB_CQ_SOLICITED)
306+
if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
307307
cq_op = CQ_ARM_SE;
308308
else
309309
cq_op = CQ_ARM_AN;
@@ -317,9 +317,11 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
317317
PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr);
318318
err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
319319
spin_unlock_irqrestore(&chp->lock, flag);
320-
if (err)
320+
if (err < 0)
321321
printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
322322
chp->cq.cqid);
323+
if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
324+
err = 0;
323325
return err;
324326
}
325327

drivers/infiniband/hw/ehca/ehca_iverbs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
135135

136136
int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
137137

138-
int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
138+
int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);
139139

140140
struct ib_qp *ehca_create_qp(struct ib_pd *pd,
141141
struct ib_qp_init_attr *init_attr,

drivers/infiniband/hw/ehca/ehca_reqs.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -634,11 +634,13 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
634634
return ret;
635635
}
636636

637-
int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
637+
int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
638638
{
639639
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
640+
unsigned long spl_flags;
641+
int ret = 0;
640642

641-
switch (cq_notify) {
643+
switch (notify_flags & IB_CQ_SOLICITED_MASK) {
642644
case IB_CQ_SOLICITED:
643645
hipz_set_cqx_n0(my_cq, 1);
644646
break;
@@ -649,5 +651,11 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
649651
return -EINVAL;
650652
}
651653

652-
return 0;
654+
if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
655+
spin_lock_irqsave(&my_cq->spinlock, spl_flags);
656+
ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
657+
spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
658+
}
659+
660+
return ret;
653661
}

drivers/infiniband/hw/ehca/ipz_pt_fn.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,14 @@ static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
140140
return cqe;
141141
}
142142

143+
static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
144+
{
145+
struct ehca_cqe *cqe = ipz_qeit_get(queue);
146+
u32 cqe_flags = cqe->cqe_flags;
147+
148+
return cqe_flags >> 7 == (queue->toggle_state & 1);
149+
}
150+
143151
/*
144152
* returns and resets Queue Entry iterator
145153
* returns address (kv) of first Queue Entry

drivers/infiniband/hw/ipath/ipath_cq.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -334,27 +334,34 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
334334
/**
335335
* ipath_req_notify_cq - change the notification type for a completion queue
336336
* @ibcq: the completion queue
337-
* @notify: the type of notification to request
337+
* @notify_flags: the type of notification to request
338338
*
339339
* Returns 0 for success.
340340
*
341341
* This may be called from interrupt context. Also called by
342342
* ib_req_notify_cq() in the generic verbs code.
343343
*/
344-
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
344+
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
345345
{
346346
struct ipath_cq *cq = to_icq(ibcq);
347347
unsigned long flags;
348+
int ret = 0;
348349

349350
spin_lock_irqsave(&cq->lock, flags);
350351
/*
351352
* Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
352353
* any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
353354
*/
354355
if (cq->notify != IB_CQ_NEXT_COMP)
355-
cq->notify = notify;
356+
cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
357+
358+
if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
359+
cq->queue->head != cq->queue->tail)
360+
ret = 1;
361+
356362
spin_unlock_irqrestore(&cq->lock, flags);
357-
return 0;
363+
364+
return ret;
358365
}
359366

360367
/**

drivers/infiniband/hw/ipath/ipath_verbs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
741741

742742
int ipath_destroy_cq(struct ib_cq *ibcq);
743743

744-
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
744+
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
745745

746746
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
747747

drivers/infiniband/hw/mthca/mthca_cq.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -726,11 +726,12 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
726726
return err == 0 || err == -EAGAIN ? npolled : err;
727727
}
728728

729-
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
729+
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
730730
{
731731
__be32 doorbell[2];
732732

733-
doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ?
733+
doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) ==
734+
IB_CQ_SOLICITED ?
734735
MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
735736
MTHCA_TAVOR_CQ_DB_REQ_NOT) |
736737
to_mcq(cq)->cqn);
@@ -743,7 +744,7 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
743744
return 0;
744745
}
745746

746-
int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
747+
int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
747748
{
748749
struct mthca_cq *cq = to_mcq(ibcq);
749750
__be32 doorbell[2];
@@ -755,7 +756,8 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
755756

756757
doorbell[0] = ci;
757758
doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
758-
(notify == IB_CQ_SOLICITED ? 1 : 2));
759+
((flags & IB_CQ_SOLICITED_MASK) ==
760+
IB_CQ_SOLICITED ? 1 : 2));
759761

760762
mthca_write_db_rec(doorbell, cq->arm_db);
761763

@@ -766,7 +768,7 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
766768
wmb();
767769

768770
doorbell[0] = cpu_to_be32((sn << 28) |
769-
(notify == IB_CQ_SOLICITED ?
771+
((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
770772
MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
771773
MTHCA_ARBEL_CQ_DB_REQ_NOT) |
772774
cq->cqn);

drivers/infiniband/hw/mthca/mthca_dev.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -495,8 +495,8 @@ void mthca_unmap_eq_icm(struct mthca_dev *dev);
495495

496496
int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
497497
struct ib_wc *entry);
498-
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
499-
int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
498+
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
499+
int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
500500
int mthca_init_cq(struct mthca_dev *dev, int nent,
501501
struct mthca_ucontext *ctx, u32 pdn,
502502
struct mthca_cq *cq);

include/rdma/ib_verbs.h

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -431,9 +431,11 @@ struct ib_wc {
431431
u8 port_num; /* valid only for DR SMPs on switches */
432432
};
433433

434-
enum ib_cq_notify {
435-
IB_CQ_SOLICITED,
436-
IB_CQ_NEXT_COMP
434+
enum ib_cq_notify_flags {
435+
IB_CQ_SOLICITED = 1 << 0,
436+
IB_CQ_NEXT_COMP = 1 << 1,
437+
IB_CQ_SOLICITED_MASK = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
438+
IB_CQ_REPORT_MISSED_EVENTS = 1 << 2,
437439
};
438440

439441
enum ib_srq_attr_mask {
@@ -990,7 +992,7 @@ struct ib_device {
990992
struct ib_wc *wc);
991993
int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
992994
int (*req_notify_cq)(struct ib_cq *cq,
993-
enum ib_cq_notify cq_notify);
995+
enum ib_cq_notify_flags flags);
994996
int (*req_ncomp_notif)(struct ib_cq *cq,
995997
int wc_cnt);
996998
struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
@@ -1419,14 +1421,34 @@ int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
14191421
/**
14201422
* ib_req_notify_cq - Request completion notification on a CQ.
14211423
* @cq: The CQ to generate an event for.
1422-
* @cq_notify: If set to %IB_CQ_SOLICITED, completion notification will
1423-
* occur on the next solicited event. If set to %IB_CQ_NEXT_COMP,
1424-
* notification will occur on the next completion.
1424+
* @flags:
1425+
* Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
1426+
* to request an event on the next solicited event or next work
1427+
* completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
1428+
* may also be |ed in to request a hint about missed events, as
1429+
* described below.
1430+
*
1431+
* Return Value:
1432+
* < 0 means an error occurred while requesting notification
1433+
* == 0 means notification was requested successfully, and if
1434+
* IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
1435+
* were missed and it is safe to wait for another event. In
1436+
* this case is it guaranteed that any work completions added
1437+
* to the CQ since the last CQ poll will trigger a completion
1438+
* notification event.
1439+
* > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
1440+
* in. It means that the consumer must poll the CQ again to
1441+
* make sure it is empty to avoid missing an event because of a
1442+
* race between requesting notification and an entry being
1443+
* added to the CQ. This return value means it is possible
1444+
* (but not guaranteed) that a work completion has been added
1445+
* to the CQ since the last poll without triggering a
1446+
* completion notification event.
14251447
*/
14261448
static inline int ib_req_notify_cq(struct ib_cq *cq,
1427-
enum ib_cq_notify cq_notify)
1449+
enum ib_cq_notify_flags flags)
14281450
{
1429-
return cq->device->req_notify_cq(cq, cq_notify);
1451+
return cq->device->req_notify_cq(cq, flags);
14301452
}
14311453

14321454
/**

0 commit comments

Comments
 (0)