Commit d72fe7d

kwan-intc authored and dledford committed
IB/hfi1: Add a function to receive TID RDMA WRITE DATA packet
This patch adds a function to receive the TID RDMA WRITE DATA packet, which is in the KDETH PSN space in packet ordering. Due to the use of header suppression, software is generally only notified when the last data packet for a segment is received. This patch also adds code to handle KDETH EFLAGS errors for ingress TID RDMA WRITE DATA packets.

Signed-off-by: Mitko Haralanov <[email protected]>
Signed-off-by: Mike Marciniszyn <[email protected]>
Signed-off-by: Ashutosh Dixit <[email protected]>
Signed-off-by: Kaike Wan <[email protected]>
Signed-off-by: Dennis Dalessandro <[email protected]>
Signed-off-by: Doug Ledford <[email protected]>
1 parent 539e190 commit d72fe7d
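
The "KDETH PSN space" mentioned in the commit message uses the same 24-bit PSN arithmetic as the rest of the driver. As a minimal standalone sketch — these are illustrative re-implementations of what the existing mask_psn()/cmp_psn() helpers do, not the driver's code:

#include <stdint.h>

#define EX_PSN_MASK 0xffffffu	/* IB PSNs are 24 bits wide */

/* Keep a PSN inside its 24-bit space, as the driver's mask_psn() does. */
static inline uint32_t ex_mask_psn(uint32_t psn)
{
	return psn & EX_PSN_MASK;
}

/*
 * Compare two 24-bit PSNs with wraparound, as the driver's cmp_psn()
 * does: shifting the difference up by 8 bits and arithmetic-shifting it
 * back sign-extends bit 23, so "a just behind b" stays negative even
 * after the counters wrap.
 */
static inline int32_t ex_cmp_psn(uint32_t a, uint32_t b)
{
	return (int32_t)((a - b) << 8) >> 8;
}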

File tree: 3 files changed (+241, -0 lines)


drivers/infiniband/hw/hfi1/tid_rdma.c

Lines changed: 236 additions & 0 deletions
@@ -2566,13 +2566,32 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd,
 			     u8 opcode)
 {
 	struct rvt_qp *qp = packet->qp;
+	struct hfi1_qp_priv *qpriv = qp->priv;
 	u32 ipsn;
 	struct ib_other_headers *ohdr = packet->ohdr;
+	struct rvt_ack_entry *e;
+	struct tid_rdma_request *req;
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+	u32 i;
 
 	if (rcv_type >= RHF_RCV_TYPE_IB)
 		goto done;
 
 	spin_lock(&qp->s_lock);
+
+	/*
+	 * We've run out of space in the eager buffer.
+	 * Eagerly received KDETH packets which require space in the
+	 * eager buffer (packets that have a payload) are TID RDMA WRITE
+	 * response packets. In this case, we have to re-transmit the
+	 * TID RDMA WRITE request.
+	 */
+	if (rcv_type == RHF_RCV_TYPE_EAGER) {
+		hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
+		hfi1_schedule_send(qp);
+		goto done_unlock;
+	}
+
 	/*
 	 * For TID READ response, error out QP after freeing the tid
 	 * resources.
@@ -2586,8 +2605,25 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd,
 			rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
 			goto done;
 		}
+		goto done_unlock;
+	}
+
+	/*
+	 * Error out the qp for TID RDMA WRITE
+	 */
+	hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
+	for (i = 0; i < rvt_max_atomic(rdi); i++) {
+		e = &qp->s_ack_queue[i];
+		if (e->opcode == TID_OP(WRITE_REQ)) {
+			req = ack_to_tid_req(e);
+			hfi1_kern_exp_rcv_clear_all(req);
+		}
 	}
+	spin_unlock(&qp->s_lock);
+	rvt_rc_error(qp, IB_WC_LOC_LEN_ERR);
+	goto done;
 
+done_unlock:
 	spin_unlock(&qp->s_lock);
 done:
 	return true;
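
The tid_rdma_tid_err() changes above boil down to a small classification: eager-type KDETH receives (the only KDETH packets that consume eager-buffer space, i.e. TID RDMA WRITE responses carrying a payload) trigger a sender-side restart, TID READ responses get their flow resources freed, and anything else errors out the QP. A hypothetical standalone sketch of that decision — the enum values and names are illustrative, not hfi1's:

/* Hypothetical sketch of the recovery decision; names are invented. */
enum ex_rcv_type { EX_RCV_EXPECTED, EX_RCV_EAGER, EX_RCV_IB };
enum ex_recovery {
	EX_RESTART_WRITE_REQ,	/* re-transmit the TID RDMA WRITE request */
	EX_FREE_READ_FLOW,	/* free TID READ flow resources, error the QP */
	EX_ERROR_QP,		/* clear HW flow state and error out the QP */
	EX_NOT_OURS,		/* IB traffic: handled elsewhere */
};

static enum ex_recovery ex_classify_tid_err(enum ex_rcv_type t, int is_read_resp)
{
	if (t == EX_RCV_IB)	/* mirrors rcv_type >= RHF_RCV_TYPE_IB */
		return EX_NOT_OURS;
	if (t == EX_RCV_EAGER)	/* out of eager space: redo the request */
		return EX_RESTART_WRITE_REQ;
	return is_read_resp ? EX_FREE_READ_FLOW : EX_ERROR_QP;
}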
@@ -2837,8 +2873,12 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
 	u8 opcode;
 	u32 qp_num, psn, ibpsn;
 	struct rvt_qp *qp;
+	struct hfi1_qp_priv *qpriv;
 	unsigned long flags;
 	bool ret = true;
+	struct rvt_ack_entry *e;
+	struct tid_rdma_request *req;
+	struct tid_rdma_flow *flow;
 
 	trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
 					   packet->rhf);
@@ -2897,14 +2937,109 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
 		ibpsn = mask_psn(ibpsn);
 		ret = handle_read_kdeth_eflags(rcd, packet, rcv_type, rte, psn,
 					       ibpsn);
+		goto r_unlock;
+	}
+
+	/*
+	 * qp->s_tail_ack_queue points to the rvt_ack_entry currently being
+	 * processed. These are completed sequentially, so we can be sure that
+	 * the pointer will not change until the entire request has completed.
+	 */
+	spin_lock(&qp->s_lock);
+	qpriv = qp->priv;
+	e = &qp->s_ack_queue[qpriv->r_tid_tail];
+	req = ack_to_tid_req(e);
+	flow = &req->flows[req->clear_tail];
+
+	switch (rcv_type) {
+	case RHF_RCV_TYPE_EXPECTED:
+		switch (rte) {
+		case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
+			if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) {
+				u64 reg;
+
+				qpriv->s_flags |= HFI1_R_TID_SW_PSN;
+				/*
+				 * The only sane way to get the amount of
+				 * progress is to read the HW flow state.
+				 */
+				reg = read_uctxt_csr(dd, rcd->ctxt,
+						     RCV_TID_FLOW_TABLE +
+						     (8 * flow->idx));
+				flow->flow_state.r_next_psn = mask_psn(reg);
+				qpriv->r_next_psn_kdeth =
+					flow->flow_state.r_next_psn;
+				goto nak_psn;
+			} else {
+				/*
+				 * If the received PSN does not match the next
+				 * expected PSN, NAK the packet.
+				 * However, only do that if we know that a
+				 * NAK has already been sent. Otherwise, this
+				 * mismatch could be due to packets that were
+				 * already in flight.
+				 */
+				if (psn != flow->flow_state.r_next_psn) {
+					psn = flow->flow_state.r_next_psn;
+					goto nak_psn;
+				}
+
+				qpriv->s_nak_state = 0;
+				/*
+				 * If SW PSN verification is successful and
+				 * this is the last packet in the segment, tell
+				 * the caller to process it as a normal packet.
+				 */
+				if (psn == full_flow_psn(flow,
+							 flow->flow_state.lpsn))
+					ret = false;
+				qpriv->r_next_psn_kdeth =
+					++flow->flow_state.r_next_psn;
+			}
+			break;
+
+		case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
+			goto nak_psn;
+
+		default:
+			break;
+		}
+		break;
+
+	case RHF_RCV_TYPE_ERROR:
+		switch (rte) {
+		case RHF_RTE_ERROR_OP_CODE_ERR:
+		case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
+		case RHF_RTE_ERROR_KHDR_HCRC_ERR:
+		case RHF_RTE_ERROR_KHDR_KVER_ERR:
+		case RHF_RTE_ERROR_CONTEXT_ERR:
+		case RHF_RTE_ERROR_KHDR_TID_ERR:
+		default:
+			break;
+		}
+	default:
+		break;
 	}
 
+unlock:
+	spin_unlock(&qp->s_lock);
 r_unlock:
 	spin_unlock_irqrestore(&qp->r_lock, flags);
 rcu_unlock:
 	rcu_read_unlock();
 drop:
 	return ret;
+nak_psn:
+	ibp->rvp.n_rc_seqnak++;
+	if (!qpriv->s_nak_state) {
+		qpriv->s_nak_state = IB_NAK_PSN_ERROR;
+		/* We are NAK'ing the next expected PSN */
+		qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
+		qpriv->s_flags |= RVT_S_ACK_PENDING;
+		if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
+			qpriv->r_tid_ack = qpriv->r_tid_tail;
+	}
+	goto unlock;
 }
 
 /*
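
On the first EXPECTED_FLOW_SEQ_ERR, the handler above switches to software PSN tracking and reads the hardware flow table to learn how far the flow actually progressed. A minimal sketch of that recovery step, with a hypothetical ex_read_csr() and a placeholder table offset standing in for the driver's read_uctxt_csr() and RCV_TID_FLOW_TABLE:

#include <stdint.h>

/* Hypothetical stand-in for read_uctxt_csr(); not a real hfi1 symbol. */
extern uint64_t ex_read_csr(uint32_t ctxt, uint64_t offset);

#define EX_FLOW_TABLE_BASE 0x0	/* placeholder; the real offset is RCV_TID_FLOW_TABLE */

/* Recover the HW's next expected PSN from one flow-table entry (8 bytes each). */
static uint32_t ex_recover_next_psn(uint32_t ctxt, uint32_t flow_idx)
{
	uint64_t reg = ex_read_csr(ctxt, EX_FLOW_TABLE_BASE + 8ull * flow_idx);

	return (uint32_t)reg & 0xffffffu;	/* the mask_psn() step above */
}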
@@ -4005,3 +4140,104 @@ bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
 	}
 	return last_pkt;
 }
+
+void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
+{
+	struct rvt_qp *qp = packet->qp;
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ctxtdata *rcd = priv->rcd;
+	struct ib_other_headers *ohdr = packet->ohdr;
+	struct rvt_ack_entry *e;
+	struct tid_rdma_request *req;
+	struct tid_rdma_flow *flow;
+	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+	unsigned long flags;
+	u32 psn, next;
+	u8 opcode;
+
+	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+
+	/*
+	 * All error handling should be done by now. If we are here, the packet
+	 * is either good or has been accepted by the error handler.
+	 */
+	spin_lock_irqsave(&qp->s_lock, flags);
+	e = &qp->s_ack_queue[priv->r_tid_tail];
+	req = ack_to_tid_req(e);
+	flow = &req->flows[req->clear_tail];
+	if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
+		if (cmp_psn(psn, flow->flow_state.r_next_psn))
+			goto send_nak;
+		flow->flow_state.r_next_psn++;
+		goto exit;
+	}
+	flow->flow_state.r_next_psn = mask_psn(psn + 1);
+	hfi1_kern_exp_rcv_clear(req);
+	priv->alloc_w_segs--;
+	rcd->flows[flow->idx].psn = psn & HFI1_KDETH_BTH_SEQ_MASK;
+	req->comp_seg++;
+	priv->s_nak_state = 0;
+
+	/*
+	 * Release the flow if one of the following conditions has been met:
+	 *  - The request has reached a sync point AND all outstanding
+	 *    segments have been completed, or
+	 *  - The entire request is complete and there are no more requests
+	 *    (of any kind) in the queue.
+	 */
+	if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+		priv->r_tid_ack = priv->r_tid_tail;
+
+	if (opcode == TID_OP(WRITE_DATA_LAST)) {
+		for (next = priv->r_tid_tail + 1; ; next++) {
+			if (next > rvt_size_atomic(&dev->rdi))
+				next = 0;
+			if (next == priv->r_tid_head)
+				break;
+			e = &qp->s_ack_queue[next];
+			if (e->opcode == TID_OP(WRITE_REQ))
+				break;
+		}
+		priv->r_tid_tail = next;
+		if (++qp->s_acked_ack_queue > rvt_size_atomic(&dev->rdi))
+			qp->s_acked_ack_queue = 0;
+	}
+
+	hfi1_tid_write_alloc_resources(qp, true);
+
+	/*
+	 * If we need to generate more responses, schedule the
+	 * send engine.
+	 */
+	if (req->cur_seg < req->total_segs ||
+	    qp->s_tail_ack_queue != qp->r_head_ack_queue) {
+		qp->s_flags |= RVT_S_RESP_PENDING;
+		hfi1_schedule_send(qp);
+	}
+
+	priv->pending_tid_w_segs--;
+	if (priv->s_flags & HFI1_R_TID_RSC_TIMER) {
+		if (priv->pending_tid_w_segs)
+			hfi1_mod_tid_reap_timer(req->qp);
+		else
+			hfi1_stop_tid_reap_timer(req->qp);
+	}
+
+done:
+	priv->s_flags |= RVT_S_ACK_PENDING;
+exit:
+	priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	return;
+
+send_nak:
+	if (!priv->s_nak_state) {
+		priv->s_nak_state = IB_NAK_PSN_ERROR;
+		priv->s_nak_psn = flow->flow_state.r_next_psn;
+		priv->s_flags |= RVT_S_ACK_PENDING;
+		if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+			priv->r_tid_ack = priv->r_tid_tail;
+	}
+	goto done;
+}
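
When the last data packet of a request arrives (WRITE_DATA_LAST), hfi1_rc_rcv_tid_rdma_write_data() advances r_tid_tail to the next queued TID WRITE request with a wrapping scan. A self-contained sketch of that scan under assumed queue size and opcode values — the driver derives both from rvt_size_atomic() and TID_OP():

#include <stdint.h>

#define EX_QUEUE_SIZE	16u	/* illustrative; the array holds EX_QUEUE_SIZE + 1 entries */
#define EX_OP_WRITE_REQ	0xe0	/* placeholder for TID_OP(WRITE_REQ) */

struct ex_ack_entry {
	uint8_t opcode;
};

/*
 * Advance the tail index past a completed request to the next TID WRITE
 * request, wrapping at the queue size and stopping at the head when no
 * further requests are queued (mirrors the WRITE_DATA_LAST loop above).
 */
static uint32_t ex_advance_tid_tail(const struct ex_ack_entry *q,
				    uint32_t tail, uint32_t head)
{
	uint32_t next;

	for (next = tail + 1; ; next++) {
		if (next > EX_QUEUE_SIZE)
			next = 0;
		if (next == head)
			break;
		if (q[next].opcode == EX_OP_WRITE_REQ)
			break;
	}
	return next;
}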

drivers/infiniband/hw/hfi1/tid_rdma.h

Lines changed: 2 additions & 0 deletions

@@ -279,4 +279,6 @@ bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
 				struct ib_other_headers *ohdr,
 				u32 *bth1, u32 *bth2, u32 *len);
 
+void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet);
+
 #endif /* HFI1_TID_RDMA_H */
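
This header change only exports the new handler; the diff does not show who calls it. Purely as a hypothetical sketch, a receive path might dispatch TID RDMA opcodes through a table like the following — the table, opcode value, and types here are invented for illustration, not hfi1's actual dispatch:

#include <stdint.h>

struct ex_packet;	/* opaque stand-in for struct hfi1_packet */
typedef void (*ex_rcv_handler)(struct ex_packet *pkt);

/* Stand-in prototype; the real handler is hfi1_rc_rcv_tid_rdma_write_data(). */
void ex_rcv_tid_rdma_write_data(struct ex_packet *pkt);

#define EX_OP_TID_WRITE_DATA 0xa6	/* invented opcode value */

static const ex_rcv_handler ex_handlers[256] = {
	[EX_OP_TID_WRITE_DATA] = ex_rcv_tid_rdma_write_data,
};

static void ex_dispatch(struct ex_packet *pkt, uint8_t opcode)
{
	if (ex_handlers[opcode])
		ex_handlers[opcode](pkt);
}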

drivers/infiniband/hw/hfi1/verbs.h

Lines changed: 3 additions & 0 deletions

@@ -174,6 +174,8 @@ struct hfi1_qp_priv {
 
 	/* variables for the TID RDMA SE state machine */
 	u8 rnr_nak_state;       /* RNR NAK state */
+	u8 s_nak_state;
+	u32 s_nak_psn;
 	u32 s_flags;
 	u32 s_tid_cur;
 	u32 s_tid_head;
@@ -193,6 +195,7 @@ struct hfi1_qp_priv {
 	u16 pkts_ps;            /* packets per segment */
 	u8 timeout_shift;       /* account for number of packets per segment */
 
+	u32 r_next_psn_kdeth;
 	u8 sync_pt;           /* Set when QP reaches sync point */
 };
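
The s_nak_state/s_nak_psn fields added here back the NAK bookkeeping in tid_rdma.c: a NAK is recorded at most once per error episode, so a burst of out-of-sequence packets does not produce a NAK storm. A standalone sketch of that latch — field types mirror the struct additions, and IB_NAK_PSN_ERROR is the standard IB NAK code, written out as a literal:

#include <stdint.h>

#define EX_IB_NAK_PSN_ERROR 0x60	/* IB AETH NAK code: PSN sequence error */

struct ex_nak_state {
	uint8_t	 s_nak_state;	/* 0 = no NAK outstanding (the latch) */
	uint32_t s_nak_psn;	/* PSN the responder expects next */
};

/* Record a PSN-error NAK once; repeats are absorbed until the latch clears. */
static void ex_record_nak(struct ex_nak_state *s, uint32_t r_next_psn)
{
	if (s->s_nak_state)
		return;
	s->s_nak_state = EX_IB_NAK_PSN_ERROR;
	s->s_nak_psn = r_next_psn & 0xffffffu;	/* NAK the next expected PSN */
}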
