@@ -220,11 +220,6 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	return 0;
 }
 
-static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
-				 const struct ib_qp_attr *attr,
-				 int attr_mask, enum ib_qp_state cur_state,
-				 enum ib_qp_state new_state);
-
 static int check_send_valid(struct hns_roce_dev *hr_dev,
 			    struct hns_roce_qp *hr_qp)
 {
@@ -261,15 +256,13 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	struct hns_roce_wqe_frmr_seg *fseg;
 	struct device *dev = hr_dev->dev;
 	struct hns_roce_v2_db sq_db;
-	struct ib_qp_attr attr;
 	unsigned int owner_bit;
 	unsigned int sge_idx;
 	unsigned int wqe_idx;
 	unsigned long flags;
 	int valid_num_sge;
 	void *wqe = NULL;
 	bool loopback;
-	int attr_mask;
 	u32 tmp_len;
 	u32 hr_op;
 	u8 *smac;
@@ -607,18 +600,19 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 
 		qp->next_sge = sge_idx;
 
-		if (qp->state == IB_QPS_ERR) {
-			attr_mask = IB_QP_STATE;
-			attr.qp_state = IB_QPS_ERR;
-
-			ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask,
-						    qp->state, IB_QPS_ERR);
-			if (ret) {
-				spin_unlock_irqrestore(&qp->sq.lock, flags);
-				*bad_wr = wr;
-				return ret;
-			}
-		}
+		/*
+		 * Hip08 hardware cannot flush the WQEs in SQ if the QP state
+		 * gets into errored mode. Hence, as a workaround to this
+		 * hardware limitation, the driver needs to assist in flushing.
+		 * But the flush operation uses a mailbox to convey the QP
+		 * state to the hardware, which can sleep due to the mutex
+		 * protection around the mailbox calls. Hence, use the
+		 * deferred flush for now.
+		 */
+		if (qp->state == IB_QPS_ERR)
+			if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG,
+					      &qp->flush_flag))
+				init_flush_work(hr_dev, qp);
 	}
 
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -646,10 +640,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 	struct hns_roce_v2_wqe_data_seg *dseg;
 	struct hns_roce_rinl_sge *sge_list;
 	struct device *dev = hr_dev->dev;
-	struct ib_qp_attr attr;
 	unsigned long flags;
 	void *wqe = NULL;
-	int attr_mask;
 	u32 wqe_idx;
 	int nreq;
 	int ret;
@@ -719,19 +711,19 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 
 		*hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff;
 
-		if (hr_qp->state == IB_QPS_ERR) {
-			attr_mask = IB_QP_STATE;
-			attr.qp_state = IB_QPS_ERR;
-
-			ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr,
-						    attr_mask, hr_qp->state,
-						    IB_QPS_ERR);
-			if (ret) {
-				spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
-				*bad_wr = wr;
-				return ret;
-			}
-		}
+		/*
+		 * Hip08 hardware cannot flush the WQEs in RQ if the QP state
+		 * gets into errored mode. Hence, as a workaround to this
+		 * hardware limitation, the driver needs to assist in flushing.
+		 * But the flush operation uses a mailbox to convey the QP
+		 * state to the hardware, which can sleep due to the mutex
+		 * protection around the mailbox calls. Hence, use the
+		 * deferred flush for now.
+		 */
+		if (hr_qp->state == IB_QPS_ERR)
+			if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG,
+					      &hr_qp->flush_flag))
+				init_flush_work(hr_dev, hr_qp);
 	}
 	spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
 
@@ -3013,13 +3005,11 @@ static int hns_roce_v2_sw_poll_cq(struct hns_roce_cq *hr_cq, int num_entries,
 static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 				struct hns_roce_qp **cur_qp, struct ib_wc *wc)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
 	struct hns_roce_srq *srq = NULL;
-	struct hns_roce_dev *hr_dev;
 	struct hns_roce_v2_cqe *cqe;
 	struct hns_roce_qp *hr_qp;
 	struct hns_roce_wq *wq;
-	struct ib_qp_attr attr;
-	int attr_mask;
 	int is_send;
 	u16 wqe_ctr;
 	u32 opcode;
@@ -3043,7 +3033,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 			       V2_CQE_BYTE_16_LCL_QPN_S);
 
 	if (!*cur_qp || (qpn & HNS_ROCE_V2_CQE_QPN_MASK) != (*cur_qp)->qpn) {
-		hr_dev = to_hr_dev(hr_cq->ib_cq.device);
 		hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
 		if (unlikely(!hr_qp)) {
 			dev_err(hr_dev->dev, "CQ %06lx with entry for unknown QPN %06x\n",
@@ -3053,6 +3042,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 		*cur_qp = hr_qp;
 	}
 
+	hr_qp = *cur_qp;
 	wc->qp = &(*cur_qp)->ibqp;
 	wc->vendor_err = 0;
 
@@ -3137,14 +3127,24 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 		break;
 	}
 
-	/* flush cqe if wc status is error, excluding flush error */
-	if ((wc->status != IB_WC_SUCCESS) &&
-	    (wc->status != IB_WC_WR_FLUSH_ERR)) {
-		attr_mask = IB_QP_STATE;
-		attr.qp_state = IB_QPS_ERR;
-		return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp,
-					     &attr, attr_mask,
-					     (*cur_qp)->state, IB_QPS_ERR);
+	/*
+	 * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets
+	 * into errored mode. Hence, as a workaround to this hardware
+	 * limitation, the driver needs to assist in flushing. But the flush
+	 * operation uses a mailbox to convey the QP state to the hardware,
+	 * which can sleep due to the mutex protection around the mailbox
+	 * calls. Hence, use the deferred flush for now. Once a wc error is
+	 * detected, the flush operation is triggered.
+	 */
+	if (wc->status != IB_WC_SUCCESS &&
+	    wc->status != IB_WC_WR_FLUSH_ERR) {
+		dev_err(hr_dev->dev, "error cqe status is: 0x%x\n",
+			status & HNS_ROCE_V2_CQE_STATUS_MASK);
+
+		if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag))
+			init_flush_work(hr_dev, hr_qp);
+
+		return 0;
 	}
 
 	if (wc->status == IB_WC_WR_FLUSH_ERR)
@@ -4735,6 +4735,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 	struct hns_roce_v2_qp_context *context = ctx;
 	struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
 	struct device *dev = hr_dev->dev;
+	unsigned long sq_flag = 0;
+	unsigned long rq_flag = 0;
 	int ret;
 
 	/*
@@ -4752,6 +4754,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 
 	/* When QP state is err, SQ and RQ WQE should be flushed */
 	if (new_state == IB_QPS_ERR) {
+		spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
+		spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
+		hr_qp->state = IB_QPS_ERR;
 		roce_set_field(context->byte_160_sq_ci_pi,
 			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
 			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
@@ -4769,6 +4774,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 				       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
 				       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
 		}
+		spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag);
+		spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag);
 	}
 
 	/* Configure the optional fields */
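The hunks above rely on a per-QP flush_flag word, an HNS_ROCE_FLUSH_FLAG bit, and an init_flush_work() helper, none of which are defined in this diff. As a rough, illustrative sketch only (the flag value, field names, and the reuse of the driver's existing struct hns_roce_work are assumptions, not taken from this patch), the supporting definitions could look something like this:

/* Illustrative sketch only: the flag value and field layout are assumptions. */
enum {
	HNS_ROCE_FLUSH_FLAG = 0,	/* bit index used with test_and_set_bit() */
};

struct hns_roce_qp {
	/* ... existing members elided ... */
	struct hns_roce_work	flush_work;	/* deferred flush work item */
	unsigned long		flush_flag;	/* holds HNS_ROCE_FLUSH_FLAG */
	/* ... existing members elided ... */
};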
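And here is a minimal sketch of how the deferred flush itself might be wired up, assuming the work item is queued on the driver's irq workqueue and the handler simply performs the modify-QP-to-error transition in process context. The names flush_work_handle() and the use of hr_dev->hw->modify_qp() here are illustrative assumptions, not the exact implementation referenced by this diff:

/*
 * Illustrative sketch: move the QP-to-error transition out of the
 * spinlock/interrupt paths so the mailbox call is free to sleep.
 */
static void flush_work_handle(struct work_struct *work)
{
	struct hns_roce_work *flush_work = container_of(work,
					struct hns_roce_work, work);
	struct hns_roce_qp *hr_qp = container_of(flush_work,
					struct hns_roce_qp, flush_work);
	struct hns_roce_dev *hr_dev = flush_work->hr_dev;
	struct ib_qp_attr attr;
	int attr_mask = IB_QP_STATE;
	int ret;

	attr.qp_state = IB_QPS_ERR;

	/* Process context: the mailbox mutex inside modify_qp may sleep. */
	ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
				    hr_qp->state, IB_QPS_ERR);
	if (ret)
		dev_err(hr_dev->dev,
			"failed to modify QP to error state, ret = %d\n", ret);

	/* Drop the reference taken when the work was queued. */
	if (atomic_dec_and_test(&hr_qp->refcount))
		complete(&hr_qp->free);
}

void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
	struct hns_roce_work *flush_work = &hr_qp->flush_work;

	flush_work->hr_dev = hr_dev;
	INIT_WORK(&flush_work->work, flush_work_handle);

	/* Hold the QP so it cannot be freed while the work is pending. */
	atomic_inc(&hr_qp->refcount);
	queue_work(hr_dev->irq_workq, &flush_work->work);
}

Clearing HNS_ROCE_FLUSH_FLAG once the flush has actually reached the hardware is left to the QP state machine and is not shown in this sketch.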