Skip to content

Commit 89f8100

Browse files
selvintxavier authored and dledford committed
RDMA/bnxt_re: expose detailed stats retrieved from HW
Broadcom's adapter supports more granular statistics that give a better understanding of the state of the chip while data traffic is flowing. Expose the detailed stats to the consumer through the standard hook available in the kverbs interface. In order to retrieve all the information, the driver implements a firmware command. Signed-off-by: Selvin Xavier <[email protected]> Signed-off-by: Devesh Sharma <[email protected]> Signed-off-by: Doug Ledford <[email protected]>
1 parent 872f357 commit 89f8100

File tree

7 files changed

+417
-10
lines changed

7 files changed

+417
-10
lines changed

drivers/infiniband/hw/bnxt_re/bnxt_re.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ struct bnxt_re_dev {
121121
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
122122
#define BNXT_RE_FLAG_QOS_WORK_REG 5
123123
#define BNXT_RE_FLAG_TASK_IN_PROG 6
124+
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
124125
struct net_device *netdev;
125126
unsigned int version, major, minor;
126127
struct bnxt_en_dev *en_dev;
@@ -168,6 +169,7 @@ struct bnxt_re_dev {
168169
atomic_t nq_alloc_cnt;
169170
u32 is_virtfn;
170171
u32 num_vfs;
172+
struct bnxt_qplib_roce_stats stats;
171173
};
172174

173175
#define to_bnxt_re_dev(ptr, member) \

drivers/infiniband/hw/bnxt_re/hw_counters.c

Lines changed: 135 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,55 @@
5858
#include "hw_counters.h"
5959

6060
static const char * const bnxt_re_stat_name[] = {
61-
[BNXT_RE_ACTIVE_QP] = "active_qps",
62-
[BNXT_RE_ACTIVE_SRQ] = "active_srqs",
63-
[BNXT_RE_ACTIVE_CQ] = "active_cqs",
64-
[BNXT_RE_ACTIVE_MR] = "active_mrs",
65-
[BNXT_RE_ACTIVE_MW] = "active_mws",
66-
[BNXT_RE_RX_PKTS] = "rx_pkts",
67-
[BNXT_RE_RX_BYTES] = "rx_bytes",
68-
[BNXT_RE_TX_PKTS] = "tx_pkts",
69-
[BNXT_RE_TX_BYTES] = "tx_bytes",
70-
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors"
61+
[BNXT_RE_ACTIVE_QP] = "active_qps",
62+
[BNXT_RE_ACTIVE_SRQ] = "active_srqs",
63+
[BNXT_RE_ACTIVE_CQ] = "active_cqs",
64+
[BNXT_RE_ACTIVE_MR] = "active_mrs",
65+
[BNXT_RE_ACTIVE_MW] = "active_mws",
66+
[BNXT_RE_RX_PKTS] = "rx_pkts",
67+
[BNXT_RE_RX_BYTES] = "rx_bytes",
68+
[BNXT_RE_TX_PKTS] = "tx_pkts",
69+
[BNXT_RE_TX_BYTES] = "tx_bytes",
70+
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
71+
[BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
72+
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
73+
[BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
74+
[BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd",
75+
[BNXT_RE_MISSING_RESP] = "missin_resp",
76+
[BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err",
77+
[BNXT_RE_BAD_RESP_ERR] = "bad_resp_err",
78+
[BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err",
79+
[BNXT_RE_LOCAL_PROTECTION_ERR] = "local_protection_err",
80+
[BNXT_RE_MEM_MGMT_OP_ERR] = "mem_mgmt_op_err",
81+
[BNXT_RE_REMOTE_INVALID_REQ_ERR] = "remote_invalid_req_err",
82+
[BNXT_RE_REMOTE_ACCESS_ERR] = "remote_access_err",
83+
[BNXT_RE_REMOTE_OP_ERR] = "remote_op_err",
84+
[BNXT_RE_DUP_REQ] = "dup_req",
85+
[BNXT_RE_RES_EXCEED_MAX] = "res_exceed_max",
86+
[BNXT_RE_RES_LENGTH_MISMATCH] = "res_length_mismatch",
87+
[BNXT_RE_RES_EXCEEDS_WQE] = "res_exceeds_wqe",
88+
[BNXT_RE_RES_OPCODE_ERR] = "res_opcode_err",
89+
[BNXT_RE_RES_RX_INVALID_RKEY] = "res_rx_invalid_rkey",
90+
[BNXT_RE_RES_RX_DOMAIN_ERR] = "res_rx_domain_err",
91+
[BNXT_RE_RES_RX_NO_PERM] = "res_rx_no_perm",
92+
[BNXT_RE_RES_RX_RANGE_ERR] = "res_rx_range_err",
93+
[BNXT_RE_RES_TX_INVALID_RKEY] = "res_tx_invalid_rkey",
94+
[BNXT_RE_RES_TX_DOMAIN_ERR] = "res_tx_domain_err",
95+
[BNXT_RE_RES_TX_NO_PERM] = "res_tx_no_perm",
96+
[BNXT_RE_RES_TX_RANGE_ERR] = "res_tx_range_err",
97+
[BNXT_RE_RES_IRRQ_OFLOW] = "res_irrq_oflow",
98+
[BNXT_RE_RES_UNSUP_OPCODE] = "res_unsup_opcode",
99+
[BNXT_RE_RES_UNALIGNED_ATOMIC] = "res_unaligned_atomic",
100+
[BNXT_RE_RES_REM_INV_ERR] = "res_rem_inv_err",
101+
[BNXT_RE_RES_MEM_ERROR] = "res_mem_err",
102+
[BNXT_RE_RES_SRQ_ERR] = "res_srq_err",
103+
[BNXT_RE_RES_CMP_ERR] = "res_cmp_err",
104+
[BNXT_RE_RES_INVALID_DUP_RKEY] = "res_invalid_dup_rkey",
105+
[BNXT_RE_RES_WQE_FORMAT_ERR] = "res_wqe_format_err",
106+
[BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
107+
[BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
108+
[BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
109+
[BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err"
71110
};
72111

73112
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
@@ -76,6 +115,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
76115
{
77116
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
78117
struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma;
118+
int rc = 0;
79119

80120
if (!port || !stats)
81121
return -EINVAL;
@@ -97,6 +137,91 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
97137
stats->value[BNXT_RE_TX_BYTES] =
98138
le64_to_cpu(bnxt_re_stats->tx_ucast_bytes);
99139
}
140+
if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) {
141+
rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats);
142+
if (rc)
143+
clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
144+
&rdev->flags);
145+
stats->value[BNXT_RE_TO_RETRANSMITS] =
146+
rdev->stats.to_retransmits;
147+
stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
148+
rdev->stats.seq_err_naks_rcvd;
149+
stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
150+
rdev->stats.max_retry_exceeded;
151+
stats->value[BNXT_RE_RNR_NAKS_RCVD] =
152+
rdev->stats.rnr_naks_rcvd;
153+
stats->value[BNXT_RE_MISSING_RESP] =
154+
rdev->stats.missing_resp;
155+
stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
156+
rdev->stats.unrecoverable_err;
157+
stats->value[BNXT_RE_BAD_RESP_ERR] =
158+
rdev->stats.bad_resp_err;
159+
stats->value[BNXT_RE_LOCAL_QP_OP_ERR] =
160+
rdev->stats.local_qp_op_err;
161+
stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
162+
rdev->stats.local_protection_err;
163+
stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
164+
rdev->stats.mem_mgmt_op_err;
165+
stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
166+
rdev->stats.remote_invalid_req_err;
167+
stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
168+
rdev->stats.remote_access_err;
169+
stats->value[BNXT_RE_REMOTE_OP_ERR] =
170+
rdev->stats.remote_op_err;
171+
stats->value[BNXT_RE_DUP_REQ] =
172+
rdev->stats.dup_req;
173+
stats->value[BNXT_RE_RES_EXCEED_MAX] =
174+
rdev->stats.res_exceed_max;
175+
stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
176+
rdev->stats.res_length_mismatch;
177+
stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
178+
rdev->stats.res_exceeds_wqe;
179+
stats->value[BNXT_RE_RES_OPCODE_ERR] =
180+
rdev->stats.res_opcode_err;
181+
stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
182+
rdev->stats.res_rx_invalid_rkey;
183+
stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
184+
rdev->stats.res_rx_domain_err;
185+
stats->value[BNXT_RE_RES_RX_NO_PERM] =
186+
rdev->stats.res_rx_no_perm;
187+
stats->value[BNXT_RE_RES_RX_RANGE_ERR] =
188+
rdev->stats.res_rx_range_err;
189+
stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
190+
rdev->stats.res_tx_invalid_rkey;
191+
stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
192+
rdev->stats.res_tx_domain_err;
193+
stats->value[BNXT_RE_RES_TX_NO_PERM] =
194+
rdev->stats.res_tx_no_perm;
195+
stats->value[BNXT_RE_RES_TX_RANGE_ERR] =
196+
rdev->stats.res_tx_range_err;
197+
stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
198+
rdev->stats.res_irrq_oflow;
199+
stats->value[BNXT_RE_RES_UNSUP_OPCODE] =
200+
rdev->stats.res_unsup_opcode;
201+
stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
202+
rdev->stats.res_unaligned_atomic;
203+
stats->value[BNXT_RE_RES_REM_INV_ERR] =
204+
rdev->stats.res_rem_inv_err;
205+
stats->value[BNXT_RE_RES_MEM_ERROR] =
206+
rdev->stats.res_mem_error;
207+
stats->value[BNXT_RE_RES_SRQ_ERR] =
208+
rdev->stats.res_srq_err;
209+
stats->value[BNXT_RE_RES_CMP_ERR] =
210+
rdev->stats.res_cmp_err;
211+
stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
212+
rdev->stats.res_invalid_dup_rkey;
213+
stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
214+
rdev->stats.res_wqe_format_err;
215+
stats->value[BNXT_RE_RES_CQ_LOAD_ERR] =
216+
rdev->stats.res_cq_load_err;
217+
stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] =
218+
rdev->stats.res_srq_load_err;
219+
stats->value[BNXT_RE_RES_TX_PCI_ERR] =
220+
rdev->stats.res_tx_pci_err;
221+
stats->value[BNXT_RE_RES_RX_PCI_ERR] =
222+
rdev->stats.res_rx_pci_err;
223+
}
224+
100225
return ARRAY_SIZE(bnxt_re_stat_name);
101226
}
102227

drivers/infiniband/hw/bnxt_re/hw_counters.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,45 @@ enum bnxt_re_hw_stats {
5151
BNXT_RE_TX_PKTS,
5252
BNXT_RE_TX_BYTES,
5353
BNXT_RE_RECOVERABLE_ERRORS,
54+
BNXT_RE_TO_RETRANSMITS,
55+
BNXT_RE_SEQ_ERR_NAKS_RCVD,
56+
BNXT_RE_MAX_RETRY_EXCEEDED,
57+
BNXT_RE_RNR_NAKS_RCVD,
58+
BNXT_RE_MISSING_RESP,
59+
BNXT_RE_UNRECOVERABLE_ERR,
60+
BNXT_RE_BAD_RESP_ERR,
61+
BNXT_RE_LOCAL_QP_OP_ERR,
62+
BNXT_RE_LOCAL_PROTECTION_ERR,
63+
BNXT_RE_MEM_MGMT_OP_ERR,
64+
BNXT_RE_REMOTE_INVALID_REQ_ERR,
65+
BNXT_RE_REMOTE_ACCESS_ERR,
66+
BNXT_RE_REMOTE_OP_ERR,
67+
BNXT_RE_DUP_REQ,
68+
BNXT_RE_RES_EXCEED_MAX,
69+
BNXT_RE_RES_LENGTH_MISMATCH,
70+
BNXT_RE_RES_EXCEEDS_WQE,
71+
BNXT_RE_RES_OPCODE_ERR,
72+
BNXT_RE_RES_RX_INVALID_RKEY,
73+
BNXT_RE_RES_RX_DOMAIN_ERR,
74+
BNXT_RE_RES_RX_NO_PERM,
75+
BNXT_RE_RES_RX_RANGE_ERR,
76+
BNXT_RE_RES_TX_INVALID_RKEY,
77+
BNXT_RE_RES_TX_DOMAIN_ERR,
78+
BNXT_RE_RES_TX_NO_PERM,
79+
BNXT_RE_RES_TX_RANGE_ERR,
80+
BNXT_RE_RES_IRRQ_OFLOW,
81+
BNXT_RE_RES_UNSUP_OPCODE,
82+
BNXT_RE_RES_UNALIGNED_ATOMIC,
83+
BNXT_RE_RES_REM_INV_ERR,
84+
BNXT_RE_RES_MEM_ERROR,
85+
BNXT_RE_RES_SRQ_ERR,
86+
BNXT_RE_RES_CMP_ERR,
87+
BNXT_RE_RES_INVALID_DUP_RKEY,
88+
BNXT_RE_RES_WQE_FORMAT_ERR,
89+
BNXT_RE_RES_CQ_LOAD_ERR,
90+
BNXT_RE_RES_SRQ_LOAD_ERR,
91+
BNXT_RE_RES_TX_PCI_ERR,
92+
BNXT_RE_RES_RX_PCI_ERR,
5493
BNXT_RE_NUM_COUNTERS
5594
};
5695

drivers/infiniband/hw/bnxt_re/main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,7 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
12451245
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
12461246
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
12471247
&rdev->active_width);
1248+
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
12481249
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
12491250
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
12501251

drivers/infiniband/hw/bnxt_re/qplib_sp.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -790,3 +790,73 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
790790
0);
791791
return 0;
792792
}
793+
794+
int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
795+
struct bnxt_qplib_roce_stats *stats)
796+
{
797+
struct cmdq_query_roce_stats req;
798+
struct creq_query_roce_stats_resp resp;
799+
struct bnxt_qplib_rcfw_sbuf *sbuf;
800+
struct creq_query_roce_stats_resp_sb *sb;
801+
u16 cmd_flags = 0;
802+
int rc = 0;
803+
804+
RCFW_CMD_PREP(req, QUERY_ROCE_STATS, cmd_flags);
805+
806+
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
807+
if (!sbuf) {
808+
dev_err(&rcfw->pdev->dev,
809+
"QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed");
810+
return -ENOMEM;
811+
}
812+
813+
sb = sbuf->sb;
814+
req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
815+
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
816+
(void *)sbuf, 0);
817+
if (rc)
818+
goto bail;
819+
/* Extract the context from the side buffer */
820+
stats->to_retransmits = le64_to_cpu(sb->to_retransmits);
821+
stats->seq_err_naks_rcvd = le64_to_cpu(sb->seq_err_naks_rcvd);
822+
stats->max_retry_exceeded = le64_to_cpu(sb->max_retry_exceeded);
823+
stats->rnr_naks_rcvd = le64_to_cpu(sb->rnr_naks_rcvd);
824+
stats->missing_resp = le64_to_cpu(sb->missing_resp);
825+
stats->unrecoverable_err = le64_to_cpu(sb->unrecoverable_err);
826+
stats->bad_resp_err = le64_to_cpu(sb->bad_resp_err);
827+
stats->local_qp_op_err = le64_to_cpu(sb->local_qp_op_err);
828+
stats->local_protection_err = le64_to_cpu(sb->local_protection_err);
829+
stats->mem_mgmt_op_err = le64_to_cpu(sb->mem_mgmt_op_err);
830+
stats->remote_invalid_req_err = le64_to_cpu(sb->remote_invalid_req_err);
831+
stats->remote_access_err = le64_to_cpu(sb->remote_access_err);
832+
stats->remote_op_err = le64_to_cpu(sb->remote_op_err);
833+
stats->dup_req = le64_to_cpu(sb->dup_req);
834+
stats->res_exceed_max = le64_to_cpu(sb->res_exceed_max);
835+
stats->res_length_mismatch = le64_to_cpu(sb->res_length_mismatch);
836+
stats->res_exceeds_wqe = le64_to_cpu(sb->res_exceeds_wqe);
837+
stats->res_opcode_err = le64_to_cpu(sb->res_opcode_err);
838+
stats->res_rx_invalid_rkey = le64_to_cpu(sb->res_rx_invalid_rkey);
839+
stats->res_rx_domain_err = le64_to_cpu(sb->res_rx_domain_err);
840+
stats->res_rx_no_perm = le64_to_cpu(sb->res_rx_no_perm);
841+
stats->res_rx_range_err = le64_to_cpu(sb->res_rx_range_err);
842+
stats->res_tx_invalid_rkey = le64_to_cpu(sb->res_tx_invalid_rkey);
843+
stats->res_tx_domain_err = le64_to_cpu(sb->res_tx_domain_err);
844+
stats->res_tx_no_perm = le64_to_cpu(sb->res_tx_no_perm);
845+
stats->res_tx_range_err = le64_to_cpu(sb->res_tx_range_err);
846+
stats->res_irrq_oflow = le64_to_cpu(sb->res_irrq_oflow);
847+
stats->res_unsup_opcode = le64_to_cpu(sb->res_unsup_opcode);
848+
stats->res_unaligned_atomic = le64_to_cpu(sb->res_unaligned_atomic);
849+
stats->res_rem_inv_err = le64_to_cpu(sb->res_rem_inv_err);
850+
stats->res_mem_error = le64_to_cpu(sb->res_mem_error);
851+
stats->res_srq_err = le64_to_cpu(sb->res_srq_err);
852+
stats->res_cmp_err = le64_to_cpu(sb->res_cmp_err);
853+
stats->res_invalid_dup_rkey = le64_to_cpu(sb->res_invalid_dup_rkey);
854+
stats->res_wqe_format_err = le64_to_cpu(sb->res_wqe_format_err);
855+
stats->res_cq_load_err = le64_to_cpu(sb->res_cq_load_err);
856+
stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
857+
stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
858+
stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
859+
bail:
860+
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
861+
return rc;
862+
}

0 commit comments

Comments
 (0)