Skip to content

Commit b5fa635

Browse files
Leon Romanovsky authored and jgunthorpe committed
RDMA/nldev: Provide detailed QP information
Implement RDMA nldev netlink interface to get detailed information on each QP in the system. This includes the owning process or kernel ULP and detailed information from the qp_attrs. Currently only the dumpit variant is implemented. Reviewed-by: Mark Bloch <[email protected]> Signed-off-by: Leon Romanovsky <[email protected]> Reviewed-by: Steve Wise <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent bf3c5a9 commit b5fa635

File tree

2 files changed

+269
-0
lines changed

2 files changed

+269
-0
lines changed

drivers/infiniband/core/nldev.c

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,18 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
5959
[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
6060
.len = 16 },
6161
[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
62+
[RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
63+
[RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
64+
[RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
65+
[RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
66+
[RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
67+
[RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
68+
[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
69+
[RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
70+
[RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
71+
[RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
72+
[RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
73+
.len = TASK_COMM_LEN },
6274
};
6375

6476
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -200,6 +212,78 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
200212
return ret;
201213
}
202214

215+
/*
 * Append one RDMA_NLDEV_ATTR_RES_QP_ENTRY nest describing @qp to @msg.
 *
 * @msg:  netlink message being built
 * @qp:   queue pair to describe; its attributes are read via ib_query_qp()
 * @port: port filter; 0 means "any port", otherwise QPs whose queried
 *        port_num differs are skipped without emitting anything
 *
 * Returns 0 when the entry was written or filtered out, the ib_query_qp()
 * error if the query failed, or -EMSGSIZE when @msg has no room left (any
 * partially written nest is cancelled first).
 */
static int fill_res_qp_entry(struct sk_buff *msg,
			     struct ib_qp *qp, uint32_t port)
{
	struct rdma_restrack_entry *res = &qp->res;
	struct ib_qp_init_attr init_attr;
	struct ib_qp_attr attr;
	struct nlattr *nest;
	int rc;

	rc = ib_query_qp(qp, &attr, 0, &init_attr);
	if (rc)
		return rc;

	/* A caller-supplied port restricts the dump to QPs on that port. */
	if (port && attr.port_num != port)
		return 0;

	nest = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!nest)
		return -EMSGSIZE;

	/* In create_qp() the port is not set yet, so it is reported only if known */
	if (attr.port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, attr.port_num))
		goto err_cancel;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		goto err_cancel;

	/* Remote QPN and receive PSN are reported for RC and UC QPs only. */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				attr.dest_qp_num) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				attr.rq_psn))
			goto err_cancel;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, attr.sq_psn))
		goto err_cancel;

	/* Path migration state is reported for RC, UC and both XRC QP types. */
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       attr.path_mig_state))
			goto err_cancel;
	}

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type) ||
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, attr.qp_state))
		goto err_cancel;

	/*
	 * Kernel-owned QPs are identified by res->kern_name. For user QPs
	 * only the PID is reported; the netlink user is expected to read
	 * /proc/PID/comm to obtain the task name.
	 */
	if (rdma_is_kernel_res(res)) {
		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
				   res->kern_name))
			goto err_cancel;
	} else {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
				task_pid_vnr(res->task)))
			goto err_cancel;
	}

	nla_nest_end(msg, nest);
	return 0;

err_cancel:
	nla_nest_cancel(msg, nest);
	return -EMSGSIZE;
}
286+
203287
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
204288
struct netlink_ext_ack *extack)
205289
{
@@ -472,6 +556,136 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
472556
return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
473557
}
474558

559+
static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
560+
struct netlink_callback *cb)
561+
{
562+
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
563+
struct rdma_restrack_entry *res;
564+
int err, ret = 0, idx = 0;
565+
struct nlattr *table_attr;
566+
struct ib_device *device;
567+
int start = cb->args[0];
568+
struct ib_qp *qp = NULL;
569+
struct nlmsghdr *nlh;
570+
u32 index, port = 0;
571+
572+
err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
573+
nldev_policy, NULL);
574+
/*
575+
* Right now, we are expecting the device index to get QP information,
576+
* but it is possible to extend this code to return all devices in
577+
* one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
578+
* if it doesn't exist, we will iterate over all devices.
579+
*
580+
* But it is not needed for now.
581+
*/
582+
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
583+
return -EINVAL;
584+
585+
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
586+
device = ib_device_get_by_index(index);
587+
if (!device)
588+
return -EINVAL;
589+
590+
/*
591+
* If no PORT_INDEX is supplied, we will return all QPs from that device
592+
*/
593+
if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
594+
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
595+
if (!rdma_is_port_valid(device, port)) {
596+
ret = -EINVAL;
597+
goto err_index;
598+
}
599+
}
600+
601+
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
602+
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
603+
0, NLM_F_MULTI);
604+
605+
if (fill_nldev_handle(skb, device)) {
606+
ret = -EMSGSIZE;
607+
goto err;
608+
}
609+
610+
table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
611+
if (!table_attr) {
612+
ret = -EMSGSIZE;
613+
goto err;
614+
}
615+
616+
down_read(&device->res.rwsem);
617+
hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
618+
if (idx < start)
619+
goto next;
620+
621+
if ((rdma_is_kernel_res(res) &&
622+
task_active_pid_ns(current) != &init_pid_ns) ||
623+
(!rdma_is_kernel_res(res) &&
624+
task_active_pid_ns(current) != task_active_pid_ns(res->task)))
625+
/*
626+
* 1. Kernel QPs should be visible in init namspace only
627+
* 2. Present only QPs visible in the current namespace
628+
*/
629+
goto next;
630+
631+
if (!rdma_restrack_get(res))
632+
/*
633+
* Resource is under release now, but we are not
634+
* relesing lock now, so it will be released in
635+
* our next pass, once we will get ->next pointer.
636+
*/
637+
goto next;
638+
639+
qp = container_of(res, struct ib_qp, res);
640+
641+
up_read(&device->res.rwsem);
642+
ret = fill_res_qp_entry(skb, qp, port);
643+
down_read(&device->res.rwsem);
644+
/*
645+
* Return resource back, but it won't be released till
646+
* the &device->res.rwsem will be released for write.
647+
*/
648+
rdma_restrack_put(res);
649+
650+
if (ret == -EMSGSIZE)
651+
/*
652+
* There is a chance to optimize here.
653+
* It can be done by using list_prepare_entry
654+
* and list_for_each_entry_continue afterwards.
655+
*/
656+
break;
657+
if (ret)
658+
goto res_err;
659+
next: idx++;
660+
}
661+
up_read(&device->res.rwsem);
662+
663+
nla_nest_end(skb, table_attr);
664+
nlmsg_end(skb, nlh);
665+
cb->args[0] = idx;
666+
667+
/*
668+
* No more QPs to fill, cancel the message and
669+
* return 0 to mark end of dumpit.
670+
*/
671+
if (!qp)
672+
goto err;
673+
674+
put_device(&device->dev);
675+
return skb->len;
676+
677+
res_err:
678+
nla_nest_cancel(skb, table_attr);
679+
up_read(&device->res.rwsem);
680+
681+
err:
682+
nlmsg_cancel(skb, nlh);
683+
684+
err_index:
685+
put_device(&device->dev);
686+
return ret;
687+
}
688+
475689
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
476690
[RDMA_NLDEV_CMD_GET] = {
477691
.doit = nldev_get_doit,
@@ -485,6 +699,19 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
485699
.doit = nldev_res_get_doit,
486700
.dump = nldev_res_get_dumpit,
487701
},
702+
[RDMA_NLDEV_CMD_RES_QP_GET] = {
703+
.dump = nldev_res_get_qp_dumpit,
704+
/*
705+
* .doit is not implemented yet for two reasons:
706+
* 1. It is not needed yet.
707+
* 2. There is a need to provide identifier, while it is easy
708+
* for the QPs (device index + port index + LQPN), it is not
709+
* the case for the rest of resources (PD and CQ). Because it
710+
* is better to provide similar interface for all resources,
711+
* let's wait till we will have other resources implemented
712+
* too.
713+
*/
714+
},
488715
};
489716

490717
void __init nldev_init(void)

include/uapi/rdma/rdma_netlink.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,8 @@ enum rdma_nldev_command {
238238

239239
RDMA_NLDEV_CMD_RES_GET, /* can dump */
240240

241+
RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
242+
241243
RDMA_NLDEV_NUM_OPS
242244
};
243245

@@ -310,6 +312,46 @@ enum rdma_nldev_attr {
310312
RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, /* string */
311313
RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, /* u64 */
312314

315+
RDMA_NLDEV_ATTR_RES_QP, /* nested table */
316+
RDMA_NLDEV_ATTR_RES_QP_ENTRY, /* nested table */
317+
/*
318+
* Local QPN
319+
*/
320+
RDMA_NLDEV_ATTR_RES_LQPN, /* u32 */
321+
/*
322+
* Remote QPN,
323+
* Applicable for RC and UC only IBTA 11.2.5.3 QUERY QUEUE PAIR
324+
*/
325+
RDMA_NLDEV_ATTR_RES_RQPN, /* u32 */
326+
/*
327+
* Receive Queue PSN,
328+
* Applicable for RC and UC only 11.2.5.3 QUERY QUEUE PAIR
329+
*/
330+
RDMA_NLDEV_ATTR_RES_RQ_PSN, /* u32 */
331+
/*
332+
* Send Queue PSN
333+
*/
334+
RDMA_NLDEV_ATTR_RES_SQ_PSN, /* u32 */
335+
RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE, /* u8 */
336+
/*
337+
* QP types as visible to RDMA/core, the reserved QPT
338+
* are not exported through this interface.
339+
*/
340+
RDMA_NLDEV_ATTR_RES_TYPE, /* u8 */
341+
RDMA_NLDEV_ATTR_RES_STATE, /* u8 */
342+
/*
343+
* Process ID which created object,
344+
* in case of kernel origin, PID won't exist.
345+
*/
346+
RDMA_NLDEV_ATTR_RES_PID, /* u32 */
347+
/*
348+
* The name of the process that created the following resource.
349+
* It will exist only for kernel objects.
350+
* For user created objects, the user is supposed
351+
* to read /proc/PID/comm file.
352+
*/
353+
RDMA_NLDEV_ATTR_RES_KERN_NAME, /* string */
354+
313355
RDMA_NLDEV_ATTR_MAX
314356
};
315357
#endif /* _UAPI_RDMA_NETLINK_H */

0 commit comments

Comments
 (0)