@@ -59,6 +59,18 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
59
59
[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME ] = { .type = NLA_NUL_STRING ,
60
60
.len = 16 },
61
61
[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR ] = { .type = NLA_U64 },
62
+ [RDMA_NLDEV_ATTR_RES_QP ] = { .type = NLA_NESTED },
63
+ [RDMA_NLDEV_ATTR_RES_QP_ENTRY ] = { .type = NLA_NESTED },
64
+ [RDMA_NLDEV_ATTR_RES_LQPN ] = { .type = NLA_U32 },
65
+ [RDMA_NLDEV_ATTR_RES_RQPN ] = { .type = NLA_U32 },
66
+ [RDMA_NLDEV_ATTR_RES_RQ_PSN ] = { .type = NLA_U32 },
67
+ [RDMA_NLDEV_ATTR_RES_SQ_PSN ] = { .type = NLA_U32 },
68
+ [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE ] = { .type = NLA_U8 },
69
+ [RDMA_NLDEV_ATTR_RES_TYPE ] = { .type = NLA_U8 },
70
+ [RDMA_NLDEV_ATTR_RES_STATE ] = { .type = NLA_U8 },
71
+ [RDMA_NLDEV_ATTR_RES_PID ] = { .type = NLA_U32 },
72
+ [RDMA_NLDEV_ATTR_RES_KERN_NAME ] = { .type = NLA_NUL_STRING ,
73
+ .len = TASK_COMM_LEN },
62
74
};
63
75
64
76
static int fill_nldev_handle (struct sk_buff * msg , struct ib_device * device )
@@ -200,6 +212,78 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
200
212
return ret ;
201
213
}
202
214
215
+ static int fill_res_qp_entry (struct sk_buff * msg ,
216
+ struct ib_qp * qp , uint32_t port )
217
+ {
218
+ struct rdma_restrack_entry * res = & qp -> res ;
219
+ struct ib_qp_init_attr qp_init_attr ;
220
+ struct nlattr * entry_attr ;
221
+ struct ib_qp_attr qp_attr ;
222
+ int ret ;
223
+
224
+ ret = ib_query_qp (qp , & qp_attr , 0 , & qp_init_attr );
225
+ if (ret )
226
+ return ret ;
227
+
228
+ if (port && port != qp_attr .port_num )
229
+ return 0 ;
230
+
231
+ entry_attr = nla_nest_start (msg , RDMA_NLDEV_ATTR_RES_QP_ENTRY );
232
+ if (!entry_attr )
233
+ goto out ;
234
+
235
+ /* In create_qp() port is not set yet */
236
+ if (qp_attr .port_num &&
237
+ nla_put_u32 (msg , RDMA_NLDEV_ATTR_PORT_INDEX , qp_attr .port_num ))
238
+ goto err ;
239
+
240
+ if (nla_put_u32 (msg , RDMA_NLDEV_ATTR_RES_LQPN , qp -> qp_num ))
241
+ goto err ;
242
+ if (qp -> qp_type == IB_QPT_RC || qp -> qp_type == IB_QPT_UC ) {
243
+ if (nla_put_u32 (msg , RDMA_NLDEV_ATTR_RES_RQPN ,
244
+ qp_attr .dest_qp_num ))
245
+ goto err ;
246
+ if (nla_put_u32 (msg , RDMA_NLDEV_ATTR_RES_RQ_PSN ,
247
+ qp_attr .rq_psn ))
248
+ goto err ;
249
+ }
250
+
251
+ if (nla_put_u32 (msg , RDMA_NLDEV_ATTR_RES_SQ_PSN , qp_attr .sq_psn ))
252
+ goto err ;
253
+
254
+ if (qp -> qp_type == IB_QPT_RC || qp -> qp_type == IB_QPT_UC ||
255
+ qp -> qp_type == IB_QPT_XRC_INI || qp -> qp_type == IB_QPT_XRC_TGT ) {
256
+ if (nla_put_u8 (msg , RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE ,
257
+ qp_attr .path_mig_state ))
258
+ goto err ;
259
+ }
260
+ if (nla_put_u8 (msg , RDMA_NLDEV_ATTR_RES_TYPE , qp -> qp_type ))
261
+ goto err ;
262
+ if (nla_put_u8 (msg , RDMA_NLDEV_ATTR_RES_STATE , qp_attr .qp_state ))
263
+ goto err ;
264
+
265
+ /*
266
+ * Existence of task means that it is user QP and netlink
267
+ * user is invited to go and read /proc/PID/comm to get name
268
+ * of the task file and res->task_com should be NULL.
269
+ */
270
+ if (rdma_is_kernel_res (res )) {
271
+ if (nla_put_string (msg , RDMA_NLDEV_ATTR_RES_KERN_NAME , res -> kern_name ))
272
+ goto err ;
273
+ } else {
274
+ if (nla_put_u32 (msg , RDMA_NLDEV_ATTR_RES_PID , task_pid_vnr (res -> task )))
275
+ goto err ;
276
+ }
277
+
278
+ nla_nest_end (msg , entry_attr );
279
+ return 0 ;
280
+
281
+ err :
282
+ nla_nest_cancel (msg , entry_attr );
283
+ out :
284
+ return - EMSGSIZE ;
285
+ }
286
+
203
287
static int nldev_get_doit (struct sk_buff * skb , struct nlmsghdr * nlh ,
204
288
struct netlink_ext_ack * extack )
205
289
{
@@ -472,6 +556,136 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
472
556
return ib_enum_all_devs (_nldev_res_get_dumpit , skb , cb );
473
557
}
474
558
559
+ static int nldev_res_get_qp_dumpit (struct sk_buff * skb ,
560
+ struct netlink_callback * cb )
561
+ {
562
+ struct nlattr * tb [RDMA_NLDEV_ATTR_MAX ];
563
+ struct rdma_restrack_entry * res ;
564
+ int err , ret = 0 , idx = 0 ;
565
+ struct nlattr * table_attr ;
566
+ struct ib_device * device ;
567
+ int start = cb -> args [0 ];
568
+ struct ib_qp * qp = NULL ;
569
+ struct nlmsghdr * nlh ;
570
+ u32 index , port = 0 ;
571
+
572
+ err = nlmsg_parse (cb -> nlh , 0 , tb , RDMA_NLDEV_ATTR_MAX - 1 ,
573
+ nldev_policy , NULL );
574
+ /*
575
+ * Right now, we are expecting the device index to get QP information,
576
+ * but it is possible to extend this code to return all devices in
577
+ * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
578
+ * if it doesn't exist, we will iterate over all devices.
579
+ *
580
+ * But it is not needed for now.
581
+ */
582
+ if (err || !tb [RDMA_NLDEV_ATTR_DEV_INDEX ])
583
+ return - EINVAL ;
584
+
585
+ index = nla_get_u32 (tb [RDMA_NLDEV_ATTR_DEV_INDEX ]);
586
+ device = ib_device_get_by_index (index );
587
+ if (!device )
588
+ return - EINVAL ;
589
+
590
+ /*
591
+ * If no PORT_INDEX is supplied, we will return all QPs from that device
592
+ */
593
+ if (tb [RDMA_NLDEV_ATTR_PORT_INDEX ]) {
594
+ port = nla_get_u32 (tb [RDMA_NLDEV_ATTR_PORT_INDEX ]);
595
+ if (!rdma_is_port_valid (device , port )) {
596
+ ret = - EINVAL ;
597
+ goto err_index ;
598
+ }
599
+ }
600
+
601
+ nlh = nlmsg_put (skb , NETLINK_CB (cb -> skb ).portid , cb -> nlh -> nlmsg_seq ,
602
+ RDMA_NL_GET_TYPE (RDMA_NL_NLDEV , RDMA_NLDEV_CMD_RES_QP_GET ),
603
+ 0 , NLM_F_MULTI );
604
+
605
+ if (fill_nldev_handle (skb , device )) {
606
+ ret = - EMSGSIZE ;
607
+ goto err ;
608
+ }
609
+
610
+ table_attr = nla_nest_start (skb , RDMA_NLDEV_ATTR_RES_QP );
611
+ if (!table_attr ) {
612
+ ret = - EMSGSIZE ;
613
+ goto err ;
614
+ }
615
+
616
+ down_read (& device -> res .rwsem );
617
+ hash_for_each_possible (device -> res .hash , res , node , RDMA_RESTRACK_QP ) {
618
+ if (idx < start )
619
+ goto next ;
620
+
621
+ if ((rdma_is_kernel_res (res ) &&
622
+ task_active_pid_ns (current ) != & init_pid_ns ) ||
623
+ (!rdma_is_kernel_res (res ) &&
624
+ task_active_pid_ns (current ) != task_active_pid_ns (res -> task )))
625
+ /*
626
+ * 1. Kernel QPs should be visible in init namspace only
627
+ * 2. Present only QPs visible in the current namespace
628
+ */
629
+ goto next ;
630
+
631
+ if (!rdma_restrack_get (res ))
632
+ /*
633
+ * Resource is under release now, but we are not
634
+ * relesing lock now, so it will be released in
635
+ * our next pass, once we will get ->next pointer.
636
+ */
637
+ goto next ;
638
+
639
+ qp = container_of (res , struct ib_qp , res );
640
+
641
+ up_read (& device -> res .rwsem );
642
+ ret = fill_res_qp_entry (skb , qp , port );
643
+ down_read (& device -> res .rwsem );
644
+ /*
645
+ * Return resource back, but it won't be released till
646
+ * the &device->res.rwsem will be released for write.
647
+ */
648
+ rdma_restrack_put (res );
649
+
650
+ if (ret == - EMSGSIZE )
651
+ /*
652
+ * There is a chance to optimize here.
653
+ * It can be done by using list_prepare_entry
654
+ * and list_for_each_entry_continue afterwards.
655
+ */
656
+ break ;
657
+ if (ret )
658
+ goto res_err ;
659
+ next : idx ++ ;
660
+ }
661
+ up_read (& device -> res .rwsem );
662
+
663
+ nla_nest_end (skb , table_attr );
664
+ nlmsg_end (skb , nlh );
665
+ cb -> args [0 ] = idx ;
666
+
667
+ /*
668
+ * No more QPs to fill, cancel the message and
669
+ * return 0 to mark end of dumpit.
670
+ */
671
+ if (!qp )
672
+ goto err ;
673
+
674
+ put_device (& device -> dev );
675
+ return skb -> len ;
676
+
677
+ res_err :
678
+ nla_nest_cancel (skb , table_attr );
679
+ up_read (& device -> res .rwsem );
680
+
681
+ err :
682
+ nlmsg_cancel (skb , nlh );
683
+
684
+ err_index :
685
+ put_device (& device -> dev );
686
+ return ret ;
687
+ }
688
+
475
689
static const struct rdma_nl_cbs nldev_cb_table [RDMA_NLDEV_NUM_OPS ] = {
476
690
[RDMA_NLDEV_CMD_GET ] = {
477
691
.doit = nldev_get_doit ,
@@ -485,6 +699,19 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
485
699
.doit = nldev_res_get_doit ,
486
700
.dump = nldev_res_get_dumpit ,
487
701
},
702
+ [RDMA_NLDEV_CMD_RES_QP_GET ] = {
703
+ .dump = nldev_res_get_qp_dumpit ,
704
+ /*
705
+ * .doit is not implemented yet for two reasons:
706
+ * 1. It is not needed yet.
707
+ * 2. There is a need to provide identifier, while it is easy
708
+ * for the QPs (device index + port index + LQPN), it is not
709
+ * the case for the rest of resources (PD and CQ). Because it
710
+ * is better to provide similar interface for all resources,
711
+ * let's wait till we will have other resources implemented
712
+ * too.
713
+ */
714
+ },
488
715
};
489
716
490
717
void __init nldev_init (void )
0 commit comments