Skip to content

Commit 9cbed5a

Browse files
chiarameiohas authored and rleon committed
RDMA/nldev: Add support for RDMA monitoring
Introduce a new netlink command to allow rdma event monitoring.
The rdma events supported now are IB device
registration/unregistration and net device attachment/detachment.

Example output of rdma monitor and the commands which trigger
the events:

$ rdma monitor
$ rmmod mlx5_ib
[UNREGISTER] dev 1 rocep8s0f1
[UNREGISTER] dev 0 rocep8s0f0

$ modprobe mlx5_ib
[REGISTER] dev 2 mlx5_0
[NETDEV_ATTACH] dev 2 mlx5_0 port 1 netdev 4 eth2
[REGISTER] dev 3 mlx5_1
[NETDEV_ATTACH] dev 3 mlx5_1 port 1 netdev 5 eth3

$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
[UNREGISTER] dev 2 rocep8s0f0
[REGISTER] dev 4 mlx5_0
[NETDEV_ATTACH] dev 4 mlx5_0 port 30 netdev 4 eth2

$ echo 4 > /sys/class/net/eth2/device/sriov_numvfs
[NETDEV_ATTACH] dev 4 rdmap8s0f0 port 2 netdev 7 eth4
[NETDEV_ATTACH] dev 4 rdmap8s0f0 port 3 netdev 8 eth5
[NETDEV_ATTACH] dev 4 rdmap8s0f0 port 4 netdev 9 eth6
[NETDEV_ATTACH] dev 4 rdmap8s0f0 port 5 netdev 10 eth7
[REGISTER] dev 5 mlx5_0
[NETDEV_ATTACH] dev 5 mlx5_0 port 1 netdev 11 eth8
[REGISTER] dev 6 mlx5_0
[NETDEV_ATTACH] dev 6 mlx5_0 port 1 netdev 12 eth9
[REGISTER] dev 7 mlx5_0
[NETDEV_ATTACH] dev 7 mlx5_0 port 1 netdev 13 eth10
[REGISTER] dev 8 mlx5_0
[NETDEV_ATTACH] dev 8 mlx5_0 port 1 netdev 14 eth11

$ echo 0 > /sys/class/net/eth2/device/sriov_numvfs
[UNREGISTER] dev 5 rocep8s0f0v0
[UNREGISTER] dev 6 rocep8s0f0v1
[UNREGISTER] dev 7 rocep8s0f0v2
[UNREGISTER] dev 8 rocep8s0f0v3
[NETDEV_DETACH] dev 4 rdmap8s0f0 port 2
[NETDEV_DETACH] dev 4 rdmap8s0f0 port 3
[NETDEV_DETACH] dev 4 rdmap8s0f0 port 4
[NETDEV_DETACH] dev 4 rdmap8s0f0 port 5

Signed-off-by: Chiara Meiohas <[email protected]>
Signed-off-by: Michael Guralnik <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Leon Romanovsky <[email protected]>
1 parent 8d159eb commit 9cbed5a

File tree

5 files changed

+187
-0
lines changed

5 files changed

+187
-0
lines changed

drivers/infiniband/core/device.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,6 +1351,29 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
13511351
{
13521352
}
13531353

1354+
static void ib_device_notify_register(struct ib_device *device)
1355+
{
1356+
struct net_device *netdev;
1357+
u32 port;
1358+
int ret;
1359+
1360+
ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
1361+
if (ret)
1362+
return;
1363+
1364+
rdma_for_each_port(device, port) {
1365+
netdev = ib_device_get_netdev(device, port);
1366+
if (!netdev)
1367+
continue;
1368+
1369+
ret = rdma_nl_notify_event(device, port,
1370+
RDMA_NETDEV_ATTACH_EVENT);
1371+
dev_put(netdev);
1372+
if (ret)
1373+
return;
1374+
}
1375+
}
1376+
13541377
/**
13551378
* ib_register_device - Register an IB device with IB core
13561379
* @device: Device to register
@@ -1449,6 +1472,8 @@ int ib_register_device(struct ib_device *device, const char *name,
14491472
dev_set_uevent_suppress(&device->dev, false);
14501473
/* Mark for userspace that device is ready */
14511474
kobject_uevent(&device->dev.kobj, KOBJ_ADD);
1475+
1476+
ib_device_notify_register(device);
14521477
ib_device_put(device);
14531478

14541479
return 0;
@@ -1491,6 +1516,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
14911516
goto out;
14921517

14931518
disable_device(ib_dev);
1519+
rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT);
14941520

14951521
/* Expedite removing unregistered pointers from the hash table */
14961522
free_netdevs(ib_dev);
@@ -2159,6 +2185,7 @@ static void add_ndev_hash(struct ib_port_data *pdata)
21592185
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
21602186
u32 port)
21612187
{
2188+
enum rdma_nl_notify_event_type etype;
21622189
struct net_device *old_ndev;
21632190
struct ib_port_data *pdata;
21642191
unsigned long flags;
@@ -2190,6 +2217,14 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
21902217
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
21912218

21922219
add_ndev_hash(pdata);
2220+
2221+
/* Make sure that the device is registered before we send events */
2222+
if (xa_load(&devices, ib_dev->index) != ib_dev)
2223+
return 0;
2224+
2225+
etype = ndev ? RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT;
2226+
rdma_nl_notify_event(ib_dev, port, etype);
2227+
21932228
return 0;
21942229
}
21952230
EXPORT_SYMBOL(ib_device_set_netdev);

drivers/infiniband/core/netlink.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ int rdma_nl_net_init(struct rdma_dev_net *rnet)
311311
struct net *net = read_pnet(&rnet->net);
312312
struct netlink_kernel_cfg cfg = {
313313
.input = rdma_nl_rcv,
314+
.flags = NL_CFG_F_NONROOT_RECV,
314315
};
315316
struct sock *nls;
316317

drivers/infiniband/core/nldev.c

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
170170
[RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
171171
[RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
172172
[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
173+
[RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
173174
};
174175

175176
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -2722,6 +2723,129 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
27222723
},
27232724
};
27242725

2726+
/*
 * Describe a port <-> netdev association in a monitor message.
 *
 * @msg:    message being built
 * @device: IB device owning the port
 * @port:   port number to describe
 * @net:    namespace the message is destined for
 *
 * The device index, device name and port index are always added.  The
 * netdev index and name are appended only when a netdev is currently
 * associated with the port.  If that netdev belongs to a namespace other
 * than @net, nothing is added and 0 is returned.
 *
 * Returns 0 on success or the error returned by the failing nla_put_*().
 */
static int fill_mon_netdev_association(struct sk_buff *msg,
				       struct ib_device *device, u32 port,
				       const struct net *net)
{
	struct net_device *ndev = ib_device_get_netdev(device, port);
	int err = 0;

	if (ndev && !net_eq(dev_net(ndev), net))
		goto put_ndev;

	err = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
	if (!err)
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
				     dev_name(&device->dev));
	if (!err)
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);

	/* Stop on error, or when there is no netdev left to describe. */
	if (err || !ndev)
		goto put_ndev;

	err = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, ndev->ifindex);
	if (!err)
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME,
				     ndev->name);

put_ndev:
	/* Also reached with a NULL ndev, exactly as in the lookup-miss case. */
	dev_put(ndev);
	return err;
}
2763+
2764+
/*
 * Log a rate-limited warning for a monitor event that could not be sent.
 *
 * @device:   IB device the event was about
 * @port_num: port the event was about; only printed for netdev events
 * @type:     type of the event that failed
 *
 * Unknown event types are silently ignored.
 */
static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
				   enum rdma_nl_notify_event_type type)
{
	struct net_device *netdev;

	switch (type) {
	case RDMA_REGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor register device event\n");
		break;
	case RDMA_UNREGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor unregister device event\n");
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
		netdev = ib_device_get_netdev(device, port_num);
		/*
		 * The lookup can come back empty (the port may have no
		 * netdev by the time we get here), so never dereference it
		 * unconditionally.
		 */
		if (!netdev) {
			dev_warn_ratelimited(&device->dev,
					     "Failed to send RDMA monitor netdev attach event: port %u\n",
					     port_num);
			break;
		}
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev attach event: port %u netdev %d\n",
				     port_num, netdev->ifindex);
		dev_put(netdev);
		break;
	case RDMA_NETDEV_DETACH_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev detach event: port %u\n",
				     port_num);
		break;
	default:
		break;
	}
}
2793+
2794+
/*
 * Build an RDMA_NLDEV_CMD_MONITOR message for @type and multicast it to
 * the RDMA_NL_GROUP_NOTIFY group in the device's net namespace.
 *
 * @device:   IB device which triggered the event
 * @port_num: port which triggered the event; only used for the
 *            NETDEV_ATTACH/NETDEV_DETACH event types
 * @type:     the event type, also carried in RDMA_NLDEV_ATTR_EVENT_TYPE
 *
 * Returns 0 on success or a negative error code.  -ESRCH from
 * rdma_nl_multicast() is not treated as a failure (presumably it means
 * nobody is subscribed to the group - confirm against netlink core).
 * Every failure after the skb has been allocated is also logged through
 * rdma_nl_notify_err_msg().
 */
int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
			 enum rdma_nl_notify_event_type type)
{
	struct sk_buff *skb;
	struct net *net;
	int ret = 0;
	void *nlh;

	net = read_pnet(&device->coredev.rdma_net);
	if (!net)
		return -EINVAL;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, 0,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
			0, 0);
	/* Without a header there is nothing for nlmsg_end() to finalize. */
	if (!nlh) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	switch (type) {
	case RDMA_REGISTER_EVENT:
	case RDMA_UNREGISTER_EVENT:
		ret = fill_nldev_handle(skb, device);
		if (ret)
			goto err_free;
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
	case RDMA_NETDEV_DETACH_EVENT:
		ret = fill_mon_netdev_association(skb, device,
						  port_num, net);
		if (ret)
			goto err_free;
		break;
	default:
		break;
	}

	ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
	if (ret)
		goto err_free;

	nlmsg_end(skb, nlh);
	ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
	if (ret && ret != -ESRCH) {
		skb = NULL; /* skb is freed in the netlink send-op handling */
		goto err_free;
	}
	return 0;

err_free:
	rdma_nl_notify_err_msg(device, port_num, type);
	/* skb is NULL here when the send path has already consumed it. */
	nlmsg_free(skb);
	return ret;
}
2848+
27252849
void __init nldev_init(void)
27262850
{
27272851
rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);

include/rdma/rdma_netlink.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#include <linux/netlink.h>
77
#include <uapi/rdma/rdma_netlink.h>
88

9+
struct ib_device;
10+
911
enum {
1012
RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
1113
RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
@@ -110,6 +112,16 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
110112
*/
111113
bool rdma_nl_chk_listeners(unsigned int group);
112114

115+
/**
 * rdma_nl_notify_event - Prepare and send an event message
 * @ib: the IB device which triggered the event
 * @port_num: the port number which triggered the event - 0 if unused
 * @type: the event type
 *
 * Return: 0 on success or a negative error code
 */
122+
int rdma_nl_notify_event(struct ib_device *ib, u32 port_num,
123+
enum rdma_nl_notify_event_type type);
124+
113125
struct rdma_link_ops {
114126
struct list_head list;
115127
const char *type;

include/uapi/rdma/rdma_netlink.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ enum {
1515
enum {
1616
RDMA_NL_GROUP_IWPM = 2,
1717
RDMA_NL_GROUP_LS,
18+
RDMA_NL_GROUP_NOTIFY,
1819
RDMA_NL_NUM_GROUPS
1920
};
2021

@@ -305,6 +306,8 @@ enum rdma_nldev_command {
305306

306307
RDMA_NLDEV_CMD_DELDEV,
307308

309+
RDMA_NLDEV_CMD_MONITOR,
310+
308311
RDMA_NLDEV_NUM_OPS
309312
};
310313

@@ -574,6 +577,8 @@ enum rdma_nldev_attr {
574577

575578
RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, /* u8 */
576579

580+
RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */
581+
577582
/*
578583
* Always the end
579584
*/
@@ -624,4 +629,14 @@ enum rdma_nl_name_assign_type {
624629
RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */
625630
};
626631

632+
/*
 * Supported rdma monitoring event types.
 *
 * The value is carried in the RDMA_NLDEV_ATTR_EVENT_TYPE (u8) attribute,
 * so the numbering below must stay stable.
 */
enum rdma_nl_notify_event_type {
	RDMA_REGISTER_EVENT = 0,	/* IB device registration */
	RDMA_UNREGISTER_EVENT = 1,	/* IB device unregistration */
	RDMA_NETDEV_ATTACH_EVENT = 2,	/* net device attached to a port */
	RDMA_NETDEV_DETACH_EVENT = 3,	/* net device detached from a port */
};
641+
627642
#endif /* _UAPI_RDMA_NETLINK_H */

0 commit comments

Comments
 (0)