Skip to content

Commit 76f06cb

Browse files
committed
Merge branch 'netdevsim-link'
David Wei says: ==================== netdevsim: link and forward skbs between ports This patchset adds the ability to link two netdevsim ports together and forward skbs between them, similar to veth. The goal is to use netdevsim for testing features e.g. zero copy Rx using io_uring. This feature was tested locally on QEMU, and a selftest is included. I ran netdev selftests CI style and all tests but the following passed: - gro.sh - l2tp.sh - ip_local_port_range.sh gro.sh fails because virtme-ng mounts as read-only and it tries to write to log.txt. This issue was reported to virtme-ng upstream. l2tp.sh and ip_local_port_range.sh both fail for me on net-next/main as well. --- v13->v14: - implement ndo_get_iflink() - fix returning 0 if peer is already linked during linking or not linked during unlinking - bump dropped counter if nsim_ipsec_tx() fails and generally reorder nsim_start_xmit() - fix overflowing lines and indentations v12->v13: - wait for socat listening port to be ready before sending data in selftest v11->v12: - fix leaked netns refs - fix rtnetlink.sh kci_test_ipsec_offload() selftest v10->v11: - add udevadm settle after creating netdevsims in selftest v9->v10: - fix not freeing skb when there is no peer - prevent possible id clashes in selftest - cleanup selftest on error paths v8->v9: - switch to getting netns using fd rather than id - prevent linking a netdevsim to itself - update tests v7->v8: - fix not dereferencing RCU ptr using rcu_dereference() - remove unused variables in selftest v6->v7: - change link syntax to netnsid:ifidx - replace dev_get_by_index() with __dev_get_by_index() - check for NULL peer when linking - add a sysfs attribute for unlinking - only update Tx stats if not dropped - update selftest v5->v6: - reworked to link two netdevsims using sysfs attribute on the bus device instead of debugfs due to deadlock possibility if a netdevsim is removed during linking - removed unnecessary patch maintaining a list of probed nsim_devs - 
updated selftest v4->v5: - reduce nsim_dev_list_lock critical section - fixed missing mutex unlock during unwind ladder - rework nsim_dev_peer_write synchronization to take devlink lock as well as rtnl_lock - return err msgs to user during linking if port doesn't exist or linking to self - update tx stats outside of RCU lock v3->v4: - maintain a mutex protected list of probed nsim_devs instead of using nsim_bus_dev - fixed synchronization issues by taking rtnl_lock - track tx_dropped skbs v2->v3: - take lock when traversing nsim_bus_dev_list - take device ref when getting a nsim_bus_dev - return 0 if nsim_dev_peer_read cannot find the port - address code formatting - do not hard code values in selftests - add Makefile for selftests v1->v2: - renamed debugfs file from "link" to "peer" - replaced strsep() with sscanf() for consistency - increased char[] buf sz to 22 for copying id + port from user - added err msg w/ expected fmt when linking as a hint to user - prevent linking port to itself - protect peer ptr using RCU ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents e2d890a + 8ee60f9 commit 76f06cb

File tree

6 files changed

+342
-5
lines changed

6 files changed

+342
-5
lines changed

drivers/net/netdevsim/bus.c

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,154 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count)
232232
}
233233
static BUS_ATTR_WO(del_device);
234234

235+
static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count)
236+
{
237+
struct netdevsim *nsim_a, *nsim_b, *peer;
238+
struct net_device *dev_a, *dev_b;
239+
unsigned int ifidx_a, ifidx_b;
240+
int netnsfd_a, netnsfd_b, err;
241+
struct net *ns_a, *ns_b;
242+
243+
err = sscanf(buf, "%d:%u %d:%u", &netnsfd_a, &ifidx_a, &netnsfd_b,
244+
&ifidx_b);
245+
if (err != 4) {
246+
pr_err("Format for linking two devices is \"netnsfd_a:ifidx_a netnsfd_b:ifidx_b\" (int uint int uint).\n");
247+
return -EINVAL;
248+
}
249+
250+
ns_a = get_net_ns_by_fd(netnsfd_a);
251+
if (IS_ERR(ns_a)) {
252+
pr_err("Could not find netns with fd: %d\n", netnsfd_a);
253+
return -EINVAL;
254+
}
255+
256+
ns_b = get_net_ns_by_fd(netnsfd_b);
257+
if (IS_ERR(ns_b)) {
258+
pr_err("Could not find netns with fd: %d\n", netnsfd_b);
259+
put_net(ns_a);
260+
return -EINVAL;
261+
}
262+
263+
err = -EINVAL;
264+
rtnl_lock();
265+
dev_a = __dev_get_by_index(ns_a, ifidx_a);
266+
if (!dev_a) {
267+
pr_err("Could not find device with ifindex %u in netnsfd %d\n",
268+
ifidx_a, netnsfd_a);
269+
goto out_err;
270+
}
271+
272+
if (!netdev_is_nsim(dev_a)) {
273+
pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
274+
ifidx_a, netnsfd_a);
275+
goto out_err;
276+
}
277+
278+
dev_b = __dev_get_by_index(ns_b, ifidx_b);
279+
if (!dev_b) {
280+
pr_err("Could not find device with ifindex %u in netnsfd %d\n",
281+
ifidx_b, netnsfd_b);
282+
goto out_err;
283+
}
284+
285+
if (!netdev_is_nsim(dev_b)) {
286+
pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
287+
ifidx_b, netnsfd_b);
288+
goto out_err;
289+
}
290+
291+
if (dev_a == dev_b) {
292+
pr_err("Cannot link a netdevsim to itself\n");
293+
goto out_err;
294+
}
295+
296+
err = -EBUSY;
297+
nsim_a = netdev_priv(dev_a);
298+
peer = rtnl_dereference(nsim_a->peer);
299+
if (peer) {
300+
pr_err("Netdevsim %d:%u is already linked\n", netnsfd_a,
301+
ifidx_a);
302+
goto out_err;
303+
}
304+
305+
nsim_b = netdev_priv(dev_b);
306+
peer = rtnl_dereference(nsim_b->peer);
307+
if (peer) {
308+
pr_err("Netdevsim %d:%u is already linked\n", netnsfd_b,
309+
ifidx_b);
310+
goto out_err;
311+
}
312+
313+
err = 0;
314+
rcu_assign_pointer(nsim_a->peer, nsim_b);
315+
rcu_assign_pointer(nsim_b->peer, nsim_a);
316+
317+
out_err:
318+
put_net(ns_b);
319+
put_net(ns_a);
320+
rtnl_unlock();
321+
322+
return !err ? count : err;
323+
}
324+
static BUS_ATTR_WO(link_device);
325+
326+
static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count)
327+
{
328+
struct netdevsim *nsim, *peer;
329+
struct net_device *dev;
330+
unsigned int ifidx;
331+
int netnsfd, err;
332+
struct net *ns;
333+
334+
err = sscanf(buf, "%u:%u", &netnsfd, &ifidx);
335+
if (err != 2) {
336+
pr_err("Format for unlinking a device is \"netnsfd:ifidx\" (int uint).\n");
337+
return -EINVAL;
338+
}
339+
340+
ns = get_net_ns_by_fd(netnsfd);
341+
if (IS_ERR(ns)) {
342+
pr_err("Could not find netns with fd: %d\n", netnsfd);
343+
return -EINVAL;
344+
}
345+
346+
err = -EINVAL;
347+
rtnl_lock();
348+
dev = __dev_get_by_index(ns, ifidx);
349+
if (!dev) {
350+
pr_err("Could not find device with ifindex %u in netnsfd %d\n",
351+
ifidx, netnsfd);
352+
goto out_put_netns;
353+
}
354+
355+
if (!netdev_is_nsim(dev)) {
356+
pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
357+
ifidx, netnsfd);
358+
goto out_put_netns;
359+
}
360+
361+
nsim = netdev_priv(dev);
362+
peer = rtnl_dereference(nsim->peer);
363+
if (!peer)
364+
goto out_put_netns;
365+
366+
err = 0;
367+
RCU_INIT_POINTER(nsim->peer, NULL);
368+
RCU_INIT_POINTER(peer->peer, NULL);
369+
370+
out_put_netns:
371+
put_net(ns);
372+
rtnl_unlock();
373+
374+
return !err ? count : err;
375+
}
376+
static BUS_ATTR_WO(unlink_device);
377+
235378
/*
 * NULL-terminated list of the netdevsim bus attributes (diff view: entries
 * preceded by an "N+" marker line are the ones added by this change, i.e.
 * link_device and unlink_device).
 */
static struct attribute *nsim_bus_attrs[] = {
236379
&bus_attr_new_device.attr,
237380
&bus_attr_del_device.attr,
381+
&bus_attr_link_device.attr,
382+
&bus_attr_unlink_device.attr,
238383
NULL
239384
};
240385
ATTRIBUTE_GROUPS(nsim_bus);

drivers/net/netdevsim/netdev.c

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,35 @@
2929
/*
 * Transmit handler installed as .ndo_start_xmit in nsim_netdev_ops.
 *
 * Diff view: a line following an "N-" marker is removed by this change and a
 * line following an "N+" marker is added.
 *
 * With the added lines applied, the skb is handed to the linked peer's
 * net_device through dev_forward_skb() inside an RCU read-side section.
 * On success tx_packets and tx_bytes are updated, using the length saved in
 * `len` before forwarding (presumably because the skb must not be touched
 * once it has been forwarded - confirm against dev_forward_skb()).
 * If nsim_ipsec_tx() returns false or no peer is set, the skb is freed here.
 * In those two cases, and when dev_forward_skb() returns NET_RX_DROP,
 * tx_dropped is incremented instead.
 *
 * Always returns NETDEV_TX_OK.
 */
static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
3030
{
3131
struct netdevsim *ns = netdev_priv(dev);
32+
unsigned int len = skb->len;
33+
struct netdevsim *peer_ns;
3234

35+
rcu_read_lock();
3336
if (!nsim_ipsec_tx(ns, skb))
34-
goto out;
37+
goto out_drop_free;
3538

39+
peer_ns = rcu_dereference(ns->peer);
40+
if (!peer_ns)
41+
goto out_drop_free;
42+
43+
skb_tx_timestamp(skb);
44+
if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
45+
goto out_drop_cnt;
46+
47+
rcu_read_unlock();
3648
u64_stats_update_begin(&ns->syncp);
3749
ns->tx_packets++;
38-
ns->tx_bytes += skb->len;
50+
ns->tx_bytes += len;
3951
u64_stats_update_end(&ns->syncp);
52+
return NETDEV_TX_OK;
4053

41-
out:
54+
out_drop_free:
4255
dev_kfree_skb(skb);
43-
56+
out_drop_cnt:
57+
rcu_read_unlock();
58+
u64_stats_update_begin(&ns->syncp);
59+
ns->tx_dropped++;
60+
u64_stats_update_end(&ns->syncp);
4461
return NETDEV_TX_OK;
4562
}
4663

@@ -70,6 +87,7 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
7087
start = u64_stats_fetch_begin(&ns->syncp);
7188
stats->tx_bytes = ns->tx_bytes;
7289
stats->tx_packets = ns->tx_packets;
90+
stats->tx_dropped = ns->tx_dropped;
7391
} while (u64_stats_fetch_retry(&ns->syncp, start));
7492
}
7593

@@ -265,6 +283,21 @@ nsim_set_features(struct net_device *dev, netdev_features_t features)
265283
return 0;
266284
}
267285

286+
static int nsim_get_iflink(const struct net_device *dev)
287+
{
288+
struct netdevsim *nsim, *peer;
289+
int iflink;
290+
291+
nsim = netdev_priv(dev);
292+
293+
rcu_read_lock();
294+
peer = rcu_dereference(nsim->peer);
295+
iflink = peer ? READ_ONCE(peer->netdev->ifindex) : 0;
296+
rcu_read_unlock();
297+
298+
return iflink;
299+
}
300+
268301
static const struct net_device_ops nsim_netdev_ops = {
269302
.ndo_start_xmit = nsim_start_xmit,
270303
.ndo_set_rx_mode = nsim_set_rx_mode,
@@ -282,6 +315,7 @@ static const struct net_device_ops nsim_netdev_ops = {
282315
.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
283316
.ndo_setup_tc = nsim_setup_tc,
284317
.ndo_set_features = nsim_set_features,
318+
.ndo_get_iflink = nsim_get_iflink,
285319
.ndo_bpf = nsim_bpf,
286320
};
287321

@@ -302,7 +336,6 @@ static void nsim_setup(struct net_device *dev)
302336
eth_hw_addr_random(dev);
303337

304338
dev->tx_queue_len = 0;
305-
dev->flags |= IFF_NOARP;
306339
dev->flags &= ~IFF_MULTICAST;
307340
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
308341
IFF_NO_QUEUE;
@@ -413,8 +446,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
413446
void nsim_destroy(struct netdevsim *ns)
414447
{
415448
struct net_device *dev = ns->netdev;
449+
struct netdevsim *peer;
416450

417451
rtnl_lock();
452+
peer = rtnl_dereference(ns->peer);
453+
if (peer)
454+
RCU_INIT_POINTER(peer->peer, NULL);
455+
RCU_INIT_POINTER(ns->peer, NULL);
418456
unregister_netdevice(dev);
419457
if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
420458
nsim_macsec_teardown(ns);
@@ -427,6 +465,11 @@ void nsim_destroy(struct netdevsim *ns)
427465
free_netdev(dev);
428466
}
429467

468+
bool netdev_is_nsim(struct net_device *dev)
469+
{
470+
return dev->netdev_ops == &nsim_netdev_ops;
471+
}
472+
430473
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
431474
struct netlink_ext_ack *extack)
432475
{

drivers/net/netdevsim/netdevsim.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ struct netdevsim {
9898

9999
u64 tx_packets;
100100
u64 tx_bytes;
101+
u64 tx_dropped;
101102
struct u64_stats_sync syncp;
102103

103104
struct nsim_bus_dev *nsim_bus_dev;
@@ -125,11 +126,13 @@ struct netdevsim {
125126
} udp_ports;
126127

127128
struct nsim_ethtool ethtool;
129+
struct netdevsim __rcu *peer;
128130
};
129131

130132
struct netdevsim *
131133
nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port);
132134
void nsim_destroy(struct netdevsim *ns);
135+
bool netdev_is_nsim(struct net_device *dev);
133136

134137
void nsim_ethtool_init(struct netdevsim *ns);
135138

tools/testing/selftests/drivers/net/netdevsim/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ TEST_PROGS = devlink.sh \
1010
fib.sh \
1111
hw_stats_l3.sh \
1212
nexthop.sh \
13+
peer.sh \
1314
psample.sh \
1415
tc-mq-visibility.sh \
1516
udp_tunnel_nic.sh \

0 commit comments

Comments
 (0)