
Commit acdefab

Merge branch 'net-improve-core-queue-api-handling-while-device-is-down'
Jakub Kicinski says:

====================
net: improve core queue API handling while device is down

The core netdev_rx_queue_restart() doesn't currently take into account
that the device may be down. The current and proposed queue API
implementations deal with this by rejecting queue API calls while the
device is down. We can do better: in theory we can still allow devmem
binding while the device is down - we shouldn't stop and start the
queues, just try to allocate the memory. The reason we allocate the
memory is that memory provider binding checks whether any compatible
page pool has been created (page_pool_check_memory_provider()).

Alternatively we could reject installing a memory provider while the
device is down, but the MP assignment survives ifdown (so presumably
the MP doesn't cease to exist while down), and in general we allow
configuration while down.

Previously I thought we needed this as a fix, but gve rejects page pool
calls while down, and so did Saeed in the patches he posted. So this
series just makes the core act more sensibly, but in practice it should
be a no-op for now.

v1: https://lore.kernel.org/[email protected]
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 6a0ca73 + 285b3f7 commit acdefab
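
The heart of the series is a single branch in netdev_rx_queue_restart(). Below is a condensed sketch of the new control flow (the real function's goto-based error unwinding is flattened into nested ifs here for readability; the verbatim hunk is in the net/core/netdev_rx_queue.c diff further down):

	/* Allocation and the memory-provider check run whether or not
	 * the device is up; only the queue stop/start is conditional.
	 */
	err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
	if (!err)
		err = page_pool_check_memory_provider(dev, rxq);
	if (!err) {
		if (netif_running(dev)) {
			/* Device up: really move the queue onto the new memory. */
			err = qops->ndo_queue_stop(dev, old_mem, rxq_idx);
			if (!err)
				err = qops->ndo_queue_start(dev, new_mem, rxq_idx);
		} else {
			/* Device down: nothing to stop or start; route the
			 * fresh allocation into the common free path below.
			 */
			swap(new_mem, old_mem);
		}
	}
	if (!err)
		qops->ndo_queue_mem_free(dev, old_mem);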

6 files changed (+59, -29)

drivers/net/netdevsim/netdev.c

Lines changed: 4 additions & 6 deletions
@@ -645,8 +645,11 @@ nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
 	if (ns->rq_reset_mode > 3)
 		return -EINVAL;
 
-	if (ns->rq_reset_mode == 1)
+	if (ns->rq_reset_mode == 1) {
+		if (!netif_running(ns->netdev))
+			return -ENETDOWN;
 		return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
+	}
 
 	qmem->rq = nsim_queue_alloc();
 	if (!qmem->rq)
@@ -754,11 +757,6 @@ nsim_qreset_write(struct file *file, const char __user *data,
 		return -EINVAL;
 
 	rtnl_lock();
-	if (!netif_running(ns->netdev)) {
-		ret = -ENETDOWN;
-		goto exit_unlock;
-	}
-
 	if (queue >= ns->netdev->real_num_rx_queues) {
 		ret = -EINVAL;
 		goto exit_unlock;

include/net/netdev_queues.h

Lines changed: 4 additions & 0 deletions
@@ -117,6 +117,10 @@ struct netdev_stat_ops {
  *
  * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
  *		    queue's memory is written at the specified address.
+ *
+ * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
+ * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
+ * be called for an interface which is open.
  */
 struct netdev_queue_mgmt_ops {
 	size_t			ndo_queue_mem_size;
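
A hypothetical driver-side sketch of that contract ("foo" names are invented for illustration; only the ndo signatures follow the API and the netdevsim implementation above). ->ndo_queue_mem_alloc() must tolerate a closed interface, while ->ndo_queue_start() may rely on it being open:

	/* Hypothetical driver sketch -- foo_* helpers are assumptions,
	 * not part of this merge.
	 */
	static int foo_queue_mem_alloc(struct net_device *dev, void *per_queue_mem,
				       int idx)
	{
		struct foo_queue_mem *qmem = per_queue_mem;
	
		/* May be called while the interface is closed: allocate only,
		 * never touch live queue or NAPI state here.
		 */
		qmem->rings = foo_alloc_rings(dev, idx);
		return qmem->rings ? 0 : -ENOMEM;
	}
	
	static int foo_queue_start(struct net_device *dev, void *per_queue_mem,
				   int idx)
	{
		/* Guaranteed to run only on an open interface, so enabling
		 * NAPI and posting RX buffers is safe.
		 */
		return foo_activate_rings(dev, per_queue_mem, idx);
	}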

net/core/dev.h

Lines changed: 12 additions & 0 deletions
@@ -299,6 +299,18 @@ void xdp_do_check_flushed(struct napi_struct *napi);
 static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
 #endif
 
+/* Best effort check that NAPI is not idle (can't be scheduled to run) */
+static inline void napi_assert_will_not_race(const struct napi_struct *napi)
+{
+	/* uninitialized instance, can't race */
+	if (!napi->poll_list.next)
+		return;
+
+	/* SCHED bit is set on disabled instances */
+	WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));
+	WARN_ON(READ_ONCE(napi->list_owner) != -1);
+}
+
 void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
 
 #define XMIT_RECURSION_LIMIT	8

net/core/netdev_rx_queue.c

Lines changed: 20 additions & 17 deletions
@@ -10,44 +10,47 @@
 int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
 {
 	struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx);
+	const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
 	void *new_mem, *old_mem;
 	int err;
 
-	if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop ||
-	    !dev->queue_mgmt_ops->ndo_queue_mem_free ||
-	    !dev->queue_mgmt_ops->ndo_queue_mem_alloc ||
-	    !dev->queue_mgmt_ops->ndo_queue_start)
+	if (!qops || !qops->ndo_queue_stop || !qops->ndo_queue_mem_free ||
+	    !qops->ndo_queue_mem_alloc || !qops->ndo_queue_start)
 		return -EOPNOTSUPP;
 
 	ASSERT_RTNL();
 
-	new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+	new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
 	if (!new_mem)
 		return -ENOMEM;
 
-	old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+	old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
 	if (!old_mem) {
 		err = -ENOMEM;
 		goto err_free_new_mem;
 	}
 
-	err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
+	err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
 	if (err)
 		goto err_free_old_mem;
 
 	err = page_pool_check_memory_provider(dev, rxq);
 	if (err)
 		goto err_free_new_queue_mem;
 
-	err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx);
-	if (err)
-		goto err_free_new_queue_mem;
+	if (netif_running(dev)) {
+		err = qops->ndo_queue_stop(dev, old_mem, rxq_idx);
+		if (err)
+			goto err_free_new_queue_mem;
 
-	err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx);
-	if (err)
-		goto err_start_queue;
+		err = qops->ndo_queue_start(dev, new_mem, rxq_idx);
+		if (err)
+			goto err_start_queue;
+	} else {
+		swap(new_mem, old_mem);
+	}
 
-	dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+	qops->ndo_queue_mem_free(dev, old_mem);
 
 	kvfree(old_mem);
 	kvfree(new_mem);
@@ -62,15 +65,15 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
 	 * WARN if we fail to recover the old rx queue, and at least free
 	 * old_mem so we don't also leak that.
 	 */
-	if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) {
+	if (qops->ndo_queue_start(dev, old_mem, rxq_idx)) {
 		WARN(1,
 		     "Failed to restart old queue in error path. RX queue %d may be unhealthy.",
 		     rxq_idx);
-		dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+		qops->ndo_queue_mem_free(dev, old_mem);
 	}
 
 err_free_new_queue_mem:
-	dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem);
+	qops->ndo_queue_mem_free(dev, new_mem);
 
 err_free_old_mem:
 	kvfree(old_mem);
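
One subtlety in the hunk above: in the down case, swap(new_mem, old_mem) funnels the freshly allocated memory into the shared qops->ndo_queue_mem_free(dev, old_mem) tail. That allocation existed only to satisfy page_pool_check_memory_provider(), so it is released immediately, and the queue itself is never stopped or started.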

net/core/page_pool.c

Lines changed: 2 additions & 5 deletions
@@ -26,6 +26,7 @@
 
 #include <trace/events/page_pool.h>
 
+#include "dev.h"
 #include "mp_dmabuf_devmem.h"
 #include "netmem_priv.h"
 #include "page_pool_priv.h"
@@ -1147,11 +1148,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool)
 	if (!pool->p.napi)
 		return;
 
-	/* To avoid races with recycling and additional barriers make sure
-	 * pool and NAPI are unlinked when NAPI is disabled.
-	 */
-	WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state));
-	WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1);
+	napi_assert_will_not_race(pool->p.napi);
 
 	mutex_lock(&page_pools_lock);
 	WRITE_ONCE(pool->p.napi, NULL);

tools/testing/selftests/net/nl_netdev.py

Lines changed: 17 additions & 1 deletion
@@ -35,6 +35,21 @@ def napi_list_check(nf) -> None:
                          comment=f"queue count after reset queue {q} mode {i}")
 
 
+def nsim_rxq_reset_down(nf) -> None:
+    """
+    Test that the queue API supports resetting a queue
+    while the interface is down. We should convert this
+    test to testing real HW once more devices support
+    queue API.
+    """
+    with NetdevSimDev(queue_count=4) as nsimdev:
+        nsim = nsimdev.nsims[0]
+
+        ip(f"link set dev {nsim.ifname} down")
+        for i in [0, 2, 3]:
+            nsim.dfs_write("queue_reset", f"1 {i}")
+
+
 def page_pool_check(nf) -> None:
     with NetdevSimDev() as nsimdev:
         nsim = nsimdev.nsims[0]
@@ -106,7 +121,8 @@ def get_pp():
 
 def main() -> None:
     nf = NetdevFamily()
-    ksft_run([empty_check, lo_check, page_pool_check, napi_list_check],
+    ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
+              nsim_rxq_reset_down],
              args=(nf, ))
     ksft_exit()
 
