Skip to content

Commit 29863d4

Browse files
tracyww authored and davem330 committed
net: implement threaded-able napi poll loop support
This patch allows running each napi poll loop inside its own kernel thread. The kthread is created during netif_napi_add() if dev->threaded is set, and threaded mode is enabled in napi_enable(). We will provide a way to set dev->threaded and enable threaded mode without a device up/down in the following patch. Once threaded mode is enabled and the kthread is started, napi_schedule() will wake up that thread instead of scheduling the softirq. The threaded poll loop behaves much like net_rx_action, but it does not have to manipulate local irqs and uses an explicit scheduling point based on netdev_budget. Co-developed-by: Paolo Abeni <[email protected]> Signed-off-by: Paolo Abeni <[email protected]> Co-developed-by: Hannes Frederic Sowa <[email protected]> Signed-off-by: Hannes Frederic Sowa <[email protected]> Co-developed-by: Jakub Kicinski <[email protected]> Signed-off-by: Jakub Kicinski <[email protected]> Signed-off-by: Wei Wang <[email protected]> Reviewed-by: Alexander Duyck <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 898f801 commit 29863d4

File tree

2 files changed

+119
-14
lines changed

2 files changed

+119
-14
lines changed

include/linux/netdevice.h

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ struct napi_struct {
347347
struct list_head dev_list;
348348
struct hlist_node napi_hash_node;
349349
unsigned int napi_id;
350+
struct task_struct *thread;
350351
};
351352

352353
enum {
@@ -358,6 +359,7 @@ enum {
358359
NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */
359360
NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
360361
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
362+
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
361363
};
362364

363365
enum {
@@ -369,6 +371,7 @@ enum {
369371
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
370372
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
371373
NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
374+
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
372375
};
373376

374377
enum gro_result {
@@ -503,20 +506,7 @@ static inline bool napi_complete(struct napi_struct *n)
503506
*/
504507
void napi_disable(struct napi_struct *n);
505508

506-
/**
507-
* napi_enable - enable NAPI scheduling
508-
* @n: NAPI context
509-
*
510-
* Resume NAPI from being scheduled on this context.
511-
* Must be paired with napi_disable.
512-
*/
513-
static inline void napi_enable(struct napi_struct *n)
514-
{
515-
BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
516-
smp_mb__before_atomic();
517-
clear_bit(NAPI_STATE_SCHED, &n->state);
518-
clear_bit(NAPI_STATE_NPSVC, &n->state);
519-
}
509+
void napi_enable(struct napi_struct *n);
520510

521511
/**
522512
* napi_synchronize - wait until NAPI is not running
@@ -1827,6 +1817,8 @@ enum netdev_priv_flags {
18271817
*
18281818
* @wol_enabled: Wake-on-LAN is enabled
18291819
*
1820+
* @threaded: napi threaded mode is enabled
1821+
*
18301822
* @net_notifier_list: List of per-net netdev notifier block
18311823
* that follow this device when it is moved
18321824
* to another network namespace.
@@ -2145,6 +2137,7 @@ struct net_device {
21452137
struct lock_class_key *qdisc_running_key;
21462138
bool proto_down;
21472139
unsigned wol_enabled:1;
2140+
unsigned threaded:1;
21482141

21492142
struct list_head net_notifier_list;
21502143

net/core/dev.c

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
#include <linux/etherdevice.h>
9292
#include <linux/ethtool.h>
9393
#include <linux/skbuff.h>
94+
#include <linux/kthread.h>
9495
#include <linux/bpf.h>
9596
#include <linux/bpf_trace.h>
9697
#include <net/net_namespace.h>
@@ -1494,6 +1495,27 @@ void netdev_notify_peers(struct net_device *dev)
14941495
}
14951496
EXPORT_SYMBOL(netdev_notify_peers);
14961497

1498+
static int napi_threaded_poll(void *data);
1499+
1500+
static int napi_kthread_create(struct napi_struct *n)
1501+
{
1502+
int err = 0;
1503+
1504+
/* Create and wake up the kthread once to put it in
1505+
* TASK_INTERRUPTIBLE mode to avoid the blocked task
1506+
* warning and work with loadavg.
1507+
*/
1508+
n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
1509+
n->dev->name, n->napi_id);
1510+
if (IS_ERR(n->thread)) {
1511+
err = PTR_ERR(n->thread);
1512+
pr_err("kthread_run failed with err %d\n", err);
1513+
n->thread = NULL;
1514+
}
1515+
1516+
return err;
1517+
}
1518+
14971519
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
14981520
{
14991521
const struct net_device_ops *ops = dev->netdev_ops;
@@ -4265,6 +4287,21 @@ int gro_normal_batch __read_mostly = 8;
42654287
/* Schedule @napi for polling: in threaded mode wake its kthread,
 * otherwise queue it on this CPU's softnet poll list and raise
 * NET_RX_SOFTIRQ. NOTE(review): the ____ prefix and the use of
 * __raise_softirq_irqoff() suggest callers hold local irqs disabled —
 * confirm against the callers outside this hunk.
 */
static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{
	struct task_struct *thread;

	if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
		/* Paired with smp_mb__before_atomic() in
		 * napi_enable(). Use READ_ONCE() to guarantee
		 * a complete read on napi->thread. Only call
		 * wake_up_process() when it's not NULL.
		 */
		thread = READ_ONCE(napi->thread);
		if (thread) {
			/* Threaded NAPI never touches the poll list;
			 * the kthread polls directly. */
			wake_up_process(thread);
			return;
		}
	}

	list_add_tail(&napi->poll_list, &sd->poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
@@ -6728,6 +6765,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
67286765
set_bit(NAPI_STATE_NPSVC, &napi->state);
67296766
list_add_rcu(&napi->dev_list, &dev->napi_list);
67306767
napi_hash_add(napi);
6768+
/* Create kthread for this napi if dev->threaded is set.
6769+
* Clear dev->threaded if kthread creation failed so that
6770+
* threaded mode will not be enabled in napi_enable().
6771+
*/
6772+
if (dev->threaded && napi_kthread_create(napi))
6773+
dev->threaded = 0;
67316774
}
67326775
EXPORT_SYMBOL(netif_napi_add);
67336776

@@ -6745,9 +6788,28 @@ void napi_disable(struct napi_struct *n)
67456788

67466789
clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
67476790
clear_bit(NAPI_STATE_DISABLE, &n->state);
6791+
clear_bit(NAPI_STATE_THREADED, &n->state);
67486792
}
67496793
EXPORT_SYMBOL(napi_disable);
67506794

6795+
/**
 * napi_enable - enable NAPI scheduling
 * @n: NAPI context
 *
 * Resume NAPI from being scheduled on this context.
 * Must be paired with napi_disable.
 */
void napi_enable(struct napi_struct *n)
{
	/* napi_disable() leaves SCHED set; it must still be set here. */
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	/* Order prior writes (including n->thread) before the bit
	 * clears below; paired with the READ_ONCE(napi->thread) in
	 * ____napi_schedule(). */
	smp_mb__before_atomic();
	clear_bit(NAPI_STATE_SCHED, &n->state);
	clear_bit(NAPI_STATE_NPSVC, &n->state);
	/* Enable threaded mode only if the kthread was actually
	 * created (netif_napi_add() clears dev->threaded on failure). */
	if (n->dev->threaded && n->thread)
		set_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_enable);
6812+
67516813
static void flush_gro_hash(struct napi_struct *napi)
67526814
{
67536815
int i;
@@ -6773,6 +6835,11 @@ void __netif_napi_del(struct napi_struct *napi)
67736835

67746836
flush_gro_hash(napi);
67756837
napi->gro_bitmask = 0;
6838+
6839+
if (napi->thread) {
6840+
kthread_stop(napi->thread);
6841+
napi->thread = NULL;
6842+
}
67766843
}
67776844
EXPORT_SYMBOL(__netif_napi_del);
67786845

@@ -6867,6 +6934,51 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
68676934
return work;
68686935
}
68696936

6937+
/* Sleep until this NAPI instance is scheduled (NAPI_STATE_SCHED set)
 * or the kthread must exit. Returns 0 when there is work to poll,
 * -1 when the kthread is being stopped or the NAPI is being disabled.
 */
static int napi_thread_wait(struct napi_struct *napi)
{
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop() && !napi_disable_pending(napi)) {
		if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
			/* Threaded NAPI is never queued on the softirq
			 * poll list (see ____napi_schedule()), so it
			 * must be empty here. */
			WARN_ON(!list_empty(&napi->poll_list));
			__set_current_state(TASK_RUNNING);
			return 0;
		}

		schedule();
		/* Re-arm the sleep state BEFORE re-checking the
		 * conditions, so a wakeup racing with the checks is
		 * not lost. */
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return -1;
}
6954+
6955+
/* Main loop of the per-NAPI poll kthread: wait until the NAPI is
 * scheduled, then poll repeatedly until the driver reports no more
 * work, with an explicit scheduling point between rounds instead of
 * the softirq's local-irq/budget juggling (see commit message).
 */
static int napi_threaded_poll(void *data)
{
	struct napi_struct *napi = data;
	void *have;

	while (!napi_thread_wait(napi)) {
		for (;;) {
			bool repoll = false;

			/* Poll in BH-disabled context, matching the
			 * net_rx_action() environment drivers expect. */
			local_bh_disable();

			have = netpoll_poll_lock(napi);
			__napi_poll(napi, &repoll);
			netpoll_poll_unlock(have);

			/* Flush skbs deferred for freeing during the
			 * poll before BHs are re-enabled. */
			__kfree_skb_flush();
			local_bh_enable();

			/* repoll set means the driver still has work. */
			if (!repoll)
				break;

			/* Explicit scheduling point between rounds so
			 * the kthread does not monopolize the CPU. */
			cond_resched();
		}
	}
	return 0;
}
6981+
68706982
static __latent_entropy void net_rx_action(struct softirq_action *h)
68716983
{
68726984
struct softnet_data *sd = this_cpu_ptr(&softnet_data);

0 commit comments

Comments
 (0)