Skip to content

Commit 7c951ca

Browse files
Björn Töpel, borkmann
authored and committed
net: Add SO_BUSY_POLL_BUDGET socket option
This option lets a user set a per-socket NAPI budget for busy-polling. If the option is not set, the default budget of 8 is used.

Signed-off-by: Björn Töpel <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Reviewed-by: Jakub Kicinski <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent 7fd3253 commit 7c951ca

File tree

10 files changed

+34
-14
lines changed

10 files changed

+34
-14
lines changed

arch/alpha/include/uapi/asm/socket.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
#define SO_DETACH_REUSEPORT_BPF 68
126126

127127
#define SO_PREFER_BUSY_POLL 69
128+
#define SO_BUSY_POLL_BUDGET 70
128129

129130
#if !defined(__KERNEL__)
130131

arch/mips/include/uapi/asm/socket.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@
136136
#define SO_DETACH_REUSEPORT_BPF 68
137137

138138
#define SO_PREFER_BUSY_POLL 69
139+
#define SO_BUSY_POLL_BUDGET 70
139140

140141
#if !defined(__KERNEL__)
141142

arch/parisc/include/uapi/asm/socket.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@
117117
#define SO_DETACH_REUSEPORT_BPF 0x4042
118118

119119
#define SO_PREFER_BUSY_POLL 0x4043
120+
#define SO_BUSY_POLL_BUDGET 0x4044
120121

121122
#if !defined(__KERNEL__)
122123

arch/sparc/include/uapi/asm/socket.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@
118118
#define SO_DETACH_REUSEPORT_BPF 0x0047
119119

120120
#define SO_PREFER_BUSY_POLL 0x0048
121+
#define SO_BUSY_POLL_BUDGET 0x0049
121122

122123
#if !defined(__KERNEL__)
123124

fs/eventpoll.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,8 @@ static void ep_busy_loop(struct eventpoll *ep, int nonblock)
397397
unsigned int napi_id = READ_ONCE(ep->napi_id);
398398

399399
if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
400-
napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false);
400+
napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false,
401+
BUSY_POLL_BUDGET);
401402
}
402403

403404
static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)

include/net/busy_poll.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
*/
2424
#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
2525

26+
#define BUSY_POLL_BUDGET 8
27+
2628
#ifdef CONFIG_NET_RX_BUSY_POLL
2729

2830
struct napi_struct;
@@ -43,7 +45,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time);
4345

4446
void napi_busy_loop(unsigned int napi_id,
4547
bool (*loop_end)(void *, unsigned long),
46-
void *loop_end_arg, bool prefer_busy_poll);
48+
void *loop_end_arg, bool prefer_busy_poll, u16 budget);
4749

4850
#else /* CONFIG_NET_RX_BUSY_POLL */
4951
static inline unsigned long net_busy_loop_on(void)
@@ -106,7 +108,8 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
106108

107109
if (napi_id >= MIN_NAPI_ID)
108110
napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
109-
READ_ONCE(sk->sk_prefer_busy_poll));
111+
READ_ONCE(sk->sk_prefer_busy_poll),
112+
READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
110113
#endif
111114
}
112115

include/net/sock.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ struct bpf_local_storage;
302302
* @sk_max_ack_backlog: listen backlog set in listen()
303303
* @sk_uid: user id of owner
304304
* @sk_prefer_busy_poll: prefer busypolling over softirq processing
305+
* @sk_busy_poll_budget: napi processing budget when busypolling
305306
* @sk_priority: %SO_PRIORITY setting
306307
* @sk_type: socket type (%SOCK_STREAM, etc)
307308
* @sk_protocol: which protocol this socket belongs in this network family
@@ -482,6 +483,7 @@ struct sock {
482483
kuid_t sk_uid;
483484
#ifdef CONFIG_NET_RX_BUSY_POLL
484485
u8 sk_prefer_busy_poll;
486+
u16 sk_busy_poll_budget;
485487
#endif
486488
struct pid *sk_peer_pid;
487489
const struct cred *sk_peer_cred;

include/uapi/asm-generic/socket.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@
120120
#define SO_DETACH_REUSEPORT_BPF 68
121121

122122
#define SO_PREFER_BUSY_POLL 69
123+
#define SO_BUSY_POLL_BUDGET 70
123124

124125
#if !defined(__KERNEL__)
125126

net/core/dev.c

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6496,8 +6496,6 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
64966496

64976497
#if defined(CONFIG_NET_RX_BUSY_POLL)
64986498

6499-
#define BUSY_POLL_BUDGET 8
6500-
65016499
static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
65026500
{
65036501
if (!skip_schedule) {
@@ -6517,7 +6515,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
65176515
clear_bit(NAPI_STATE_SCHED, &napi->state);
65186516
}
65196517

6520-
static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll)
6518+
static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
6519+
u16 budget)
65216520
{
65226521
bool skip_schedule = false;
65236522
unsigned long timeout;
@@ -6549,21 +6548,21 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
65496548
/* All we really want here is to re-enable device interrupts.
65506549
* Ideally, a new ndo_busy_poll_stop() could avoid another round.
65516550
*/
6552-
rc = napi->poll(napi, BUSY_POLL_BUDGET);
6551+
rc = napi->poll(napi, budget);
65536552
/* We can't gro_normal_list() here, because napi->poll() might have
65546553
* rearmed the napi (napi_complete_done()) in which case it could
65556554
* already be running on another CPU.
65566555
*/
6557-
trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
6556+
trace_napi_poll(napi, rc, budget);
65586557
netpoll_poll_unlock(have_poll_lock);
6559-
if (rc == BUSY_POLL_BUDGET)
6558+
if (rc == budget)
65606559
__busy_poll_stop(napi, skip_schedule);
65616560
local_bh_enable();
65626561
}
65636562

65646563
void napi_busy_loop(unsigned int napi_id,
65656564
bool (*loop_end)(void *, unsigned long),
6566-
void *loop_end_arg, bool prefer_busy_poll)
6565+
void *loop_end_arg, bool prefer_busy_poll, u16 budget)
65676566
{
65686567
unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
65696568
int (*napi_poll)(struct napi_struct *napi, int budget);
@@ -6606,8 +6605,8 @@ void napi_busy_loop(unsigned int napi_id,
66066605
have_poll_lock = netpoll_poll_lock(napi);
66076606
napi_poll = napi->poll;
66086607
}
6609-
work = napi_poll(napi, BUSY_POLL_BUDGET);
6610-
trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
6608+
work = napi_poll(napi, budget);
6609+
trace_napi_poll(napi, work, budget);
66116610
gro_normal_list(napi);
66126611
count:
66136612
if (work > 0)
@@ -6620,7 +6619,7 @@ void napi_busy_loop(unsigned int napi_id,
66206619

66216620
if (unlikely(need_resched())) {
66226621
if (napi_poll)
6623-
busy_poll_stop(napi, have_poll_lock, prefer_busy_poll);
6622+
busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
66246623
preempt_enable();
66256624
rcu_read_unlock();
66266625
cond_resched();
@@ -6631,7 +6630,7 @@ void napi_busy_loop(unsigned int napi_id,
66316630
cpu_relax();
66326631
}
66336632
if (napi_poll)
6634-
busy_poll_stop(napi, have_poll_lock, prefer_busy_poll);
6633+
busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
66356634
preempt_enable();
66366635
out:
66376636
rcu_read_unlock();

net/core/sock.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,16 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
11651165
else
11661166
WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
11671167
break;
1168+
case SO_BUSY_POLL_BUDGET:
1169+
if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) {
1170+
ret = -EPERM;
1171+
} else {
1172+
if (val < 0 || val > U16_MAX)
1173+
ret = -EINVAL;
1174+
else
1175+
WRITE_ONCE(sk->sk_busy_poll_budget, val);
1176+
}
1177+
break;
11681178
#endif
11691179

11701180
case SO_MAX_PACING_RATE:

0 commit comments

Comments
 (0)