 #include <linux/seq_file.h>
 #include <linux/compat.h>
 #include <linux/rculist.h>
+#include <net/busy_poll.h>
 
 /*
  * LOCKING:
@@ -224,6 +225,11 @@ struct eventpoll {
 	/* used to optimize loop detection check */
 	int visited;
 	struct list_head visited_list_link;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	/* used to track busy poll napi_id */
+	unsigned int napi_id;
+#endif
 };
 
 /* Wait structure used by the poll hooks */
@@ -384,6 +390,77 @@ static inline int ep_events_available(struct eventpoll *ep)
 	return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
 }
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static bool ep_busy_loop_end(void *p, unsigned long start_time)
+{
+	struct eventpoll *ep = p;
+
+	return ep_events_available(ep) || busy_loop_timeout(start_time);
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+/*
+ * Busy poll if globally on and supporting sockets found && no events,
+ * busy loop will return if need_resched or ep_events_available.
+ *
+ * we must do our busy polling with irqs enabled
+ */
+static void ep_busy_loop(struct eventpoll *ep, int nonblock)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	unsigned int napi_id = READ_ONCE(ep->napi_id);
+
+	if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
+		napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep);
+#endif
+}
+
+static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	if (ep->napi_id)
+		ep->napi_id = 0;
+#endif
+}
+
+/*
+ * Set epoll busy poll NAPI ID from sk.
+ */
+static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	struct eventpoll *ep;
+	unsigned int napi_id;
+	struct socket *sock;
+	struct sock *sk;
+	int err;
+
+	if (!net_busy_loop_on())
+		return;
+
+	sock = sock_from_file(epi->ffd.file, &err);
+	if (!sock)
+		return;
+
+	sk = sock->sk;
+	if (!sk)
+		return;
+
+	napi_id = READ_ONCE(sk->sk_napi_id);
+	ep = epi->ep;
+
+	/* Non-NAPI IDs can be rejected
+	 * or
+	 * Nothing to do if we already have this ID
+	 */
+	if (napi_id < MIN_NAPI_ID || napi_id == ep->napi_id)
+		return;
+
+	/* record NAPI ID for use in next busy poll */
+	ep->napi_id = napi_id;
+#endif
+}
+
 /**
  * ep_call_nested - Perform a bound (possibly) nested call, by checking
  *                  that the recursion limit is not exceeded, and that
@@ -1022,6 +1099,8 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 
 	spin_lock_irqsave(&ep->lock, flags);
 
+	ep_set_busy_poll_napi_id(epi);
+
 	/*
 	 * If the event mask does not contain any poll(2) event, we consider the
 	 * descriptor to be disabled. This condition is likely the effect of the
@@ -1363,6 +1442,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	/* We have to drop the new item inside our item list to keep track of it */
 	spin_lock_irqsave(&ep->lock, flags);
 
+	/* record NAPI ID of new item if present */
+	ep_set_busy_poll_napi_id(epi);
+
 	/* If the file is already "ready" we drop it inside the ready list */
 	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1637,9 +1719,20 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 	}
 
 fetch_events:
+
+	if (!ep_events_available(ep))
+		ep_busy_loop(ep, timed_out);
+
 	spin_lock_irqsave(&ep->lock, flags);
 
 	if (!ep_events_available(ep)) {
+		/*
+		 * Busy poll timed out. Drop NAPI ID for now, we can add
+		 * it back in when we have moved a socket with a valid NAPI
+		 * ID onto the ready list.
+		 */
+		ep_reset_busy_poll_napi_id(ep);
+
 		/*
		 * We don't have any available event to return to the caller.
		 * We need to sleep here, and we will be wake up by
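
For context on how this path gets exercised from userspace: the recorded NAPI ID only matters when the monitored file is a socket (sock_from_file() succeeds) and busy polling is globally enabled, i.e. the net.core.busy_poll sysctl is nonzero so net_busy_loop_on() returns true. Below is a minimal, hypothetical userspace sketch (not part of this patch; the port number and program structure are illustrative only): a single UDP socket bound to port 9000, added to an epoll set and waited on. The first packet wakes ep_poll_callback(), which records the socket's NAPI ID; later epoll_wait() calls that find no ready events then spin in ep_busy_loop() before sleeping.

/* Hypothetical sketch: one UDP socket watched by epoll.
 * With net.core.busy_poll set to a nonzero microsecond value,
 * a kernel carrying this patch busy polls the NAPI context
 * recorded from the socket instead of sleeping right away
 * when no events are ready.
 */
#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/epoll.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_in addr = { 0 };
	struct epoll_event ev = { 0 }, out;
	char buf[2048];
	int sock, epfd, n;

	sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (sock < 0) {
		perror("socket");
		return 1;
	}

	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(9000);	/* illustrative port */
	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("bind");
		return 1;
	}

	epfd = epoll_create1(0);
	if (epfd < 0) {
		perror("epoll_create1");
		return 1;
	}

	ev.events = EPOLLIN;
	ev.data.fd = sock;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, sock, &ev) < 0) {
		perror("epoll_ctl");
		return 1;
	}

	for (;;) {
		/* The first wakeup runs ep_poll_callback(), which records
		 * the socket's NAPI ID; subsequent calls that find no ready
		 * events enter ep_busy_loop() before sleeping.
		 */
		n = epoll_wait(epfd, &out, 1, -1);
		if (n <= 0)
			continue;
		n = recv(out.data.fd, buf, sizeof(buf), 0);
		if (n > 0)
			printf("got %d bytes\n", n);
	}
	return 0;
}

Note that traffic has to arrive through a NAPI-capable NIC for an ID >= MIN_NAPI_ID to be recorded on the socket; otherwise ep_busy_loop() simply does nothing and epoll_wait() behaves as before.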