// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"

#ifdef CONFIG_NET_RX_BUSY_POLL

/* Timeout for cleanup of stale entries. */
#define NAPI_TIMEOUT	(60 * SEC_CONVERSION)
| 10 | + |
| 11 | +struct io_napi_entry { |
| 12 | + unsigned int napi_id; |
| 13 | + struct list_head list; |
| 14 | + |
| 15 | + unsigned long timeout; |
| 16 | + struct hlist_node node; |
| 17 | + |
| 18 | + struct rcu_head rcu; |
| 19 | +}; |
| 20 | + |
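/*
 * io_napi_hash_find() - Look up a napi id in one bucket of the hash table
 * @hash_list: hash bucket to search
 * @napi_id: napi id to look for
 *
 * Pure RCU lookup; the caller is responsible for refreshing the entry's
 * timeout on a hit. Returns the entry or NULL.
 */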
static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
					       unsigned int napi_id)
{
	struct io_napi_entry *e;

	hlist_for_each_entry_rcu(e, hash_list, node) {
		if (e->napi_id != napi_id)
			continue;
		return e;
	}

	return NULL;
}

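/*
 * __io_napi_add() - Add napi id to the busy poll list
 * @ctx: pointer to io-uring context structure
 * @sock: socket pointer
 *
 * Add the napi id of the socket to the napi busy poll list and hash table.
 */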
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
	struct hlist_head *hash_list;
	unsigned int napi_id;
	struct sock *sk;
	struct io_napi_entry *e;

	sk = sock->sk;
	if (!sk)
		return;

	napi_id = READ_ONCE(sk->sk_napi_id);

	/* Non-NAPI IDs can be rejected. */
	if (napi_id < MIN_NAPI_ID)
		return;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];

	rcu_read_lock();
	e = io_napi_hash_find(hash_list, napi_id);
	if (e) {
		e->timeout = jiffies + NAPI_TIMEOUT;
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	e = kmalloc(sizeof(*e), GFP_NOWAIT);
	if (!e)
		return;

	e->napi_id = napi_id;
	e->timeout = jiffies + NAPI_TIMEOUT;

	spin_lock(&ctx->napi_lock);
	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
		spin_unlock(&ctx->napi_lock);
		kfree(e);
		return;
	}

	hlist_add_tail_rcu(&e->node, hash_list);
	list_add_tail(&e->list, &ctx->napi_list);
	spin_unlock(&ctx->napi_lock);
}

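/*
 * __io_napi_remove_stale() - Remove timed-out entries
 * @ctx: pointer to io-uring context structure
 *
 * Walk the hash table under the lock and reap every entry whose timeout
 * has passed; freeing is deferred via RCU so concurrent readers stay safe.
 */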
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		if (time_after(jiffies, e->timeout)) {
			list_del(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
	spin_unlock(&ctx->napi_lock);
}

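/* Only take the locked reaping path when the busy loop saw a stale entry. */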
static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (is_stale)
		__io_napi_remove_stale(ctx);
}

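/*
 * io_napi_busy_loop_timeout() - Check the busy poll window
 * @start_time: window start, from busy_loop_current_time()
 * @bp_usec: window length in microseconds
 *
 * Returns true once the window has elapsed; a zero-length window counts
 * as already elapsed.
 */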
static inline bool io_napi_busy_loop_timeout(unsigned long start_time,
					     unsigned long bp_usec)
{
	if (bp_usec) {
		unsigned long end_time = start_time + bp_usec;
		unsigned long now = busy_loop_current_time();

		return time_after(now, end_time);
	}

	return true;
}

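/*
 * Loop end callback for napi_busy_loop_rcu(): end the busy loop on a
 * pending signal, once io_should_wake() reports enough completions, or
 * when the busy poll timeout elapses.
 */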
static bool io_napi_busy_loop_should_end(void *data,
					 unsigned long start_time)
{
	struct io_wait_queue *iowq = data;

	if (signal_pending(current))
		return true;
	if (io_should_wake(iowq))
		return true;
	if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to))
		return true;

	return false;
}

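/*
 * Run one busy poll pass over every napi id on the list. Returns true if
 * any entry has timed out, so the caller can reap stale entries. The loop
 * end checker is only installed when a loop_end_arg (the io wait queue)
 * is supplied, i.e. in the single-entry case.
 */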
static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
				   void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool (*loop_end)(void *, unsigned long) = NULL;
	bool is_stale = false;

	if (loop_end_arg)
		loop_end = io_napi_busy_loop_should_end;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

		if (time_after(jiffies, e->timeout))
			is_stale = true;
	}

	return is_stale;
}

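/*
 * Busy poll loop run before the task blocks waiting for completions:
 * iterate the napi list until the loop end condition is met, then reap
 * any stale entries.
 */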
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/*
	 * Singular lists use a different napi loop end check function and
	 * are only executed once.
	 */
	if (list_is_singular(&ctx->napi_list))
		loop_end_arg = iowq;

	rcu_read_lock();
	do {
		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
}

/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
}

/*
 * io_napi_free() - Deallocate napi
 * @ctx: pointer to io-uring context structure
 *
 * Free all napi entries in the io-uring context. Entries sit on both the
 * list and the hash table, so emptying the hash table empties the list.
 */
void io_napi_free(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		hash_del_rcu(&e->node);
		kfree_rcu(e, rcu);
	}
	spin_unlock(&ctx->napi_lock);
}

/*
 * __io_napi_adjust_timeout() - Adjust the napi busy poll timeout
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 * @ts: pointer to timespec or NULL
 *
 * Adjust the busy loop timeout according to timespec and busy poll timeout.
 * If the wait time is shorter than the busy poll timeout, only busy poll
 * for the wait time and do not sleep afterwards.
 */
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
			      struct timespec64 *ts)
{
	unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);

	if (ts) {
		struct timespec64 poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);

		if (timespec64_compare(ts, &poll_to_ts) > 0) {
			*ts = timespec64_sub(*ts, poll_to_ts);
		} else {
			u64 to = timespec64_to_ns(ts);

			do_div(to, 1000);
			/* Busy poll for at most the requested wait time. */
			poll_to = to;
			ts->tv_sec = 0;
			ts->tv_nsec = 0;
		}
	}

	iowq->napi_busy_poll_to = poll_to;
}

/*
 * __io_napi_busy_loop() - Execute busy poll loop
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Execute the busy poll loop over the napi list before the task goes to
 * sleep waiting for completions; SQPOLL rings are skipped here.
 */
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);

	if (!(ctx->flags & IORING_SETUP_SQPOLL) && iowq->napi_busy_poll_to)
		io_napi_blocking_busy_loop(ctx, iowq);
}

#endif /* CONFIG_NET_RX_BUSY_POLL */