@@ -55,10 +55,17 @@
 #define DEV_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
+#define DEV_MAP_BULK_SIZE 16
+struct xdp_bulk_queue {
+	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
+	unsigned int count;
+};
+
 struct bpf_dtab_netdev {
	struct net_device *dev; /* must be first member, due to tracepoint */
	struct bpf_dtab *dtab;
	unsigned int bit;
+	struct xdp_bulk_queue __percpu *bulkq;
	struct rcu_head rcu;
 };
 
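The two additions above are the whole bulking state: a fixed array of frame pointers plus a count, allocated per CPU for each map entry. For readers outside the kernel tree, here is a minimal user-space sketch of that pattern; "struct frame" and xmit_one() are hypothetical stand-ins for struct xdp_frame and ndo_xdp_xmit(), and only the queue mechanics match the patch.

/* Illustrative user-space model of the bulk queue added above.
 * "struct frame" and xmit_one() are stand-ins, not kernel APIs.
 */
#include <stdio.h>

#define BULK_SIZE 16

struct frame { int id; };

struct bulk_queue {
	struct frame *q[BULK_SIZE];
	unsigned int count;
};

static void xmit_one(struct frame *f)
{
	printf("xmit frame %d\n", f->id);
}

/* Drain everything queued so far and reset the count. */
static void bq_flush(struct bulk_queue *bq)
{
	unsigned int i;

	for (i = 0; i < bq->count; i++)
		xmit_one(bq->q[i]);
	bq->count = 0;
}

/* Queue one frame; drain first if the queue is already full. */
static void bq_add(struct bulk_queue *bq, struct frame *f)
{
	if (bq->count == BULK_SIZE)
		bq_flush(bq);
	bq->q[bq->count++] = f;
}

int main(void)
{
	struct bulk_queue bq = { .count = 0 };
	struct frame frames[20];
	int i;

	for (i = 0; i < 20; i++) {
		frames[i].id = i;
		bq_add(&bq, &frames[i]);   /* the 17th add triggers a drain */
	}
	bq_flush(&bq);                     /* drain the remainder */
	return 0;
}

The kernel version below additionally prefetches the queued frames and hands failed ones back via xdp_return_frame(), which this toy model omits.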
@@ -208,6 +215,34 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
	__set_bit(bit, bitmap);
 }
 
+static int bq_xmit_all(struct bpf_dtab_netdev *obj,
+		       struct xdp_bulk_queue *bq)
+{
+	struct net_device *dev = obj->dev;
+	int i;
+
+	if (unlikely(!bq->count))
+		return 0;
+
+	for (i = 0; i < bq->count; i++) {
+		struct xdp_frame *xdpf = bq->q[i];
+
+		prefetch(xdpf);
+	}
+
+	for (i = 0; i < bq->count; i++) {
+		struct xdp_frame *xdpf = bq->q[i];
+		int err;
+
+		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+		if (err)
+			xdp_return_frame(xdpf);
+	}
+	bq->count = 0;
+
+	return 0;
+}
+
 /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
  * from the driver before returning from its napi->poll() routine. The poll()
  * routine is called either from busy_poll context or net_rx_action signaled
@@ -223,6 +258,7 @@ void __dev_map_flush(struct bpf_map *map)
 
	for_each_set_bit(bit, bitmap, map->max_entries) {
		struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+		struct xdp_bulk_queue *bq;
		struct net_device *netdev;
 
		/* This is possible if the dev entry is removed by user space
@@ -232,6 +268,9 @@ void __dev_map_flush(struct bpf_map *map)
			continue;
 
		__clear_bit(bit, bitmap);
+
+		bq = this_cpu_ptr(dev->bulkq);
+		bq_xmit_all(dev, bq);
		netdev = dev->dev;
		if (likely(netdev->netdev_ops->ndo_xdp_flush))
			netdev->netdev_ops->ndo_xdp_flush(netdev);
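The comment above __dev_map_flush() spells out the ordering contract: frames redirected during a poll cycle are only queued (see bq_enqueue() added below), and the driver must trigger xdp_do_flush_map() before its napi->poll() returns so that the per-CPU queue is drained and ndo_xdp_flush() kicks the hardware. A hypothetical plain-C sketch of that ordering follows; receive_and_redirect(), flush_all() and poll_model() are illustrative stand-ins, not kernel or driver interfaces.

/* Sketch of the required call ordering: enqueue during the poll loop,
 * one explicit flush before returning.  All names here are stand-ins.
 */
#include <stdio.h>

static int frames_queued;

static void receive_and_redirect(int pkt)
{
	/* models bq_enqueue(): the frame is only queued, not sent yet */
	frames_queued++;
	printf("queued packet %d\n", pkt);
}

static void flush_all(void)
{
	/* models xdp_do_flush_map() -> __dev_map_flush() -> bq_xmit_all() */
	printf("flushing %d queued frames\n", frames_queued);
	frames_queued = 0;
}

/* models a napi->poll() callback processing up to 'budget' packets */
static int poll_model(int budget)
{
	int done;

	for (done = 0; done < budget; done++)
		receive_and_redirect(done);

	flush_all();   /* must happen before "poll" returns */
	return done;
}

int main(void)
{
	poll_model(8);
	return 0;
}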
@@ -254,6 +293,20 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
	return obj;
 }
 
+/* Runs under RCU-read-side, plus in softirq under NAPI protection.
+ * Thus, safe percpu variable access.
+ */
+static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
+{
+	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
+
+	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
+		bq_xmit_all(obj, bq);
+
+	bq->q[bq->count++] = xdpf;
+	return 0;
+}
+
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
 {
	struct net_device *dev = dst->dev;
@@ -266,8 +319,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
	if (unlikely(!xdpf))
		return -EOVERFLOW;
 
-	/* TODO: implement a bulking/enqueue step later */
-	return dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+	return bq_enqueue(dst, xdpf);
 }
 
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
@@ -282,13 +334,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
	if (dev->dev->netdev_ops->ndo_xdp_flush) {
		struct net_device *fl = dev->dev;
+		struct xdp_bulk_queue *bq;
		unsigned long *bitmap;
+
		int cpu;
 
		for_each_online_cpu(cpu) {
			bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
			__clear_bit(dev->bit, bitmap);
 
+			bq = per_cpu_ptr(dev->bulkq, cpu);
+			bq_xmit_all(dev, bq);
+
			fl->netdev_ops->ndo_xdp_flush(dev->dev);
		}
	}
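Two different per-CPU accessors appear in this patch: the hot path (bq_enqueue() and __dev_map_flush()) touches only the current CPU's queue via this_cpu_ptr(), which the earlier comment justifies by the RCU read-side and softirq/NAPI context, while the teardown path above walks every CPU with for_each_online_cpu() and per_cpu_ptr() so no queued frame is left behind. A plain-C model of that difference follows; the NCPUS constant, the queues array and the frame IDs are stand-ins for real per-CPU data, not kernel machinery.

/* Illustrative model of "drain only my CPU" vs "drain every CPU".
 * No real per-CPU or SMP machinery is used here.
 */
#include <stdio.h>

#define NCPUS     4
#define BULK_SIZE 16

struct bulk_queue {
	int q[BULK_SIZE];
	unsigned int count;
};

static struct bulk_queue queues[NCPUS];   /* models the __percpu bulkq */

static void drain(struct bulk_queue *bq)
{
	unsigned int i;

	for (i = 0; i < bq->count; i++)
		printf("xmit %d\n", bq->q[i]);
	bq->count = 0;
}

/* Hot path: like __dev_map_flush(), only the current CPU's queue. */
static void flush_this_cpu(int cur_cpu)
{
	drain(&queues[cur_cpu]);
}

/* Teardown: like dev_map_flush_old(), every CPU's queue. */
static void flush_all_cpus(void)
{
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		drain(&queues[cpu]);
}

int main(void)
{
	queues[0].q[queues[0].count++] = 1;   /* queued on "CPU 0" */
	queues[2].q[queues[2].count++] = 2;   /* queued on "CPU 2" */

	flush_this_cpu(0);   /* drains only CPU 0's frame */
	flush_all_cpus();    /* drains the leftover on CPU 2 */
	return 0;
}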
@@ -300,6 +357,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
 
	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
	dev_map_flush_old(dev);
+	free_percpu(dev->bulkq);
	dev_put(dev->dev);
	kfree(dev);
 }
@@ -332,6 +390,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct net *net = current->nsproxy->net_ns;
+	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
	struct bpf_dtab_netdev *dev, *old_dev;
	u32 i = *(u32 *)key;
	u32 ifindex = *(u32 *)value;
@@ -346,13 +405,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
	if (!ifindex) {
		dev = NULL;
	} else {
-		dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
-				   map->numa_node);
+		dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
		if (!dev)
			return -ENOMEM;
 
+		dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+						sizeof(void *), gfp);
+		if (!dev->bulkq) {
+			kfree(dev);
+			return -ENOMEM;
+		}
+
		dev->dev = dev_get_by_index(net, ifindex);
		if (!dev->dev) {
+			free_percpu(dev->bulkq);
			kfree(dev);
			return -EINVAL;
		}
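The update path above also fixes the allocation order the rest of the patch has to unwind: the entry itself, then its per-CPU queues, then the device reference; each failure frees exactly what was already acquired, and __dev_map_entry_free() releases the queues again after the final flush. A compressed user-space model of that acquire-in-order / release-in-reverse discipline follows; malloc/free stand in for kmalloc_node(), __alloc_percpu_gfp() and dev_put(), and get_device() plus the entry layout are hypothetical.

/* Sketch of the unwind structure used by dev_map_update_elem() and
 * __dev_map_entry_free().  Only the ordering matches the patch.
 */
#include <stdlib.h>
#include <stdio.h>

struct entry {
	void *device;     /* stands in for dev->dev from dev_get_by_index() */
	void *bulkq;      /* stands in for the per-CPU queue allocation     */
};

static void *get_device(int ifindex)
{
	return ifindex ? malloc(1) : NULL;   /* NULL models lookup failure */
}

static struct entry *entry_create(int ifindex)
{
	struct entry *e = malloc(sizeof(*e));

	if (!e)
		return NULL;

	e->bulkq = malloc(64);
	if (!e->bulkq) {
		free(e);                     /* undo step 1 */
		return NULL;
	}

	e->device = get_device(ifindex);
	if (!e->device) {
		free(e->bulkq);              /* undo step 2 */
		free(e);                     /* undo step 1 */
		return NULL;
	}
	return e;
}

static void entry_destroy(struct entry *e)
{
	/* mirrors __dev_map_entry_free(): queues, device ref, then entry */
	free(e->bulkq);
	free(e->device);
	free(e);
}

int main(void)
{
	struct entry *e = entry_create(3);

	if (e)
		entry_destroy(e);
	printf("failed lookup handled: %p\n", (void *)entry_create(0));
	return 0;
}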