Skip to content

Commit a388457

Browse files
Jakub Kicinski authored and borkmann committed
bpf: offload: add map offload infrastructure
BPF map offload follow similar path to program offload. At creation time users may specify ifindex of the device on which they want to create the map. Map will be validated by the kernel's .map_alloc_check callback and device driver will be called for the actual allocation. Map will have an empty set of operations associated with it (save for alloc and free callbacks). The real device callbacks are kept in map->offload->dev_ops because they have slightly different signatures. Map operations are called in process context so the driver may communicate with HW freely, msleep(), wait() etc. Map alloc and free callbacks are muxed via existing .ndo_bpf, and are always called with rtnl lock held. Maps and programs are guaranteed to be destroyed before .ndo_uninit (i.e. before unregister_netdev() returns). Map callbacks are invoked with bpf_devs_lock *read* locked, drivers must take care of exclusive locking if necessary. All offload-specific branches are marked with unlikely() (through bpf_map_is_dev_bound()), given that branch penalty will be negligible compared to IO anyway, and we don't want to penalize SW path unnecessarily. Signed-off-by: Jakub Kicinski <[email protected]> Reviewed-by: Quentin Monnet <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]>
1 parent 5bc2d55 commit a388457

File tree

7 files changed

+293
-13
lines changed

7 files changed

+293
-13
lines changed

include/linux/bpf.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,33 @@ struct bpf_map {
7474
char name[BPF_OBJ_NAME_LEN];
7575
};
7676

77+
struct bpf_offloaded_map;

/* Driver-side callbacks for a device-offloaded map.  These mirror the
 * corresponding struct bpf_map_ops entries but take the offloaded map
 * directly (hence the separate ops table).  Per the commit message they
 * are invoked in process context with bpf_devs_lock read-locked, so
 * drivers may sleep and talk to HW; exclusive locking, if needed, is the
 * driver's responsibility.
 */
struct bpf_map_dev_ops {
	int (*map_get_next_key)(struct bpf_offloaded_map *map,
				void *key, void *next_key);
	int (*map_lookup_elem)(struct bpf_offloaded_map *map,
			       void *key, void *value);
	int (*map_update_elem)(struct bpf_offloaded_map *map,
			       void *key, void *value, u64 flags);
	int (*map_delete_elem)(struct bpf_offloaded_map *map, void *key);
};

/* A BPF map bound to a network device. */
struct bpf_offloaded_map {
	struct bpf_map map;		/* generic map, must be first (container_of) */
	struct net_device *netdev;	/* bound device; NULL once orphaned */
	const struct bpf_map_dev_ops *dev_ops;	/* driver callbacks */
	void *dev_priv;			/* opaque driver state */
	struct list_head offloads;	/* entry on bpf_map_offload_devs */
};

/* Convert a generic map pointer back to its offload container.
 * Only valid when bpf_map_is_dev_bound(map) is true.
 */
static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
{
	return container_of(map, struct bpf_offloaded_map, map);
}

extern const struct bpf_map_ops bpf_map_offload_ops;
103+
77104
/* function argument constraints */
78105
enum bpf_arg_type {
79106
ARG_DONTCARE = 0, /* unused argument in helper function */
@@ -369,6 +396,7 @@ int __bpf_prog_charge(struct user_struct *user, u32 pages);
369396
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
370397

371398
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
399+
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
372400

373401
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
374402
struct bpf_map *__bpf_map_get(struct fd f);
@@ -556,13 +584,30 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog);
556584
int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
557585
struct bpf_prog *prog);
558586

587+
/* Syscall-path wrappers for device-bound maps, implemented in
 * kernel/bpf/offload.c.  Each returns 0 or a negative errno
 * (-ENODEV once the device has gone away).
 */
int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value);
int bpf_map_offload_update_elem(struct bpf_map *map,
				void *key, void *value, u64 flags);
int bpf_map_offload_delete_elem(struct bpf_map *map, void *key);
int bpf_map_offload_get_next_key(struct bpf_map *map,
				 void *key, void *next_key);

/* True when prog and map may be used together: both bound to the same
 * netdev, or neither dev-bound.
 */
bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
595+
559596
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
560597
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
561598

562599
static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
563600
{
564601
return aux->offload_requested;
565602
}
603+
604+
/* A map is dev-bound iff it uses the offload ops table.  unlikely():
 * don't penalize the SW map path; offloaded maps do IO anyway, so the
 * branch penalty is negligible there (see commit message).
 */
static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
{
	return unlikely(map->ops == &bpf_map_offload_ops);
}

struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
void bpf_map_offload_map_free(struct bpf_map *map);
566611
#else
567612
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
568613
union bpf_attr *attr)
@@ -574,6 +619,20 @@ static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
574619
{
575620
return false;
576621
}
622+
623+
/* Stubs for !CONFIG_NET || !CONFIG_BPF_SYSCALL: no map offload support. */
static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
{
	return false;
}

static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
{
	return ERR_PTR(-EOPNOTSUPP);
}

static inline void bpf_map_offload_map_free(struct bpf_map *map)
{
}
577636
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
578637

579638
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)

include/linux/netdevice.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,8 @@ enum bpf_netdev_command {
804804
BPF_OFFLOAD_VERIFIER_PREP,
805805
BPF_OFFLOAD_TRANSLATE,
806806
BPF_OFFLOAD_DESTROY,
807+
BPF_OFFLOAD_MAP_ALLOC,
808+
BPF_OFFLOAD_MAP_FREE,
807809
};
808810

809811
struct bpf_prog_offload_ops;
@@ -834,6 +836,10 @@ struct netdev_bpf {
834836
struct {
835837
struct bpf_prog *prog;
836838
} offload;
839+
/* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */
840+
struct {
841+
struct bpf_offloaded_map *offmap;
842+
};
837843
};
838844
};
839845

include/uapi/linux/bpf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ union bpf_attr {
245245
* BPF_F_NUMA_NODE is set).
246246
*/
247247
char map_name[BPF_OBJ_NAME_LEN];
248+
__u32 map_ifindex; /* ifindex of netdev to create on */
248249
};
249250

250251
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */

kernel/bpf/offload.c

Lines changed: 181 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@
2424
#include <linux/rtnetlink.h>
2525
#include <linux/rwsem.h>
2626

27-
/* Protects bpf_prog_offload_devs and offload members of all progs.
27+
/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
28+
* of all progs.
2829
* RTNL lock cannot be taken when holding this lock.
2930
*/
3031
static DECLARE_RWSEM(bpf_devs_lock);
3132
static LIST_HEAD(bpf_prog_offload_devs);
33+
static LIST_HEAD(bpf_map_offload_devs);
3234

3335
static int bpf_dev_offload_check(struct net_device *netdev)
3436
{
@@ -250,11 +252,186 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
250252
const struct bpf_prog_ops bpf_offload_prog_ops = {
251253
};
252254

255+
/* Issue a map alloc/free command (@cmd) to the bound device via its
 * ->ndo_bpf() hook.  Must run under RTNL (asserted below), which is how
 * these two commands are serialized with device teardown.
 */
static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
			       enum bpf_netdev_command cmd)
{
	struct netdev_bpf data = {};
	struct net_device *netdev;

	ASSERT_RTNL();

	data.command = cmd;
	data.offmap = offmap;
	/* Caller must make sure netdev is valid */
	netdev = offmap->netdev;

	return netdev->netdev_ops->ndo_bpf(netdev, &data);
}
270+
271+
/* Create a device-offloaded map (attr->map_ifindex != 0).  Only hash
 * maps are accepted here.  Locking: RTNL is taken before bpf_devs_lock —
 * the file-top comment says RTNL cannot be taken while bpf_devs_lock is
 * held, so this order is mandatory.  On success returns the embedded
 * &offmap->map; on failure an ERR_PTR().
 */
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
{
	struct net *net = current->nsproxy->net_ns;
	struct bpf_offloaded_map *offmap;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);
	if (attr->map_type != BPF_MAP_TYPE_HASH)
		return ERR_PTR(-EINVAL);

	offmap = kzalloc(sizeof(*offmap), GFP_USER);
	if (!offmap)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&offmap->map, attr);

	rtnl_lock();
	down_write(&bpf_devs_lock);
	/* __dev_get_by_index() is RTNL-protected; no reference is taken —
	 * the netdev pointer stays valid because the notifier below
	 * orphans the map before the device is unregistered.
	 */
	offmap->netdev = __dev_get_by_index(net, attr->map_ifindex);
	err = bpf_dev_offload_check(offmap->netdev);
	if (err)
		goto err_unlock;

	/* Ask the driver to allocate its HW-side state. */
	err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
	if (err)
		goto err_unlock;

	list_add_tail(&offmap->offloads, &bpf_map_offload_devs);
	up_write(&bpf_devs_lock);
	rtnl_unlock();

	return &offmap->map;

err_unlock:
	up_write(&bpf_devs_lock);
	rtnl_unlock();
	kfree(offmap);
	return ERR_PTR(err);
}
311+
312+
/* Detach @offmap from its device: tell the driver to free the HW state,
 * drop the map's ID, and unlink it from bpf_map_offload_devs.  Callers
 * hold RTNL and bpf_devs_lock for writing.  Clearing ->netdev marks the
 * map orphaned, making the op wrappers return -ENODEV from then on.
 */
static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
{
	WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE));
	/* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */
	bpf_map_free_id(&offmap->map, true);
	list_del_init(&offmap->offloads);
	offmap->netdev = NULL;
}
320+
321+
/* .map_free callback for offloaded maps: release the device state (if
 * the device is still around) and free the container allocated in
 * bpf_map_offload_map_alloc().
 */
void bpf_map_offload_map_free(struct bpf_map *map)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);

	rtnl_lock();
	down_write(&bpf_devs_lock);
	/* ->netdev is NULL if the notifier already orphaned this map */
	if (offmap->netdev)
		__bpf_map_offload_destroy(offmap);
	up_write(&bpf_devs_lock);
	rtnl_unlock();

	kfree(offmap);
}
334+
335+
/* Lookup an element in a device-bound map.  bpf_devs_lock is read-locked
 * around the driver callback so the device cannot orphan the map
 * mid-call; returns -ENODEV once the device is gone.
 */
int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);
	int ret = -ENODEV;

	down_read(&bpf_devs_lock);
	if (offmap->netdev)
		ret = offmap->dev_ops->map_lookup_elem(offmap, key, value);
	up_read(&bpf_devs_lock);

	return ret;
}
347+
348+
/* Update an element in a device-bound map.  Flag validation (only
 * BPF_ANY/BPF_NOEXIST/BPF_EXIST allowed) is done here so drivers don't
 * each have to repeat it.  Returns -ENODEV once the device is gone.
 */
int bpf_map_offload_update_elem(struct bpf_map *map,
				void *key, void *value, u64 flags)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);
	int ret = -ENODEV;

	if (unlikely(flags > BPF_EXIST))
		return -EINVAL;

	down_read(&bpf_devs_lock);
	if (offmap->netdev)
		ret = offmap->dev_ops->map_update_elem(offmap, key, value,
						       flags);
	up_read(&bpf_devs_lock);

	return ret;
}
365+
366+
/* Delete an element from a device-bound map; -ENODEV once the device is
 * gone.  Driver callback runs under bpf_devs_lock (read).
 */
int bpf_map_offload_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);
	int ret = -ENODEV;

	down_read(&bpf_devs_lock);
	if (offmap->netdev)
		ret = offmap->dev_ops->map_delete_elem(offmap, key);
	up_read(&bpf_devs_lock);

	return ret;
}
378+
379+
/* Key iteration for a device-bound map; -ENODEV once the device is gone.
 * Driver callback runs under bpf_devs_lock (read).
 */
int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);
	int ret = -ENODEV;

	down_read(&bpf_devs_lock);
	if (offmap->netdev)
		ret = offmap->dev_ops->map_get_next_key(offmap, key, next_key);
	up_read(&bpf_devs_lock);

	return ret;
}
391+
392+
/* True when @prog and @map may be used together: either neither is
 * dev-bound, or both are bound to the same netdev.
 */
bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
{
	struct bpf_offloaded_map *offmap;
	struct bpf_prog_offload *offload;
	bool ret;

	/* One dev-bound, the other not: never compatible. */
	if (!!bpf_prog_is_dev_bound(prog->aux) != !!bpf_map_is_dev_bound(map))
		return false;
	/* Neither dev-bound: always compatible. */
	if (!bpf_prog_is_dev_bound(prog->aux))
		return true;

	down_read(&bpf_devs_lock);
	offload = prog->aux->offload;
	offmap = map_to_offmap(map);

	/* ->offload may be NULL once the prog has been orphaned, in which
	 * case nothing matches.
	 */
	ret = offload && offload->netdev == offmap->netdev;
	up_read(&bpf_devs_lock);

	return ret;
}
412+
413+
/* Orphan every offloaded prog bound to @netdev.  Called from the netdev
 * notifier with RTNL and bpf_devs_lock (write) held; _safe iteration
 * because __bpf_prog_offload_destroy() unlinks entries.
 */
static void bpf_offload_orphan_all_progs(struct net_device *netdev)
{
	struct bpf_prog_offload *offload, *tmp;

	list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
		if (offload->netdev == netdev)
			__bpf_prog_offload_destroy(offload->prog);
}
421+
422+
/* Orphan every offloaded map bound to @netdev — the map-side twin of
 * bpf_offload_orphan_all_progs().  Same calling context and same reason
 * for _safe iteration (__bpf_map_offload_destroy() unlinks entries).
 */
static void bpf_offload_orphan_all_maps(struct net_device *netdev)
{
	struct bpf_offloaded_map *offmap, *tmp;

	list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
		if (offmap->netdev == netdev)
			__bpf_map_offload_destroy(offmap);
}
430+
253431
static int bpf_offload_notification(struct notifier_block *notifier,
254432
ulong event, void *ptr)
255433
{
256434
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
257-
struct bpf_prog_offload *offload, *tmp;
258435

259436
ASSERT_RTNL();
260437

@@ -265,11 +442,8 @@ static int bpf_offload_notification(struct notifier_block *notifier,
265442
break;
266443

267444
down_write(&bpf_devs_lock);
268-
list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
269-
offloads) {
270-
if (offload->netdev == netdev)
271-
__bpf_prog_offload_destroy(offload->prog);
272-
}
445+
bpf_offload_orphan_all_progs(netdev);
446+
bpf_offload_orphan_all_maps(netdev);
273447
up_write(&bpf_devs_lock);
274448
break;
275449
default:

0 commit comments

Comments
 (0)