Skip to content

Commit ef70f9a

Browse files
committed
Merge branch 'bpf-sockmap'
John Fastabend says: ==================== BPF: sockmap and sk redirect support This series implements a sockmap and socket redirect helper for BPF using a model similar to XDP netdev redirect. A sockmap is a BPF map type that holds references to sock structs. Then with a new sk redirect bpf helper BPF programs can use the map to redirect skbs between sockets, bpf_sk_redirect_map(map, key, flags) Finally, we need a call site to attach our BPF logic to do socket redirects. We added hooks to recv_sock using the existing strparser infrastructure to do this. The call site is added via the BPF attach map call. To enable users to use this infrastructure a new BPF program type BPF_PROG_TYPE_SK_SKB is created that allows users to reference sock details, such as port and ip address fields, to build useful socket layer programs. The sockmap datapath is as follows, recv -> strparser -> verdict/action where this series implements the drop and redirect actions. Additional actions can be added as needed. A sample program is provided to illustrate how a sockmap can be integrated with cgroups and used to add/delete sockets in a sockmap. The program is simple but should show many of the key ideas. To test this work test_maps in selftests/bpf was leveraged. We added a set of tests to add sockets and do send/recv ops on the sockets to ensure correct behavior. Additionally, the selftests test a series of negative test cases. We can expand on this in the future. I also have a basic test program I use with iperf/netperf clients that could be sent as an additional sample if folks want this. It needs a bit of cleanup to send to the list and wasn't included in this series. 
For people who prefer git over pulling patches out of their mail editor I've posted the code here, https://github.com/jrfastab/linux-kernel-xdp/tree/sockmap For some background information on the genesis of this work it might be helpful to review these slides from netconf 2017 by Thomas Graf, http://vger.kernel.org/netconf2017.html https://docs.google.com/a/covalent.io/presentation/d/1dwSKSBGpUHD3WO5xxzZWj8awV_-xL-oYhvqQMOBhhtk/edit?usp=sharing Thanks to Daniel Borkmann for reviewing and providing initial feedback. ==================== Acked-by: Daniel Borkmann <[email protected]> Signed-off-by: David S. Miller <[email protected]>
2 parents d369bca + 6f6d33f commit ef70f9a

File tree

29 files changed

+2316
-59
lines changed

29 files changed

+2316
-59
lines changed

include/linux/bpf.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/rbtree_latch.h>
1717

1818
struct perf_event;
19+
struct bpf_prog;
1920
struct bpf_map;
2021

2122
/* map is generic key/value storage optionally accessible by eBPF programs */
@@ -37,6 +38,8 @@ struct bpf_map_ops {
3738
void (*map_fd_put_ptr)(void *ptr);
3839
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
3940
u32 (*map_fd_sys_lookup_elem)(void *ptr);
41+
int (*map_attach)(struct bpf_map *map,
42+
struct bpf_prog *p1, struct bpf_prog *p2);
4043
};
4144

4245
struct bpf_map {
@@ -138,8 +141,6 @@ enum bpf_reg_type {
138141
PTR_TO_PACKET_END, /* skb->data + headlen */
139142
};
140143

141-
struct bpf_prog;
142-
143144
/* The information passed from prog-specific *_is_valid_access
144145
* back to the verifier.
145146
*/
@@ -252,6 +253,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
252253
struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
253254
void bpf_prog_sub(struct bpf_prog *prog, int i);
254255
struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
256+
struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
255257
void bpf_prog_put(struct bpf_prog *prog);
256258
int __bpf_prog_charge(struct user_struct *user, u32 pages);
257259
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
@@ -311,6 +313,7 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
311313

312314
/* Map specifics */
313315
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
316+
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
314317
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
315318
void __dev_map_flush(struct bpf_map *map);
316319

@@ -344,6 +347,12 @@ static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog)
344347
return ERR_PTR(-EOPNOTSUPP);
345348
}
346349

350+
static inline struct bpf_prog *__must_check
351+
bpf_prog_inc_not_zero(struct bpf_prog *prog)
352+
{
353+
return ERR_PTR(-EOPNOTSUPP);
354+
}
355+
347356
static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
348357
{
349358
return 0;
@@ -384,6 +393,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
384393
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
385394
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
386395
extern const struct bpf_func_proto bpf_get_stackid_proto;
396+
extern const struct bpf_func_proto bpf_sock_map_update_proto;
387397

388398
/* Shared helpers among cBPF and eBPF. */
389399
void bpf_user_rnd_init_once(void);

include/linux/bpf_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
1111
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
1212
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
1313
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
14+
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops)
1415
#endif
1516
#ifdef CONFIG_BPF_EVENTS
1617
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
@@ -37,4 +38,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
3738
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
3839
#ifdef CONFIG_NET
3940
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
41+
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
4042
#endif

include/linux/filter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,8 @@ void xdp_do_flush_map(void);
727727
void bpf_warn_invalid_xdp_action(u32 act);
728728
void bpf_warn_invalid_xdp_redirect(u32 ifindex);
729729

730+
struct sock *do_sk_redirect_map(void);
731+
730732
#ifdef CONFIG_BPF_JIT
731733
extern int bpf_jit_enable;
732734
extern int bpf_jit_harden;

include/uapi/linux/bpf.h

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ enum bpf_map_type {
110110
BPF_MAP_TYPE_ARRAY_OF_MAPS,
111111
BPF_MAP_TYPE_HASH_OF_MAPS,
112112
BPF_MAP_TYPE_DEVMAP,
113+
BPF_MAP_TYPE_SOCKMAP,
113114
};
114115

115116
enum bpf_prog_type {
@@ -127,18 +128,23 @@ enum bpf_prog_type {
127128
BPF_PROG_TYPE_LWT_OUT,
128129
BPF_PROG_TYPE_LWT_XMIT,
129130
BPF_PROG_TYPE_SOCK_OPS,
131+
BPF_PROG_TYPE_SK_SKB,
130132
};
131133

132134
enum bpf_attach_type {
133135
BPF_CGROUP_INET_INGRESS,
134136
BPF_CGROUP_INET_EGRESS,
135137
BPF_CGROUP_INET_SOCK_CREATE,
136138
BPF_CGROUP_SOCK_OPS,
139+
BPF_CGROUP_SMAP_INGRESS,
137140
__MAX_BPF_ATTACH_TYPE
138141
};
139142

140143
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
141144

145+
/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */
146+
#define BPF_SOCKMAP_STRPARSER (1U << 0)
147+
142148
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
143149
* to the given target_fd cgroup the descendent cgroup will be able to
144150
* override effective bpf program that was inherited from this cgroup
@@ -210,6 +216,7 @@ union bpf_attr {
210216
__u32 attach_bpf_fd; /* eBPF program to attach */
211217
__u32 attach_type;
212218
__u32 attach_flags;
219+
__u32 attach_bpf_fd2;
213220
};
214221

215222
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -556,6 +563,23 @@ union bpf_attr {
556563
* @mode: operation mode (enum bpf_adj_room_mode)
557564
* @flags: reserved for future use
558565
* Return: 0 on success or negative error code
566+
*
567+
* int bpf_sk_redirect_map(map, key, flags)
568+
* Redirect skb to a sock in map using key as a lookup key for the
569+
* sock in map.
570+
* @map: pointer to sockmap
571+
* @key: key to lookup sock in map
572+
* @flags: reserved for future use
573+
* Return: SK_REDIRECT
574+
*
575+
* int bpf_sock_map_update(skops, map, key, flags, map_flags)
576+
* @skops: pointer to bpf_sock_ops
577+
* @map: pointer to sockmap to update
578+
* @key: key to insert/update sock in map
579+
* @flags: same flags as map update elem
580+
* @map_flags: sock map specific flags
581+
* BPF_SOCKMAP_STRPARSER (1U << 0): Enable strparser
582+
* other bits: reserved
559583
*/
560584
#define __BPF_FUNC_MAPPER(FN) \
561585
FN(unspec), \
@@ -609,7 +633,9 @@ union bpf_attr {
609633
FN(set_hash), \
610634
FN(setsockopt), \
611635
FN(skb_adjust_room), \
612-
FN(redirect_map),
636+
FN(redirect_map), \
637+
FN(sk_redirect_map), \
638+
FN(sock_map_update), \
613639

614640
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
615641
* function eBPF program intends to call
@@ -686,6 +712,15 @@ struct __sk_buff {
686712
__u32 data;
687713
__u32 data_end;
688714
__u32 napi_id;
715+
716+
/* accessed by BPF_PROG_TYPE_SK_SKB types */
717+
__u32 family;
718+
__u32 remote_ip4; /* Stored in network byte order */
719+
__u32 local_ip4; /* Stored in network byte order */
720+
__u32 remote_ip6[4]; /* Stored in network byte order */
721+
__u32 local_ip6[4]; /* Stored in network byte order */
722+
__u32 remote_port; /* Stored in network byte order */
723+
__u32 local_port; /* stored in host byte order */
689724
};
690725

691726
struct bpf_tunnel_key {
@@ -746,6 +781,12 @@ struct xdp_md {
746781
__u32 data_end;
747782
};
748783

784+
enum sk_action {
785+
SK_ABORTED = 0,
786+
SK_DROP,
787+
SK_REDIRECT,
788+
};
789+
749790
#define BPF_TAG_SIZE 8
750791

751792
struct bpf_prog_info {

kernel/bpf/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ obj-y := core.o
33
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
44
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
55
ifeq ($(CONFIG_NET),y)
6-
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
6+
obj-$(CONFIG_BPF_SYSCALL) += devmap.o sockmap.o
77
endif
88
ifeq ($(CONFIG_PERF_EVENTS),y)
99
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o

0 commit comments

Comments
 (0)