Skip to content

Commit b92d44b

Browse files
author
Alexei Starovoitov
committed
Merge branch 'expand-cg_skb-helpers'
Andrey Ignatov says: ==================== v2->v3: - better documentation for bpf_sk_cgroup_id in uapi (Yonghong Song) - save/restore errno in network helpers (Yonghong Song) - cleanup leftover after switching selftest to skeleton (Yonghong Song) - switch from map to skel->bss in selftest (Yonghong Song) v1->v2: - switch selftests to skeleton. This patch set allows a bunch of existing sk lookup and skb cgroup id helpers, and adds two new bpf_sk_{,ancestor_}cgroup_id helpers to be used in cgroup skb programs. It fills the gap to cover a use-case to apply intra-host cgroup-bpf network policy based on a source cgroup a packet comes from. For example, there can be multiple containers A, B, C running on a host. Every such container runs in its own cgroup that can have multiple sub-cgroups. But all these containers can share some IP addresses. At the same time container A wants to have a policy for a server S running in it so that only clients from this same container can connect to S, but not from other containers (such as B, C). Source IP address can't be used to decide whether to allow or deny a packet, but it looks reasonable to filter by cgroup id. The patch set makes it possible to implement the following policy: * when an ingress packet comes to container's cgroup, look up the peer (client) socket this packet comes from; * having peer socket, get its cgroup id; * compare peer cgroup id with self cgroup id and allow packet only if they match, i.e. it comes from same cgroup; * the "sub-cgroup" part of the story can be addressed by getting not direct cgroup id of the peer socket, but ancestor cgroup id on specified level, similar to existing "ancestor" flavors of cgroup id helpers. A newly introduced selftest implements such a policy in its basic form to provide a better idea on the use-case. Patch 1 allows existing sk lookup helpers in cgroup skb. Patch 2 allows skb_ancestor_cgroup_id in cgroup skb. Patch 3 introduces two new helpers to get cgroup id of socket. 
Patch 4 extends network helpers to use them in the next patch. Patch 5 adds selftest / example of use-case. ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 5b0004d + 68e916b commit b92d44b

File tree

7 files changed

+386
-24
lines changed

7 files changed

+386
-24
lines changed

include/uapi/linux/bpf.h

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3121,6 +3121,38 @@ union bpf_attr {
31213121
* 0 on success, or a negative error in case of failure:
31223122
*
31233123
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
3124+
*
3125+
* u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
3126+
* Description
3127+
* Return the cgroup v2 id of the socket *sk*.
3128+
*
3129+
* *sk* must be a non-**NULL** pointer to a full socket, e.g. one
3130+
* returned from **bpf_sk_lookup_xxx**\ (),
3131+
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
3132+
* same as in **bpf_skb_cgroup_id**\ ().
3133+
*
3134+
* This helper is available only if the kernel was compiled with
3135+
* the **CONFIG_SOCK_CGROUP_DATA** configuration option.
3136+
* Return
3137+
* The id is returned or 0 in case the id could not be retrieved.
3138+
*
3139+
* u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
3140+
* Description
3141+
* Return id of cgroup v2 that is ancestor of cgroup associated
3142+
* with the *sk* at the *ancestor_level*. The root cgroup is at
3143+
* *ancestor_level* zero and each step down the hierarchy
3144+
* increments the level. If *ancestor_level* == level of cgroup
3145+
* associated with *sk*, then return value will be same as that
3146+
* of **bpf_sk_cgroup_id**\ ().
3147+
*
3148+
* The helper is useful to implement policies based on cgroups
3149+
* that are upper in hierarchy than immediate cgroup associated
3150+
* with *sk*.
3151+
*
3152+
* The format of returned id and helper limitations are same as in
3153+
* **bpf_sk_cgroup_id**\ ().
3154+
* Return
3155+
* The id is returned or 0 in case the id could not be retrieved.
31243156
*/
31253157
#define __BPF_FUNC_MAPPER(FN) \
31263158
FN(unspec), \
@@ -3250,7 +3282,9 @@ union bpf_attr {
32503282
FN(sk_assign), \
32513283
FN(ktime_get_boot_ns), \
32523284
FN(seq_printf), \
3253-
FN(seq_write),
3285+
FN(seq_write), \
3286+
FN(sk_cgroup_id), \
3287+
FN(sk_ancestor_cgroup_id),
32543288

32553289
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
32563290
* function eBPF program intends to call

net/core/filter.c

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4003,16 +4003,22 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
40034003
};
40044004

40054005
#ifdef CONFIG_SOCK_CGROUP_DATA
4006+
static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
4007+
{
4008+
struct cgroup *cgrp;
4009+
4010+
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4011+
return cgroup_id(cgrp);
4012+
}
4013+
40064014
BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
40074015
{
40084016
struct sock *sk = skb_to_full_sk(skb);
4009-
struct cgroup *cgrp;
40104017

40114018
if (!sk || !sk_fullsock(sk))
40124019
return 0;
40134020

4014-
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4015-
return cgroup_id(cgrp);
4021+
return __bpf_sk_cgroup_id(sk);
40164022
}
40174023

40184024
static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
@@ -4022,16 +4028,12 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
40224028
.arg1_type = ARG_PTR_TO_CTX,
40234029
};
40244030

4025-
BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
4026-
ancestor_level)
4031+
static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
4032+
int ancestor_level)
40274033
{
4028-
struct sock *sk = skb_to_full_sk(skb);
40294034
struct cgroup *ancestor;
40304035
struct cgroup *cgrp;
40314036

4032-
if (!sk || !sk_fullsock(sk))
4033-
return 0;
4034-
40354037
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
40364038
ancestor = cgroup_ancestor(cgrp, ancestor_level);
40374039
if (!ancestor)
@@ -4040,13 +4042,49 @@ BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
40404042
return cgroup_id(ancestor);
40414043
}
40424044

4045+
BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
4046+
ancestor_level)
4047+
{
4048+
struct sock *sk = skb_to_full_sk(skb);
4049+
4050+
if (!sk || !sk_fullsock(sk))
4051+
return 0;
4052+
4053+
return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
4054+
}
4055+
40434056
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
40444057
.func = bpf_skb_ancestor_cgroup_id,
40454058
.gpl_only = false,
40464059
.ret_type = RET_INTEGER,
40474060
.arg1_type = ARG_PTR_TO_CTX,
40484061
.arg2_type = ARG_ANYTHING,
40494062
};
4063+
4064+
BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
4065+
{
4066+
return __bpf_sk_cgroup_id(sk);
4067+
}
4068+
4069+
static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
4070+
.func = bpf_sk_cgroup_id,
4071+
.gpl_only = false,
4072+
.ret_type = RET_INTEGER,
4073+
.arg1_type = ARG_PTR_TO_SOCKET,
4074+
};
4075+
4076+
BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
4077+
{
4078+
return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
4079+
}
4080+
4081+
static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
4082+
.func = bpf_sk_ancestor_cgroup_id,
4083+
.gpl_only = false,
4084+
.ret_type = RET_INTEGER,
4085+
.arg1_type = ARG_PTR_TO_SOCKET,
4086+
.arg2_type = ARG_ANYTHING,
4087+
};
40504088
#endif
40514089

40524090
static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
@@ -6157,8 +6195,22 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
61576195
#ifdef CONFIG_SOCK_CGROUP_DATA
61586196
case BPF_FUNC_skb_cgroup_id:
61596197
return &bpf_skb_cgroup_id_proto;
6198+
case BPF_FUNC_skb_ancestor_cgroup_id:
6199+
return &bpf_skb_ancestor_cgroup_id_proto;
6200+
case BPF_FUNC_sk_cgroup_id:
6201+
return &bpf_sk_cgroup_id_proto;
6202+
case BPF_FUNC_sk_ancestor_cgroup_id:
6203+
return &bpf_sk_ancestor_cgroup_id_proto;
61606204
#endif
61616205
#ifdef CONFIG_INET
6206+
case BPF_FUNC_sk_lookup_tcp:
6207+
return &bpf_sk_lookup_tcp_proto;
6208+
case BPF_FUNC_sk_lookup_udp:
6209+
return &bpf_sk_lookup_udp_proto;
6210+
case BPF_FUNC_sk_release:
6211+
return &bpf_sk_release_proto;
6212+
case BPF_FUNC_skc_lookup_tcp:
6213+
return &bpf_skc_lookup_tcp_proto;
61626214
case BPF_FUNC_tcp_sock:
61636215
return &bpf_tcp_sock_proto;
61646216
case BPF_FUNC_get_listener_sock:

tools/include/uapi/linux/bpf.h

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3121,6 +3121,38 @@ union bpf_attr {
31213121
* 0 on success, or a negative error in case of failure:
31223122
*
31233123
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
3124+
*
3125+
* u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
3126+
* Description
3127+
* Return the cgroup v2 id of the socket *sk*.
3128+
*
3129+
* *sk* must be a non-**NULL** pointer to a full socket, e.g. one
3130+
* returned from **bpf_sk_lookup_xxx**\ (),
3131+
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
3132+
* same as in **bpf_skb_cgroup_id**\ ().
3133+
*
3134+
* This helper is available only if the kernel was compiled with
3135+
* the **CONFIG_SOCK_CGROUP_DATA** configuration option.
3136+
* Return
3137+
* The id is returned or 0 in case the id could not be retrieved.
3138+
*
3139+
* u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
3140+
* Description
3141+
* Return id of cgroup v2 that is ancestor of cgroup associated
3142+
* with the *sk* at the *ancestor_level*. The root cgroup is at
3143+
* *ancestor_level* zero and each step down the hierarchy
3144+
* increments the level. If *ancestor_level* == level of cgroup
3145+
* associated with *sk*, then return value will be same as that
3146+
* of **bpf_sk_cgroup_id**\ ().
3147+
*
3148+
* The helper is useful to implement policies based on cgroups
3149+
* that are upper in hierarchy than immediate cgroup associated
3150+
* with *sk*.
3151+
*
3152+
* The format of returned id and helper limitations are same as in
3153+
* **bpf_sk_cgroup_id**\ ().
3154+
* Return
3155+
* The id is returned or 0 in case the id could not be retrieved.
31243156
*/
31253157
#define __BPF_FUNC_MAPPER(FN) \
31263158
FN(unspec), \
@@ -3250,7 +3282,9 @@ union bpf_attr {
32503282
FN(sk_assign), \
32513283
FN(ktime_get_boot_ns), \
32523284
FN(seq_printf), \
3253-
FN(seq_write),
3285+
FN(seq_write), \
3286+
FN(sk_cgroup_id), \
3287+
FN(sk_ancestor_cgroup_id),
32543288

32553289
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
32563290
* function eBPF program intends to call

tools/testing/selftests/bpf/network_helpers.c

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44
#include <stdio.h>
55
#include <string.h>
66
#include <unistd.h>
7+
8+
#include <sys/epoll.h>
9+
710
#include <linux/err.h>
811
#include <linux/in.h>
912
#include <linux/in6.h>
1013

14+
#include "bpf_util.h"
1115
#include "network_helpers.h"
1216

1317
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
@@ -77,34 +81,78 @@ static const size_t timeo_optlen = sizeof(timeo_sec);
7781

7882
int connect_to_fd(int family, int type, int server_fd)
7983
{
80-
struct sockaddr_storage addr;
81-
socklen_t len = sizeof(addr);
82-
int fd;
84+
int fd, save_errno;
8385

8486
fd = socket(family, type, 0);
8587
if (fd < 0) {
8688
log_err("Failed to create client socket");
8789
return -1;
8890
}
8991

90-
if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, timeo_optlen)) {
92+
if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
93+
save_errno = errno;
94+
close(fd);
95+
errno = save_errno;
96+
return -1;
97+
}
98+
99+
return fd;
100+
}
101+
102+
int connect_fd_to_fd(int client_fd, int server_fd)
103+
{
104+
struct sockaddr_storage addr;
105+
socklen_t len = sizeof(addr);
106+
int save_errno;
107+
108+
if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
109+
timeo_optlen)) {
91110
log_err("Failed to set SO_RCVTIMEO");
92-
goto out;
111+
return -1;
93112
}
94113

95114
if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
96115
log_err("Failed to get server addr");
97-
goto out;
116+
return -1;
98117
}
99118

100-
if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
101-
log_err("Fail to connect to server with family %d", family);
102-
goto out;
119+
if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
120+
if (errno != EINPROGRESS) {
121+
save_errno = errno;
122+
log_err("Failed to connect to server");
123+
errno = save_errno;
124+
}
125+
return -1;
103126
}
104127

105-
return fd;
128+
return 0;
129+
}
130+
131+
int connect_wait(int fd)
132+
{
133+
struct epoll_event ev = {}, events[2];
134+
int timeout_ms = 1000;
135+
int efd, nfd;
136+
137+
efd = epoll_create1(EPOLL_CLOEXEC);
138+
if (efd < 0) {
139+
log_err("Failed to open epoll fd");
140+
return -1;
141+
}
142+
143+
ev.events = EPOLLRDHUP | EPOLLOUT;
144+
ev.data.fd = fd;
145+
146+
if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
147+
log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
148+
close(efd);
149+
return -1;
150+
}
151+
152+
nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
153+
if (nfd < 0)
154+
log_err("Failed to wait for I/O event on epoll fd=%d", efd);
106155

107-
out:
108-
close(fd);
109-
return -1;
156+
close(efd);
157+
return nfd;
110158
}

tools/testing/selftests/bpf/network_helpers.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,7 @@ extern struct ipv6_packet pkt_v6;
3535

3636
int start_server(int family, int type);
3737
int connect_to_fd(int family, int type, int server_fd);
38+
int connect_fd_to_fd(int client_fd, int server_fd);
39+
int connect_wait(int client_fd);
3840

3941
#endif

0 commit comments

Comments
 (0)