Skip to content

Commit beecf11

Browse files
fomichev authored and Alexei Starovoitov committed
bpf: Bpf_{g,s}etsockopt for struct bpf_sock_addr
Currently, bpf_getsockopt and bpf_setsockopt helpers operate on the 'struct bpf_sock_ops' context in BPF_PROG_TYPE_SOCK_OPS program. Let's generalize them and make them available for 'struct bpf_sock_addr'. That way, in the future, we can allow those helpers in more places. As an example, let's expose those 'struct bpf_sock_addr' based helpers to BPF_CGROUP_INET{4,6}_CONNECT hooks. That way we can override CC before the connection is made. v3: * Expose custom helpers for bpf_sock_addr context instead of doing generic bpf_sock argument (as suggested by Daniel). Even with try_socket_lock that doesn't sleep we have a problem where context sk is already locked and socket lock is non-nestable. v2: * s/BPF_PROG_TYPE_CGROUP_SOCKOPT/BPF_PROG_TYPE_SOCK_OPS/ Signed-off-by: Stanislav Fomichev <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]> Acked-by: Martin KaFai Lau <[email protected]> Acked-by: John Fastabend <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 3dbb5b5 commit beecf11

File tree

5 files changed

+166
-27
lines changed

5 files changed

+166
-27
lines changed

include/uapi/linux/bpf.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,14 +1587,19 @@ union bpf_attr {
15871587
* Return
15881588
* 0
15891589
*
1590-
* int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
1590+
* int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
15911591
* Description
15921592
* Emulate a call to **setsockopt()** on the socket associated to
15931593
* *bpf_socket*, which must be a full socket. The *level* at
15941594
* which the option resides and the name *optname* of the option
15951595
* must be specified, see **setsockopt(2)** for more information.
15961596
* The option value of length *optlen* is pointed to by *optval*.
15971597
*
1598+
* *bpf_socket* should be one of the following:
1599+
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
1600+
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
1601+
* and **BPF_CGROUP_INET6_CONNECT**.
1602+
*
15981603
* This helper actually implements a subset of **setsockopt()**.
15991604
* It supports the following *level*\ s:
16001605
*
@@ -1789,7 +1794,7 @@ union bpf_attr {
17891794
* Return
17901795
* 0 on success, or a negative error in case of failure.
17911796
*
1792-
* int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
1797+
* int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
17931798
* Description
17941799
* Emulate a call to **getsockopt()** on the socket associated to
17951800
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1798,6 +1803,11 @@ union bpf_attr {
17981803
* The retrieved value is stored in the structure pointed by
17991804
* *optval* and of length *optlen*.
18001805
*
1806+
* *bpf_socket* should be one of the following:
1807+
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
1808+
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
1809+
* and **BPF_CGROUP_INET6_CONNECT**.
1810+
*
18011811
* This helper actually implements a subset of **getsockopt()**.
18021812
* It supports the following *level*\ s:
18031813
*

net/core/filter.c

Lines changed: 95 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4194,16 +4194,19 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
41944194
.arg1_type = ARG_PTR_TO_CTX,
41954195
};
41964196

4197-
BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4198-
int, level, int, optname, char *, optval, int, optlen)
4197+
#define SOCKOPT_CC_REINIT (1 << 0)
4198+
4199+
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
4200+
char *optval, int optlen, u32 flags)
41994201
{
4200-
struct sock *sk = bpf_sock->sk;
42014202
int ret = 0;
42024203
int val;
42034204

42044205
if (!sk_fullsock(sk))
42054206
return -EINVAL;
42064207

4208+
sock_owned_by_me(sk);
4209+
42074210
if (level == SOL_SOCKET) {
42084211
if (optlen != sizeof(int))
42094212
return -EINVAL;
@@ -4298,7 +4301,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
42984301
sk->sk_prot->setsockopt == tcp_setsockopt) {
42994302
if (optname == TCP_CONGESTION) {
43004303
char name[TCP_CA_NAME_MAX];
4301-
bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
4304+
bool reinit = flags & SOCKOPT_CC_REINIT;
43024305

43034306
strncpy(name, optval, min_t(long, optlen,
43044307
TCP_CA_NAME_MAX-1));
@@ -4345,24 +4348,14 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
43454348
return ret;
43464349
}
43474350

4348-
static const struct bpf_func_proto bpf_setsockopt_proto = {
4349-
.func = bpf_setsockopt,
4350-
.gpl_only = false,
4351-
.ret_type = RET_INTEGER,
4352-
.arg1_type = ARG_PTR_TO_CTX,
4353-
.arg2_type = ARG_ANYTHING,
4354-
.arg3_type = ARG_ANYTHING,
4355-
.arg4_type = ARG_PTR_TO_MEM,
4356-
.arg5_type = ARG_CONST_SIZE,
4357-
};
4358-
4359-
BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4360-
int, level, int, optname, char *, optval, int, optlen)
4351+
static int _bpf_getsockopt(struct sock *sk, int level, int optname,
4352+
char *optval, int optlen)
43614353
{
4362-
struct sock *sk = bpf_sock->sk;
4363-
43644354
if (!sk_fullsock(sk))
43654355
goto err_clear;
4356+
4357+
sock_owned_by_me(sk);
4358+
43664359
#ifdef CONFIG_INET
43674360
if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
43684361
struct inet_connection_sock *icsk;
@@ -4428,8 +4421,71 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
44284421
return -EINVAL;
44294422
}
44304423

4431-
static const struct bpf_func_proto bpf_getsockopt_proto = {
4432-
.func = bpf_getsockopt,
4424+
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
4425+
int, level, int, optname, char *, optval, int, optlen)
4426+
{
4427+
u32 flags = 0;
4428+
return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen,
4429+
flags);
4430+
}
4431+
4432+
static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
4433+
.func = bpf_sock_addr_setsockopt,
4434+
.gpl_only = false,
4435+
.ret_type = RET_INTEGER,
4436+
.arg1_type = ARG_PTR_TO_CTX,
4437+
.arg2_type = ARG_ANYTHING,
4438+
.arg3_type = ARG_ANYTHING,
4439+
.arg4_type = ARG_PTR_TO_MEM,
4440+
.arg5_type = ARG_CONST_SIZE,
4441+
};
4442+
4443+
BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx,
4444+
int, level, int, optname, char *, optval, int, optlen)
4445+
{
4446+
return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen);
4447+
}
4448+
4449+
static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
4450+
.func = bpf_sock_addr_getsockopt,
4451+
.gpl_only = false,
4452+
.ret_type = RET_INTEGER,
4453+
.arg1_type = ARG_PTR_TO_CTX,
4454+
.arg2_type = ARG_ANYTHING,
4455+
.arg3_type = ARG_ANYTHING,
4456+
.arg4_type = ARG_PTR_TO_UNINIT_MEM,
4457+
.arg5_type = ARG_CONST_SIZE,
4458+
};
4459+
4460+
BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4461+
int, level, int, optname, char *, optval, int, optlen)
4462+
{
4463+
u32 flags = 0;
4464+
if (bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
4465+
flags |= SOCKOPT_CC_REINIT;
4466+
return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen,
4467+
flags);
4468+
}
4469+
4470+
static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
4471+
.func = bpf_sock_ops_setsockopt,
4472+
.gpl_only = false,
4473+
.ret_type = RET_INTEGER,
4474+
.arg1_type = ARG_PTR_TO_CTX,
4475+
.arg2_type = ARG_ANYTHING,
4476+
.arg3_type = ARG_ANYTHING,
4477+
.arg4_type = ARG_PTR_TO_MEM,
4478+
.arg5_type = ARG_CONST_SIZE,
4479+
};
4480+
4481+
BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4482+
int, level, int, optname, char *, optval, int, optlen)
4483+
{
4484+
return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
4485+
}
4486+
4487+
static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = {
4488+
.func = bpf_sock_ops_getsockopt,
44334489
.gpl_only = false,
44344490
.ret_type = RET_INTEGER,
44354491
.arg1_type = ARG_PTR_TO_CTX,
@@ -6043,6 +6099,22 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
60436099
return &bpf_sk_storage_get_proto;
60446100
case BPF_FUNC_sk_storage_delete:
60456101
return &bpf_sk_storage_delete_proto;
6102+
case BPF_FUNC_setsockopt:
6103+
switch (prog->expected_attach_type) {
6104+
case BPF_CGROUP_INET4_CONNECT:
6105+
case BPF_CGROUP_INET6_CONNECT:
6106+
return &bpf_sock_addr_setsockopt_proto;
6107+
default:
6108+
return NULL;
6109+
}
6110+
case BPF_FUNC_getsockopt:
6111+
switch (prog->expected_attach_type) {
6112+
case BPF_CGROUP_INET4_CONNECT:
6113+
case BPF_CGROUP_INET6_CONNECT:
6114+
return &bpf_sock_addr_getsockopt_proto;
6115+
default:
6116+
return NULL;
6117+
}
60466118
default:
60476119
return bpf_base_func_proto(func_id);
60486120
}
@@ -6261,9 +6333,9 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
62616333
{
62626334
switch (func_id) {
62636335
case BPF_FUNC_setsockopt:
6264-
return &bpf_setsockopt_proto;
6336+
return &bpf_sock_ops_setsockopt_proto;
62656337
case BPF_FUNC_getsockopt:
6266-
return &bpf_getsockopt_proto;
6338+
return &bpf_sock_ops_getsockopt_proto;
62676339
case BPF_FUNC_sock_ops_cb_flags_set:
62686340
return &bpf_sock_ops_cb_flags_set_proto;
62696341
case BPF_FUNC_sock_map_update:

tools/include/uapi/linux/bpf.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,14 +1587,19 @@ union bpf_attr {
15871587
* Return
15881588
* 0
15891589
*
1590-
* int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
1590+
* int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
15911591
* Description
15921592
* Emulate a call to **setsockopt()** on the socket associated to
15931593
* *bpf_socket*, which must be a full socket. The *level* at
15941594
* which the option resides and the name *optname* of the option
15951595
* must be specified, see **setsockopt(2)** for more information.
15961596
* The option value of length *optlen* is pointed to by *optval*.
15971597
*
1598+
* *bpf_socket* should be one of the following:
1599+
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
1600+
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
1601+
* and **BPF_CGROUP_INET6_CONNECT**.
1602+
*
15981603
* This helper actually implements a subset of **setsockopt()**.
15991604
* It supports the following *level*\ s:
16001605
*
@@ -1789,7 +1794,7 @@ union bpf_attr {
17891794
* Return
17901795
* 0 on success, or a negative error in case of failure.
17911796
*
1792-
* int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
1797+
* int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
17931798
* Description
17941799
* Emulate a call to **getsockopt()** on the socket associated to
17951800
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1798,6 +1803,11 @@ union bpf_attr {
17981803
* The retrieved value is stored in the structure pointed by
17991804
* *optval* and of length *optlen*.
18001805
*
1806+
* *bpf_socket* should be one of the following:
1807+
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
1808+
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
1809+
* and **BPF_CGROUP_INET6_CONNECT**.
1810+
*
18011811
* This helper actually implements a subset of **getsockopt()**.
18021812
* It supports the following *level*\ s:
18031813
*

tools/testing/selftests/bpf/config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,4 @@ CONFIG_IPV6_SIT=m
3737
CONFIG_BPF_JIT=y
3838
CONFIG_BPF_LSM=y
3939
CONFIG_SECURITY=y
40+
CONFIG_TCP_CONG_DCTCP=y

tools/testing/selftests/bpf/progs/connect4_prog.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/in.h>
99
#include <linux/in6.h>
1010
#include <sys/socket.h>
11+
#include <netinet/tcp.h>
1112

1213
#include <bpf/bpf_helpers.h>
1314
#include <bpf/bpf_endian.h>
@@ -16,6 +17,10 @@
1617
#define DST_REWRITE_IP4 0x7f000001U
1718
#define DST_REWRITE_PORT4 4444
1819

20+
#ifndef TCP_CA_NAME_MAX
21+
#define TCP_CA_NAME_MAX 16
22+
#endif
23+
1924
int _version SEC("version") = 1;
2025

2126
__attribute__ ((noinline))
@@ -33,6 +38,43 @@ int do_bind(struct bpf_sock_addr *ctx)
3338
return 1;
3439
}
3540

41+
static __inline int verify_cc(struct bpf_sock_addr *ctx,
42+
char expected[TCP_CA_NAME_MAX])
43+
{
44+
char buf[TCP_CA_NAME_MAX];
45+
int i;
46+
47+
if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
48+
return 1;
49+
50+
for (i = 0; i < TCP_CA_NAME_MAX; i++) {
51+
if (buf[i] != expected[i])
52+
return 1;
53+
if (buf[i] == 0)
54+
break;
55+
}
56+
57+
return 0;
58+
}
59+
60+
static __inline int set_cc(struct bpf_sock_addr *ctx)
61+
{
62+
char dctcp[TCP_CA_NAME_MAX] = "dctcp";
63+
char cubic[TCP_CA_NAME_MAX] = "cubic";
64+
65+
if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &dctcp, sizeof(dctcp)))
66+
return 1;
67+
if (verify_cc(ctx, dctcp))
68+
return 1;
69+
70+
if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
71+
return 1;
72+
if (verify_cc(ctx, cubic))
73+
return 1;
74+
75+
return 0;
76+
}
77+
3678
SEC("cgroup/connect4")
3779
int connect_v4_prog(struct bpf_sock_addr *ctx)
3880
{
@@ -66,6 +108,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
66108

67109
bpf_sk_release(sk);
68110

111+
/* Rewrite congestion control. */
112+
if (ctx->type == SOCK_STREAM && set_cc(ctx))
113+
return 0;
114+
69115
/* Rewrite destination. */
70116
ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
71117
ctx->user_port = bpf_htons(DST_REWRITE_PORT4);

0 commit comments

Comments
 (0)