Skip to content

Commit 02bc2b6

Browse files
author
Alexei Starovoitov
committed
Merge branch 'setsockopt-extra-mem'
Stanislav Fomichev says: ==================== The current setsockopt hook is limited to the size of the buffer that the user supplied. Since we always allocate memory and copy the value into kernel space, allocate just a little bit more in case the BPF program needs to override the input data with a larger value. The canonical example is the TCP_CONGESTION socket option, where the input buffer is a string: if the user calls it with a short string, the BPF program has no way of extending it. The tests are extended with a TCP_CONGESTION use case. ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents a98bf57 + fd5ef31 commit 02bc2b6

File tree

3 files changed

+60
-4
lines changed

3 files changed

+60
-4
lines changed

kernel/bpf/cgroup.c

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -964,7 +964,6 @@ static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
964964
return -ENOMEM;
965965

966966
ctx->optval_end = ctx->optval + max_optlen;
967-
ctx->optlen = max_optlen;
968967

969968
return 0;
970969
}
@@ -984,7 +983,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
984983
.level = *level,
985984
.optname = *optname,
986985
};
987-
int ret;
986+
int ret, max_optlen;
988987

989988
/* Opportunistic check to see whether we have any BPF program
990989
* attached to the hook so we don't waste time allocating
@@ -994,10 +993,18 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
994993
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
995994
return 0;
996995

997-
ret = sockopt_alloc_buf(&ctx, *optlen);
996+
/* Allocate a bit more than the initial user buffer for
997+
* BPF program. The canonical use case is overriding
998+
* TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
999+
*/
1000+
max_optlen = max_t(int, 16, *optlen);
1001+
1002+
ret = sockopt_alloc_buf(&ctx, max_optlen);
9981003
if (ret)
9991004
return ret;
10001005

1006+
ctx.optlen = *optlen;
1007+
10011008
if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
10021009
ret = -EFAULT;
10031010
goto out;
@@ -1016,7 +1023,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
10161023
if (ctx.optlen == -1) {
10171024
/* optlen set to -1, bypass kernel */
10181025
ret = 1;
1019-
} else if (ctx.optlen > *optlen || ctx.optlen < -1) {
1026+
} else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
10201027
/* optlen is out of bounds */
10211028
ret = -EFAULT;
10221029
} else {
@@ -1063,6 +1070,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
10631070
if (ret)
10641071
return ret;
10651072

1073+
ctx.optlen = max_optlen;
1074+
10661075
if (!retval) {
10671076
/* If kernel getsockopt finished successfully,
10681077
* copy whatever was returned to the user back

tools/testing/selftests/bpf/progs/sockopt_sk.c

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0
2+
#include <string.h>
23
#include <netinet/in.h>
4+
#include <netinet/tcp.h>
35
#include <linux/bpf.h>
46
#include "bpf_helpers.h"
57

@@ -42,6 +44,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
4244
return 1;
4345
}
4446

47+
if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
48+
/* Not interested in SOL_TCP:TCP_CONGESTION;
49+
* let next BPF program in the cgroup chain or kernel
50+
* handle it.
51+
*/
52+
return 1;
53+
}
54+
4555
if (ctx->level != SOL_CUSTOM)
4656
return 0; /* EPERM, deny everything except custom level */
4757

@@ -91,6 +101,18 @@ int _setsockopt(struct bpf_sockopt *ctx)
91101
return 1;
92102
}
93103

104+
if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
105+
/* Always use cubic */
106+
107+
if (optval + 5 > optval_end)
108+
return 0; /* EPERM, bounds check */
109+
110+
memcpy(optval, "cubic", 5);
111+
ctx->optlen = 5;
112+
113+
return 1;
114+
}
115+
94116
if (ctx->level != SOL_CUSTOM)
95117
return 0; /* EPERM, deny everything except custom level */
96118

tools/testing/selftests/bpf/test_sockopt_sk.c

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
#include <sys/types.h>
77
#include <sys/socket.h>
88
#include <netinet/in.h>
9+
#include <netinet/tcp.h>
910

1011
#include <linux/filter.h>
1112
#include <bpf/bpf.h>
@@ -25,6 +26,7 @@ static int getsetsockopt(void)
2526
union {
2627
char u8[4];
2728
__u32 u32;
29+
char cc[16]; /* TCP_CA_NAME_MAX */
2830
} buf = {};
2931
socklen_t optlen;
3032

@@ -115,6 +117,29 @@ static int getsetsockopt(void)
115117
goto err;
116118
}
117119

120+
/* TCP_CONGESTION can extend the string */
121+
122+
strcpy(buf.cc, "nv");
123+
err = setsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, strlen("nv"));
124+
if (err) {
125+
log_err("Failed to call setsockopt(TCP_CONGESTION)");
126+
goto err;
127+
}
128+
129+
130+
optlen = sizeof(buf.cc);
131+
err = getsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, &optlen);
132+
if (err) {
133+
log_err("Failed to call getsockopt(TCP_CONGESTION)");
134+
goto err;
135+
}
136+
137+
if (strcmp(buf.cc, "cubic") != 0) {
138+
log_err("Unexpected getsockopt(TCP_CONGESTION) %s != %s",
139+
buf.cc, "cubic");
140+
goto err;
141+
}
142+
118143
close(fd);
119144
return 0;
120145
err:

0 commit comments

Comments
 (0)