Skip to content

Commit bb7a425

Browse files
author
Alexei Starovoitov
committed
Merge branch 'Align BPF TCP CCs implementing cong_control() with non-BPF CCs'
Jörn-Thorben Hinz says:

====================
This series corrects some inconveniences for a BPF TCP CC that implements and uses tcp_congestion_ops.cong_control(). Until now, such a CC did not have all necessary write access to struct sock and unnecessarily needed to implement cong_avoid().

v4:
- Remove braces around single statements after if
- Don’t check pointer passed to bpf_link__destroy()

v3:
- Add a selftest writing sk_pacing_*
- Add a selftest with incomplete tcp_congestion_ops
- Add a selftest with unsupported get_info()
- Remove an unused variable
- Reword a comment about reg() in bpf_struct_ops_map_update_elem()

v2:
- Drop redundant check for required functions and just rely on tcp_register_congestion_control() (Martin KaFai Lau)
====================

Reviewed-by: Martin KaFai Lau <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 9676fec + f14a3f6 commit bb7a425

File tree

6 files changed

+186
-37
lines changed

6 files changed

+186
-37
lines changed

kernel/bpf/bpf_struct_ops.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -503,10 +503,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
503503
goto unlock;
504504
}
505505

506-
/* Error during st_ops->reg(). It is very unlikely since
507-
* the above init_member() should have caught it earlier
508-
* before reg(). The only possibility is if there was a race
509-
* in registering the struct_ops (under the same name) to
506+
/* Error during st_ops->reg(). Can happen if this struct_ops needs to be
507+
* verified as a whole, after all init_member() calls. Can also happen if
508+
* there was a race in registering the struct_ops (under the same name) to
510509
* a sub-system through different struct_ops's maps.
511510
*/
512511
set_memory_nx((long)st_map->image, 1);

net/ipv4/bpf_tcp_ca.c

Lines changed: 6 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,6 @@
1414
/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
1515
extern struct bpf_struct_ops bpf_tcp_congestion_ops;
1616

17-
static u32 optional_ops[] = {
18-
offsetof(struct tcp_congestion_ops, init),
19-
offsetof(struct tcp_congestion_ops, release),
20-
offsetof(struct tcp_congestion_ops, set_state),
21-
offsetof(struct tcp_congestion_ops, cwnd_event),
22-
offsetof(struct tcp_congestion_ops, in_ack_event),
23-
offsetof(struct tcp_congestion_ops, pkts_acked),
24-
offsetof(struct tcp_congestion_ops, min_tso_segs),
25-
offsetof(struct tcp_congestion_ops, sndbuf_expand),
26-
offsetof(struct tcp_congestion_ops, cong_control),
27-
};
28-
2917
static u32 unsupported_ops[] = {
3018
offsetof(struct tcp_congestion_ops, get_info),
3119
};
@@ -51,18 +39,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
5139
return 0;
5240
}
5341

54-
static bool is_optional(u32 member_offset)
55-
{
56-
unsigned int i;
57-
58-
for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
59-
if (member_offset == optional_ops[i])
60-
return true;
61-
}
62-
63-
return false;
64-
}
65-
6642
static bool is_unsupported(u32 member_offset)
6743
{
6844
unsigned int i;
@@ -111,6 +87,12 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
11187
}
11288

11389
switch (off) {
90+
case offsetof(struct sock, sk_pacing_rate):
91+
end = offsetofend(struct sock, sk_pacing_rate);
92+
break;
93+
case offsetof(struct sock, sk_pacing_status):
94+
end = offsetofend(struct sock, sk_pacing_status);
95+
break;
11496
case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv):
11597
end = offsetofend(struct inet_connection_sock, icsk_ca_priv);
11698
break;
@@ -240,7 +222,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
240222
{
241223
const struct tcp_congestion_ops *utcp_ca;
242224
struct tcp_congestion_ops *tcp_ca;
243-
int prog_fd;
244225
u32 moff;
245226

246227
utcp_ca = (const struct tcp_congestion_ops *)udata;
@@ -262,14 +243,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
262243
return 1;
263244
}
264245

265-
if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
266-
return 0;
267-
268-
/* Ensure bpf_prog is provided for compulsory func ptr */
269-
prog_fd = (int)(*(unsigned long *)(udata + moff));
270-
if (!prog_fd && !is_optional(moff) && !is_unsupported(moff))
271-
return -EINVAL;
272-
273246
return 0;
274247
}
275248

tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
#include "bpf_cubic.skel.h"
1010
#include "bpf_tcp_nogpl.skel.h"
1111
#include "bpf_dctcp_release.skel.h"
12+
#include "tcp_ca_write_sk_pacing.skel.h"
13+
#include "tcp_ca_incompl_cong_ops.skel.h"
14+
#include "tcp_ca_unsupp_cong_op.skel.h"
1215

1316
#ifndef ENOTSUPP
1417
#define ENOTSUPP 524
@@ -322,6 +325,58 @@ static void test_rel_setsockopt(void)
322325
bpf_dctcp_release__destroy(rel_skel);
323326
}
324327

328+
static void test_write_sk_pacing(void)
329+
{
330+
struct tcp_ca_write_sk_pacing *skel;
331+
struct bpf_link *link;
332+
333+
skel = tcp_ca_write_sk_pacing__open_and_load();
334+
if (!ASSERT_OK_PTR(skel, "open_and_load"))
335+
return;
336+
337+
link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing);
338+
ASSERT_OK_PTR(link, "attach_struct_ops");
339+
340+
bpf_link__destroy(link);
341+
tcp_ca_write_sk_pacing__destroy(skel);
342+
}
343+
344+
static void test_incompl_cong_ops(void)
345+
{
346+
struct tcp_ca_incompl_cong_ops *skel;
347+
struct bpf_link *link;
348+
349+
skel = tcp_ca_incompl_cong_ops__open_and_load();
350+
if (!ASSERT_OK_PTR(skel, "open_and_load"))
351+
return;
352+
353+
/* That cong_avoid() and cong_control() are missing is only reported at
354+
* this point:
355+
*/
356+
link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops);
357+
ASSERT_ERR_PTR(link, "attach_struct_ops");
358+
359+
bpf_link__destroy(link);
360+
tcp_ca_incompl_cong_ops__destroy(skel);
361+
}
362+
363+
static void test_unsupp_cong_op(void)
364+
{
365+
libbpf_print_fn_t old_print_fn;
366+
struct tcp_ca_unsupp_cong_op *skel;
367+
368+
err_str = "attach to unsupported member get_info";
369+
found = false;
370+
old_print_fn = libbpf_set_print(libbpf_debug_print);
371+
372+
skel = tcp_ca_unsupp_cong_op__open_and_load();
373+
ASSERT_NULL(skel, "open_and_load");
374+
ASSERT_EQ(found, true, "expected_err_msg");
375+
376+
tcp_ca_unsupp_cong_op__destroy(skel);
377+
libbpf_set_print(old_print_fn);
378+
}
379+
325380
void test_bpf_tcp_ca(void)
326381
{
327382
if (test__start_subtest("dctcp"))
@@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void)
334389
test_dctcp_fallback();
335390
if (test__start_subtest("rel_setsockopt"))
336391
test_rel_setsockopt();
392+
if (test__start_subtest("write_sk_pacing"))
393+
test_write_sk_pacing();
394+
if (test__start_subtest("incompl_cong_ops"))
395+
test_incompl_cong_ops();
396+
if (test__start_subtest("unsupp_cong_op"))
397+
test_unsupp_cong_op();
337398
}
tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include "vmlinux.h"
4+
5+
#include <bpf/bpf_helpers.h>
6+
#include <bpf/bpf_tracing.h>
7+
8+
char _license[] SEC("license") = "GPL";
9+
10+
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
11+
{
12+
return (struct tcp_sock *)sk;
13+
}
14+
15+
SEC("struct_ops/incompl_cong_ops_ssthresh")
16+
__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
17+
{
18+
return tcp_sk(sk)->snd_ssthresh;
19+
}
20+
21+
SEC("struct_ops/incompl_cong_ops_undo_cwnd")
22+
__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
23+
{
24+
return tcp_sk(sk)->snd_cwnd;
25+
}
26+
27+
SEC(".struct_ops")
28+
struct tcp_congestion_ops incompl_cong_ops = {
29+
/* Intentionally leaving out any of the required cong_avoid() and
30+
* cong_control() here.
31+
*/
32+
.ssthresh = (void *)incompl_cong_ops_ssthresh,
33+
.undo_cwnd = (void *)incompl_cong_ops_undo_cwnd,
34+
.name = "bpf_incompl_ops",
35+
};
tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include "vmlinux.h"
4+
5+
#include <bpf/bpf_helpers.h>
6+
#include <bpf/bpf_tracing.h>
7+
8+
char _license[] SEC("license") = "GPL";
9+
10+
SEC("struct_ops/unsupp_cong_op_get_info")
11+
size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
12+
union tcp_cc_info *info)
13+
{
14+
return 0;
15+
}
16+
17+
SEC(".struct_ops")
18+
struct tcp_congestion_ops unsupp_cong_op = {
19+
.get_info = (void *)unsupp_cong_op_get_info,
20+
.name = "bpf_unsupp_op",
21+
};
tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include "vmlinux.h"
4+
5+
#include <bpf/bpf_helpers.h>
6+
#include <bpf/bpf_tracing.h>
7+
8+
char _license[] SEC("license") = "GPL";
9+
10+
#define USEC_PER_SEC 1000000UL
11+
12+
#define min(a, b) ((a) < (b) ? (a) : (b))
13+
14+
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
15+
{
16+
return (struct tcp_sock *)sk;
17+
}
18+
19+
SEC("struct_ops/write_sk_pacing_init")
20+
void BPF_PROG(write_sk_pacing_init, struct sock *sk)
21+
{
22+
#ifdef ENABLE_ATOMICS_TESTS
23+
__sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE,
24+
SK_PACING_NEEDED);
25+
#else
26+
sk->sk_pacing_status = SK_PACING_NEEDED;
27+
#endif
28+
}
29+
30+
SEC("struct_ops/write_sk_pacing_cong_control")
31+
void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
32+
const struct rate_sample *rs)
33+
{
34+
const struct tcp_sock *tp = tcp_sk(sk);
35+
unsigned long rate =
36+
((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) /
37+
(tp->srtt_us ?: 1U << 3);
38+
sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
39+
}
40+
41+
SEC("struct_ops/write_sk_pacing_ssthresh")
42+
__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
43+
{
44+
return tcp_sk(sk)->snd_ssthresh;
45+
}
46+
47+
SEC("struct_ops/write_sk_pacing_undo_cwnd")
48+
__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
49+
{
50+
return tcp_sk(sk)->snd_cwnd;
51+
}
52+
53+
SEC(".struct_ops")
54+
struct tcp_congestion_ops write_sk_pacing = {
55+
.init = (void *)write_sk_pacing_init,
56+
.cong_control = (void *)write_sk_pacing_cong_control,
57+
.ssthresh = (void *)write_sk_pacing_ssthresh,
58+
.undo_cwnd = (void *)write_sk_pacing_undo_cwnd,
59+
.name = "bpf_w_sk_pacing",
60+
};

0 commit comments

Comments
 (0)