Skip to content

Commit b98057e

Browse files
author
Alexei Starovoitov
committed
Merge branch 'Add bpf_loop helper'
Joanne Koong says: ==================== This patchset add a new helper, bpf_loop. One of the complexities of using for loops in bpf programs is that the verifier needs to ensure that in every possibility of the loop logic, the loop will always terminate. As such, there is a limit on how many iterations the loop can do. The bpf_loop helper moves the loop logic into the kernel and can thereby guarantee that the loop will always terminate. The bpf_loop helper simplifies a lot of the complexity the verifier needs to check, as well as removes the constraint on the number of loops able to be run. From the test results, we see that using bpf_loop in place of the traditional for loop led to a decrease in verification time and number of bpf instructions by ~99%. The benchmark results show that as the number of iterations increases, the overhead per iteration decreases. The high-level overview of the patches - Patch 1 - kernel-side + API changes for adding bpf_loop Patch 2 - tests Patch 3 - use bpf_loop in strobemeta + pyperf600 and measure verifier performance Patch 4 - benchmark for throughput + latency of bpf_loop call v3 -> v4: ~ Address nits: use usleep for triggering bpf programs, fix copyright style v2 -> v3: ~ Rerun benchmarks on physical machine, update results ~ Propagate original error codes in the verifier v1 -> v2: ~ Change helper name to bpf_loop (instead of bpf_for_each) ~ Set max nr_loops (~8 million loops) for bpf_loop call ~ Split tests + strobemeta/pyperf600 changes into two patches ~ Add new ops_report_final helper for outputting throughput and latency ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 88691e9 + ec15103 commit b98057e

File tree

20 files changed

+771
-39
lines changed

20 files changed

+771
-39
lines changed

include/linux/bpf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
21642164
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
21652165
extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
21662166
extern const struct bpf_func_proto bpf_find_vma_proto;
2167+
extern const struct bpf_func_proto bpf_loop_proto;
21672168

21682169
const struct bpf_func_proto *tracing_prog_func_proto(
21692170
enum bpf_func_id func_id, const struct bpf_prog *prog);

include/uapi/linux/bpf.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4957,6 +4957,30 @@ union bpf_attr {
49574957
* **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
49584958
* **-EBUSY** if failed to try lock mmap_lock.
49594959
* **-EINVAL** for invalid **flags**.
4960+
*
4961+
* long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
4962+
* Description
4963+
* For **nr_loops**, call **callback_fn** function
4964+
* with **callback_ctx** as the context parameter.
4965+
* The **callback_fn** should be a static function and
4966+
* the **callback_ctx** should be a pointer to the stack.
4967+
* The **flags** is used to control certain aspects of the helper.
4968+
* Currently, the **flags** must be 0. Currently, nr_loops is
4969+
* limited to 1 << 23 (~8 million) loops.
4970+
*
4971+
* long (\*callback_fn)(u32 index, void \*ctx);
4972+
*
4973+
* where **index** is the current index in the loop. The index
4974+
* is zero-indexed.
4975+
*
4976+
* If **callback_fn** returns 0, the helper will continue to the next
4977+
* loop. If return value is 1, the helper will skip the rest of
4978+
* the loops and return. Other return values are not used now,
4979+
* and will be rejected by the verifier.
4980+
*
4981+
* Return
4982+
* The number of loops performed, **-EINVAL** for invalid **flags**,
4983+
* **-E2BIG** if **nr_loops** exceeds the maximum number of loops.
49604984
*/
49614985
#define __BPF_FUNC_MAPPER(FN) \
49624986
FN(unspec), \
@@ -5140,6 +5164,7 @@ union bpf_attr {
51405164
FN(skc_to_unix_sock), \
51415165
FN(kallsyms_lookup_name), \
51425166
FN(find_vma), \
5167+
FN(loop), \
51435168
/* */
51445169

51455170
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

kernel/bpf/bpf_iter.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
714714
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
715715
.arg4_type = ARG_ANYTHING,
716716
};
717+
718+
/* maximum number of loops */
719+
#define MAX_LOOPS BIT(23)
720+
721+
BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
722+
u64, flags)
723+
{
724+
bpf_callback_t callback = (bpf_callback_t)callback_fn;
725+
u64 ret;
726+
u32 i;
727+
728+
if (flags)
729+
return -EINVAL;
730+
if (nr_loops > MAX_LOOPS)
731+
return -E2BIG;
732+
733+
for (i = 0; i < nr_loops; i++) {
734+
ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0);
735+
/* return value: 0 - continue, 1 - stop and return */
736+
if (ret)
737+
return i + 1;
738+
}
739+
740+
return i;
741+
}
742+
743+
const struct bpf_func_proto bpf_loop_proto = {
744+
.func = bpf_loop,
745+
.gpl_only = false,
746+
.ret_type = RET_INTEGER,
747+
.arg1_type = ARG_ANYTHING,
748+
.arg2_type = ARG_PTR_TO_FUNC,
749+
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
750+
.arg4_type = ARG_ANYTHING,
751+
};

kernel/bpf/helpers.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,6 +1378,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
13781378
return &bpf_ringbuf_query_proto;
13791379
case BPF_FUNC_for_each_map_elem:
13801380
return &bpf_for_each_map_elem_proto;
1381+
case BPF_FUNC_loop:
1382+
return &bpf_loop_proto;
13811383
default:
13821384
break;
13831385
}

kernel/bpf/verifier.c

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6085,6 +6085,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
60856085
return 0;
60866086
}
60876087

6088+
static int set_loop_callback_state(struct bpf_verifier_env *env,
6089+
struct bpf_func_state *caller,
6090+
struct bpf_func_state *callee,
6091+
int insn_idx)
6092+
{
6093+
/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
6094+
* u64 flags);
6095+
* callback_fn(u32 index, void *callback_ctx);
6096+
*/
6097+
callee->regs[BPF_REG_1].type = SCALAR_VALUE;
6098+
callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
6099+
6100+
/* unused */
6101+
__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
6102+
__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6103+
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6104+
6105+
callee->in_callback_fn = true;
6106+
return 0;
6107+
}
6108+
60886109
static int set_timer_callback_state(struct bpf_verifier_env *env,
60896110
struct bpf_func_state *caller,
60906111
struct bpf_func_state *callee,
@@ -6458,13 +6479,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
64586479
return err;
64596480
}
64606481

6461-
if (func_id == BPF_FUNC_tail_call) {
6462-
err = check_reference_leak(env);
6463-
if (err) {
6464-
verbose(env, "tail_call would lead to reference leak\n");
6465-
return err;
6466-
}
6467-
} else if (is_release_function(func_id)) {
6482+
if (is_release_function(func_id)) {
64686483
err = release_reference(env, meta.ref_obj_id);
64696484
if (err) {
64706485
verbose(env, "func %s#%d reference has not been acquired before\n",
@@ -6475,42 +6490,47 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
64756490

64766491
regs = cur_regs(env);
64776492

6478-
/* check that flags argument in get_local_storage(map, flags) is 0,
6479-
* this is required because get_local_storage() can't return an error.
6480-
*/
6481-
if (func_id == BPF_FUNC_get_local_storage &&
6482-
!register_is_null(&regs[BPF_REG_2])) {
6483-
verbose(env, "get_local_storage() doesn't support non-zero flags\n");
6484-
return -EINVAL;
6485-
}
6486-
6487-
if (func_id == BPF_FUNC_for_each_map_elem) {
6493+
switch (func_id) {
6494+
case BPF_FUNC_tail_call:
6495+
err = check_reference_leak(env);
6496+
if (err) {
6497+
verbose(env, "tail_call would lead to reference leak\n");
6498+
return err;
6499+
}
6500+
break;
6501+
case BPF_FUNC_get_local_storage:
6502+
/* check that flags argument in get_local_storage(map, flags) is 0,
6503+
* this is required because get_local_storage() can't return an error.
6504+
*/
6505+
if (!register_is_null(&regs[BPF_REG_2])) {
6506+
verbose(env, "get_local_storage() doesn't support non-zero flags\n");
6507+
return -EINVAL;
6508+
}
6509+
break;
6510+
case BPF_FUNC_for_each_map_elem:
64886511
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
64896512
set_map_elem_callback_state);
6490-
if (err < 0)
6491-
return -EINVAL;
6492-
}
6493-
6494-
if (func_id == BPF_FUNC_timer_set_callback) {
6513+
break;
6514+
case BPF_FUNC_timer_set_callback:
64956515
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
64966516
set_timer_callback_state);
6497-
if (err < 0)
6498-
return -EINVAL;
6499-
}
6500-
6501-
if (func_id == BPF_FUNC_find_vma) {
6517+
break;
6518+
case BPF_FUNC_find_vma:
65026519
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
65036520
set_find_vma_callback_state);
6504-
if (err < 0)
6505-
return -EINVAL;
6506-
}
6507-
6508-
if (func_id == BPF_FUNC_snprintf) {
6521+
break;
6522+
case BPF_FUNC_snprintf:
65096523
err = check_bpf_snprintf_call(env, regs);
6510-
if (err < 0)
6511-
return err;
6524+
break;
6525+
case BPF_FUNC_loop:
6526+
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6527+
set_loop_callback_state);
6528+
break;
65126529
}
65136530

6531+
if (err)
6532+
return err;
6533+
65146534
/* reset caller saved regs */
65156535
for (i = 0; i < CALLER_SAVED_REGS; i++) {
65166536
mark_reg_not_init(env, regs, caller_saved[i]);

tools/include/uapi/linux/bpf.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4957,6 +4957,30 @@ union bpf_attr {
49574957
* **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
49584958
* **-EBUSY** if failed to try lock mmap_lock.
49594959
* **-EINVAL** for invalid **flags**.
4960+
*
4961+
* long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
4962+
* Description
4963+
* For **nr_loops**, call **callback_fn** function
4964+
* with **callback_ctx** as the context parameter.
4965+
* The **callback_fn** should be a static function and
4966+
* the **callback_ctx** should be a pointer to the stack.
4967+
* The **flags** is used to control certain aspects of the helper.
4968+
* Currently, the **flags** must be 0. Currently, nr_loops is
4969+
* limited to 1 << 23 (~8 million) loops.
4970+
*
4971+
* long (\*callback_fn)(u32 index, void \*ctx);
4972+
*
4973+
* where **index** is the current index in the loop. The index
4974+
* is zero-indexed.
4975+
*
4976+
* If **callback_fn** returns 0, the helper will continue to the next
4977+
* loop. If return value is 1, the helper will skip the rest of
4978+
* the loops and return. Other return values are not used now,
4979+
* and will be rejected by the verifier.
4980+
*
4981+
* Return
4982+
* The number of loops performed, **-EINVAL** for invalid **flags**,
4983+
* **-E2BIG** if **nr_loops** exceeds the maximum number of loops.
49604984
*/
49614985
#define __BPF_FUNC_MAPPER(FN) \
49624986
FN(unspec), \
@@ -5140,6 +5164,7 @@ union bpf_attr {
51405164
FN(skc_to_unix_sock), \
51415165
FN(kallsyms_lookup_name), \
51425166
FN(find_vma), \
5167+
FN(loop), \
51435168
/* */
51445169

51455170
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

tools/testing/selftests/bpf/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,7 @@ $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
531531
$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
532532
$(OUTPUT)/perfbuf_bench.skel.h
533533
$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
534+
$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
534535
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
535536
$(OUTPUT)/bench: LDLIBS += -lm
536537
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -540,7 +541,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
540541
$(OUTPUT)/bench_rename.o \
541542
$(OUTPUT)/bench_trigger.o \
542543
$(OUTPUT)/bench_ringbufs.o \
543-
$(OUTPUT)/bench_bloom_filter_map.o
544+
$(OUTPUT)/bench_bloom_filter_map.o \
545+
$(OUTPUT)/bench_bpf_loop.o
544546
$(call msg,BINARY,,$@)
545547
$(Q)$(CC) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
546548

tools/testing/selftests/bpf/bench.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,39 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt)
134134
total_ops_mean, total_ops_stddev);
135135
}
136136

137+
void ops_report_progress(int iter, struct bench_res *res, long delta_ns)
138+
{
139+
double hits_per_sec, hits_per_prod;
140+
141+
hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
142+
hits_per_prod = hits_per_sec / env.producer_cnt;
143+
144+
printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
145+
146+
printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod);
147+
}
148+
149+
void ops_report_final(struct bench_res res[], int res_cnt)
150+
{
151+
double hits_mean = 0.0, hits_stddev = 0.0;
152+
int i;
153+
154+
for (i = 0; i < res_cnt; i++)
155+
hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
156+
157+
if (res_cnt > 1) {
158+
for (i = 0; i < res_cnt; i++)
159+
hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
160+
(hits_mean - res[i].hits / 1000000.0) /
161+
(res_cnt - 1.0);
162+
163+
hits_stddev = sqrt(hits_stddev);
164+
}
165+
printf("Summary: throughput %8.3lf \u00B1 %5.3lf M ops/s (%7.3lfM ops/prod), ",
166+
hits_mean, hits_stddev, hits_mean / env.producer_cnt);
167+
printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
168+
}
169+
137170
const char *argp_program_version = "benchmark";
138171
const char *argp_program_bug_address = "<[email protected]>";
139172
const char argp_program_doc[] =
@@ -171,10 +204,12 @@ static const struct argp_option opts[] = {
171204

172205
extern struct argp bench_ringbufs_argp;
173206
extern struct argp bench_bloom_map_argp;
207+
extern struct argp bench_bpf_loop_argp;
174208

175209
static const struct argp_child bench_parsers[] = {
176210
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
177211
{ &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
212+
{ &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
178213
{},
179214
};
180215

@@ -373,6 +408,7 @@ extern const struct bench bench_bloom_update;
373408
extern const struct bench bench_bloom_false_positive;
374409
extern const struct bench bench_hashmap_without_bloom;
375410
extern const struct bench bench_hashmap_with_bloom;
411+
extern const struct bench bench_bpf_loop;
376412

377413
static const struct bench *benchs[] = {
378414
&bench_count_global,
@@ -404,6 +440,7 @@ static const struct bench *benchs[] = {
404440
&bench_bloom_false_positive,
405441
&bench_hashmap_without_bloom,
406442
&bench_hashmap_with_bloom,
443+
&bench_bpf_loop,
407444
};
408445

409446
static void setup_benchmark()

tools/testing/selftests/bpf/bench.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
5959
void hits_drops_report_final(struct bench_res res[], int res_cnt);
6060
void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
6161
void false_hits_report_final(struct bench_res res[], int res_cnt);
62+
void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
63+
void ops_report_final(struct bench_res res[], int res_cnt);
6264

6365
static inline __u64 get_time_ns() {
6466
struct timespec t;

0 commit comments

Comments
 (0)