Commit 1ade237

eddyz87 authored and Alexei Starovoitov committed
bpf: Inline calls to bpf_loop when callback is known
Calls to `bpf_loop` are replaced with direct loops to avoid
indirection. E.g. the following:

    bpf_loop(10, foo, NULL, 0);

is replaced by the equivalent of the following:

    for (int i = 0; i < 10; ++i)
        foo(i, NULL);

This transformation can be applied when:
- the callback is known and does not change during program execution;
- the flags passed to `bpf_loop` are always zero.

Inlining logic works as follows:
- During execution simulation, the function `update_loop_inline_state`
  tracks the following information for each `bpf_loop` call
  instruction:
  - is the callback known and constant?
  - are the flags constant and zero?
- The function `optimize_bpf_loop` increases the stack depth for
  functions where `bpf_loop` calls can be inlined and invokes
  `inline_bpf_loop` to apply the inlining. The additional stack space
  is used to spill registers R6, R7 and R8. These registers are used
  as the loop maximal bound, the loop counter and the callback context
  parameter, respectively.

Measurements using `benchs/run_bench_bpf_loop.sh` inside QEMU / KVM on
an i7-4710HQ CPU show a drop in latency from 14 ns/op to 2 ns/op.

Signed-off-by: Eduard Zingerman <[email protected]>
Acked-by: Song Liu <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 7a42008 commit 1ade237
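To make the trigger condition concrete, here is a minimal sketch of a BPF program whose `bpf_loop` call satisfies both conditions. It follows the pattern used by the kernel selftests; the function, section and variable names are invented for illustration and are not part of this commit:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* The callback is a static function, so the verifier resolves the
     * same callback_subprogno on every simulated path.
     */
    static long square(__u32 index, void *ctx)
    {
    	__u64 *sum = ctx;

    	*sum += (__u64)index * index;
    	return 0; /* 0 = continue iterating */
    }

    SEC("raw_tp/sys_enter")
    int compute(void *unused)
    {
    	__u64 sum = 0;

    	/* Known callback + constant zero flags: after this commit the
    	 * verifier patches this call into a direct loop.
    	 */
    	bpf_loop(10, square, &sum, 0);
    	return 0;
    }

    char LICENSE[] SEC("license") = "GPL";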

File tree: 4 files changed, +195 -9 lines

include/linux/bpf.h

Lines changed: 3 additions & 0 deletions

@@ -1286,6 +1286,9 @@ struct bpf_array {
 #define BPF_COMPLEXITY_LIMIT_INSNS	1000000 /* yes. 1M insns */
 #define MAX_TAIL_CALL_CNT 33
 
+/* Maximum number of loops for bpf_loop */
+#define BPF_MAX_LOOPS	BIT(23)
+
 #define BPF_F_ACCESS_MASK	(BPF_F_RDONLY |		\
 				 BPF_F_RDONLY_PROG |	\
 				 BPF_F_WRONLY |		\
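For reference, BIT(23) expands to 1 << 23, i.e. 8,388,608, so both the helper's runtime check and the inlined bound check cap nr_loops at the same roughly 8.4 million iterations.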

include/linux/bpf_verifier.h

Lines changed: 12 additions & 0 deletions

@@ -344,6 +344,14 @@ struct bpf_verifier_state_list {
 	int miss_cnt, hit_cnt;
 };
 
+struct bpf_loop_inline_state {
+	int initialized:1; /* set to true upon first entry */
+	int fit_for_inline:1; /* true if callback function is the same
+			       * at each call and flags are always zero
+			       */
+	u32 callback_subprogno; /* valid when fit_for_inline is true */
+};
+
 /* Possible states for alu_state member. */
 #define BPF_ALU_SANITIZE_SRC		(1U << 0)
 #define BPF_ALU_SANITIZE_DST		(1U << 1)
@@ -373,6 +381,10 @@ struct bpf_insn_aux_data {
 				u32 mem_size;	/* mem_size for non-struct typed var */
 			};
 		} btf_var;
+		/* if instruction is a call to bpf_loop this field tracks
+		 * the state of the relevant registers to make decision about inlining
+		 */
+		struct bpf_loop_inline_state loop_inline_state;
 	};
 	u64 map_key_state; /* constant (32 bit) key tracking for maps */
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */

kernel/bpf/bpf_iter.c

Lines changed: 5 additions & 4 deletions

@@ -723,19 +723,20 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
-/* maximum number of loops */
-#define MAX_LOOPS	BIT(23)
-
 BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
 	   u64, flags)
 {
 	bpf_callback_t callback = (bpf_callback_t)callback_fn;
 	u64 ret;
 	u32 i;
 
+	/* Note: these safety checks are also verified when bpf_loop
+	 * is inlined, be careful to modify this code in sync. See
+	 * function verifier.c:inline_bpf_loop.
+	 */
 	if (flags)
 		return -EINVAL;
-	if (nr_loops > MAX_LOOPS)
+	if (nr_loops > BPF_MAX_LOOPS)
 		return -E2BIG;
 
 	for (i = 0; i < nr_loops; i++) {
kernel/bpf/verifier.c

Lines changed: 175 additions & 5 deletions
@@ -7124,6 +7124,41 @@ static int check_get_func_ip(struct bpf_verifier_env *env)
 	return -ENOTSUPP;
 }
 
+static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+{
+	return &env->insn_aux_data[env->insn_idx];
+}
+
+static bool loop_flag_is_zero(struct bpf_verifier_env *env)
+{
+	struct bpf_reg_state *regs = cur_regs(env);
+	struct bpf_reg_state *reg = &regs[BPF_REG_4];
+	bool reg_is_null = register_is_null(reg);
+
+	if (reg_is_null)
+		mark_chain_precision(env, BPF_REG_4);
+
+	return reg_is_null;
+}
+
+static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
+{
+	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
+
+	if (!state->initialized) {
+		state->initialized = 1;
+		state->fit_for_inline = loop_flag_is_zero(env);
+		state->callback_subprogno = subprogno;
+		return;
+	}
+
+	if (!state->fit_for_inline)
+		return;
+
+	state->fit_for_inline = (loop_flag_is_zero(env) &&
+				 state->callback_subprogno == subprogno);
+}
+
 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			     int *insn_idx_p)
 {
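Reading update_loop_inline_state() as a one-way latch may help: the first simulated visit to a given bpf_loop call site records whether R4 (the flags argument) is provably zero and which subprogram the callback resolved to; every later visit, e.g. through a different branch, keeps fit_for_inline set only if it observes the same callback subprogram and a zero flags register again. Once cleared, the call site permanently stays a regular helper call. The mark_chain_precision() call in loop_flag_is_zero() exists because inlining relies on R4 being exactly zero, so the verifier must track that register precisely rather than let state pruning merge it with non-zero values.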
@@ -7276,6 +7311,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		err = check_bpf_snprintf_call(env, regs);
 		break;
 	case BPF_FUNC_loop:
+		update_loop_inline_state(env, meta.subprogno);
 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 					set_loop_callback_state);
 		break;
@@ -7682,11 +7718,6 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
 	return true;
 }
 
-static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
-{
-	return &env->insn_aux_data[env->insn_idx];
-}
-
 enum {
 	REASON_BOUNDS	= -1,
 	REASON_TYPE	= -2,
@@ -14315,6 +14346,142 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 	return 0;
 }
 
+static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
+					int position,
+					s32 stack_base,
+					u32 callback_subprogno,
+					u32 *cnt)
+{
+	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
+	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
+	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
+	int reg_loop_max = BPF_REG_6;
+	int reg_loop_cnt = BPF_REG_7;
+	int reg_loop_ctx = BPF_REG_8;
+
+	struct bpf_prog *new_prog;
+	u32 callback_start;
+	u32 call_insn_offset;
+	s32 callback_offset;
+
+	/* This represents an inlined version of bpf_iter.c:bpf_loop,
+	 * be careful to modify this code in sync.
+	 */
+	struct bpf_insn insn_buf[] = {
+		/* Return error and jump to the end of the patch if
+		 * expected number of iterations is too big.
+		 */
+		BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
+		BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 16),
+		/* spill R6, R7, R8 to use these as loop vars */
+		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
+		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
+		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
+		/* initialize loop vars */
+		BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
+		BPF_MOV32_IMM(reg_loop_cnt, 0),
+		BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
+		/* loop header,
+		 * if reg_loop_cnt >= reg_loop_max skip the loop body
+		 */
+		BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
+		/* callback call,
+		 * correct callback offset would be set after patching
+		 */
+		BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
+		BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
+		BPF_CALL_REL(0),
+		/* increment loop counter */
+		BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
+		/* jump to loop header if callback returned 0 */
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
+		/* return value of bpf_loop,
+		 * set R0 to the number of iterations
+		 */
+		BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
+		/* restore original values of R6, R7, R8 */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
+		BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
+		BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
+	};
+
+	*cnt = ARRAY_SIZE(insn_buf);
+	new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
+	if (!new_prog)
+		return new_prog;
+
+	/* callback start is known only after patching */
+	callback_start = env->subprog_info[callback_subprogno].start;
+	/* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
+	call_insn_offset = position + 12;
+	callback_offset = callback_start - call_insn_offset - 1;
+	env->prog->insnsi[call_insn_offset].imm = callback_offset;
+
+	return new_prog;
+}
+
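Read as C, the 19-instruction patch in insn_buf computes roughly the following (a sketch only; the variable names are invented here, and the R6/R7/R8 spill (insns 3-5) and restore (insns 16-18) around the loop are elided):

    if (nr_loops > BPF_MAX_LOOPS)	/* insns 0-2: R0 = -E2BIG, skip loop */
    	return -E2BIG;
    max = nr_loops;			/* insn 6: reg_loop_max = R1 */
    cnt = 0;				/* insn 7: reg_loop_cnt = 0 */
    ctx = callback_ctx;			/* insn 8: reg_loop_ctx = R3 */
    while (cnt < max) {			/* insn 9: loop header */
    	ret = callback(cnt, ctx);	/* insns 10-12: R1 = cnt, R2 = ctx, call */
    	cnt++;				/* insn 13 */
    	if (ret)			/* insn 14: loop again only if ret == 0 */
    		break;
    }
    return cnt;				/* insn 15: R0 = number of iterations */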
+static bool is_bpf_loop_call(struct bpf_insn *insn)
+{
+	return insn->code == (BPF_JMP | BPF_CALL) &&
+	       insn->src_reg == 0 &&
+	       insn->imm == BPF_FUNC_loop;
+}
+
+/* For all sub-programs in the program (including main) check
+ * insn_aux_data to see if there are bpf_loop calls that require
+ * inlining. If such calls are found the calls are replaced with a
+ * sequence of instructions produced by `inline_bpf_loop` function and
+ * subprog stack_depth is increased by the size of 3 registers.
+ * This stack space is used to spill values of the R6, R7, R8. These
+ * registers are used to store the loop bound, counter and context
+ * variables.
+ */
+static int optimize_bpf_loop(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *subprogs = env->subprog_info;
+	int i, cur_subprog = 0, cnt, delta = 0;
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	u16 stack_depth = subprogs[cur_subprog].stack_depth;
+	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+	u16 stack_depth_extra = 0;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		struct bpf_loop_inline_state *inline_state =
+			&env->insn_aux_data[i + delta].loop_inline_state;
+
+		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
+			struct bpf_prog *new_prog;
+
+			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
+			new_prog = inline_bpf_loop(env,
+						   i + delta,
+						   -(stack_depth + stack_depth_extra),
+						   inline_state->callback_subprogno,
+						   &cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta     += cnt - 1;
+			env->prog  = new_prog;
+			insn       = new_prog->insnsi + i + delta;
+		}
+
+		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+			subprogs[cur_subprog].stack_depth += stack_depth_extra;
+			cur_subprog++;
+			stack_depth = subprogs[cur_subprog].stack_depth;
+			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+			stack_depth_extra = 0;
+		}
+	}
+
+	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+
+	return 0;
+}
+
 static void free_states(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state_list *sl, *sln;
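As a worked example of the stack bookkeeping above (numbers are illustrative, not from the commit): for a subprogram with stack_depth == 12, stack_depth_roundup is round_up(12, 8) - 12 == 4, so stack_depth_extra becomes 3 * BPF_REG_SIZE + 4 == 28 and inline_bpf_loop() receives stack_base == -(12 + 28) == -40. The R6/R7/R8 spill slots then live at fp-40, fp-32 and fp-24, 8-byte aligned and below the subprogram's original stack, and the subprogram's stack_depth grows to 40.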
@@ -15052,6 +15219,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
 		ret = check_max_stack_depth(env);
 
 	/* instruction rewrites happen after this point */
+	if (ret == 0)
+		ret = optimize_bpf_loop(env);
+
 	if (is_priv) {
 		if (ret == 0)
 			opt_hard_wire_dead_code_branches(env);
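One detail worth noting in this last hunk: the new optimize_bpf_loop() pass is guarded only by ret == 0 and sits outside the is_priv block, so unlike opt_hard_wire_dead_code_branches() it runs for unprivileged programs as well, right at the point where instruction rewrites begin.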
