Skip to content

Commit 408fcf9

Browse files
eddyz87Alexei Starovoitov
authored andcommitted
bpf: free verifier states when they are no longer referenced
When fixes from patches 1 and 3 are applied, Patrick Somaru reported an increase in memory consumption for sched_ext iterator-based programs hitting 1M instructions limit. For example, 2Gb VMs ran out of memory while verifying a program. Similar behaviour could be reproduced on current bpf-next master. Here is an example of such program: /* verification completes if given 16G or RAM, * final env->free_list size is 369,960 entries. */ SEC("raw_tp") __flag(BPF_F_TEST_STATE_FREQ) __success int free_list_bomb(const void *ctx) { volatile char buf[48] = {}; unsigned i, j; j = 0; bpf_for(i, 0, 10) { /* this forks verifier state: * - verification of current path continues and * creates a checkpoint after 'if'; * - verification of forked path hits the * checkpoint and marks it as loop_entry. */ if (bpf_get_prandom_u32()) asm volatile (""); /* this marks 'j' as precise, thus any checkpoint * created on current iteration would not be matched * on the next iteration. */ buf[j++] = 42; j %= ARRAY_SIZE(buf); } asm volatile (""::"r"(buf)); return 0; } Memory consumption increased due to more states being marked as loop entries and eventually added to env->free_list. This commit introduces logic to free states from env->free_list during verification. A state in env->free_list can be freed if: - it has no child states; - it is not used as a loop_entry. This commit: - updates bpf_verifier_state->used_as_loop_entry to be a counter that tracks how many states use this one as a loop entry; - adds a function maybe_free_verifier_state(), which: - frees a state if its ->branches and ->used_as_loop_entry counters are both zero; - if the state is freed, state->loop_entry->used_as_loop_entry is decremented, and an attempt is made to free state->loop_entry. In the example above, this approach reduces the maximum number of states in the free list from 369,960 to 16,223. However, this approach has its limitations. If the buf size in the example above is modified to 64, state caching overflows: the state for j=0 is evicted from the cache before it can be used to stop traversal. As a result, states in the free list accumulate because their branch counters do not reach zero. The effect of this patch on the selftests looks as follows: File Program Max free list (A) Max free list (B) Max free list (DIFF) -------------------------------- ------------------------------------ ----------------- ----------------- -------------------- arena_list.bpf.o arena_list_add 17 3 -14 (-82.35%) bpf_iter_task_stack.bpf.o dump_task_stack 39 9 -30 (-76.92%) iters.bpf.o checkpoint_states_deletion 265 89 -176 (-66.42%) iters.bpf.o clean_live_states 19 0 -19 (-100.00%) profiler2.bpf.o tracepoint__syscalls__sys_enter_kill 102 1 -101 (-99.02%) profiler3.bpf.o tracepoint__syscalls__sys_enter_kill 144 0 -144 (-100.00%) pyperf600_iter.bpf.o on_event 15 0 -15 (-100.00%) pyperf600_nounroll.bpf.o on_event 1170 1158 -12 (-1.03%) setget_sockopt.bpf.o skops_sockopt 18 0 -18 (-100.00%) strobemeta_nounroll1.bpf.o on_event 147 83 -64 (-43.54%) strobemeta_nounroll2.bpf.o on_event 312 209 -103 (-33.01%) strobemeta_subprogs.bpf.o on_event 124 86 -38 (-30.65%) test_cls_redirect_subprogs.bpf.o cls_redirect 15 0 -15 (-100.00%) timer.bpf.o test1 30 15 -15 (-50.00%) Measured using "do-not-submit" patches from here: https://github.com/eddyz87/bpf/tree/get-loop-entry-hungup Reported-by: Patrick Somaru <[email protected]> Signed-off-by: Eduard Zingerman <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 5564ee3 commit 408fcf9

File tree

2 files changed

+75
-30
lines changed

2 files changed

+75
-30
lines changed

include/linux/bpf_verifier.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -427,11 +427,6 @@ struct bpf_verifier_state {
427427
bool active_rcu_lock;
428428

429429
bool speculative;
430-
/* If this state was ever pointed-to by other state's loop_entry field
431-
* this flag would be set to true. Used to avoid freeing such states
432-
* while they are still in use.
433-
*/
434-
bool used_as_loop_entry;
435430
bool in_sleepable;
436431

437432
/* first and last insn idx of this verifier state */
@@ -458,6 +453,11 @@ struct bpf_verifier_state {
458453
u32 dfs_depth;
459454
u32 callback_unroll_depth;
460455
u32 may_goto_depth;
456+
/* If this state was ever pointed-to by other state's loop_entry field
457+
* this flag would be set to true. Used to avoid freeing such states
458+
* while they are still in use.
459+
*/
460+
u32 used_as_loop_entry;
461461
};
462462

463463
#define bpf_get_spilled_reg(slot, frame, mask) \
@@ -499,7 +499,9 @@ struct bpf_verifier_state {
499499
struct bpf_verifier_state_list {
500500
struct bpf_verifier_state state;
501501
struct list_head node;
502-
int miss_cnt, hit_cnt;
502+
u32 miss_cnt;
503+
u32 hit_cnt:31;
504+
u32 in_free_list:1;
503505
};
504506

505507
struct bpf_loop_inline_state {

kernel/bpf/verifier.c

Lines changed: 67 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,6 +1631,50 @@ static void free_verifier_state(struct bpf_verifier_state *state,
16311631
kfree(state);
16321632
}
16331633

1634+
/* struct bpf_verifier_state->{parent,loop_entry} refer to states
1635+
* that are in either of env->{expored_states,free_list}.
1636+
* In both cases the state is contained in struct bpf_verifier_state_list.
1637+
*/
1638+
static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
1639+
{
1640+
if (st->parent)
1641+
return container_of(st->parent, struct bpf_verifier_state_list, state);
1642+
return NULL;
1643+
}
1644+
1645+
static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st)
1646+
{
1647+
if (st->loop_entry)
1648+
return container_of(st->loop_entry, struct bpf_verifier_state_list, state);
1649+
return NULL;
1650+
}
1651+
1652+
/* A state can be freed if it is no longer referenced:
1653+
* - is in the env->free_list;
1654+
* - has no children states;
1655+
* - is not used as loop_entry.
1656+
*
1657+
* Freeing a state can make it's loop_entry free-able.
1658+
*/
1659+
static void maybe_free_verifier_state(struct bpf_verifier_env *env,
1660+
struct bpf_verifier_state_list *sl)
1661+
{
1662+
struct bpf_verifier_state_list *loop_entry_sl;
1663+
1664+
while (sl && sl->in_free_list &&
1665+
sl->state.branches == 0 &&
1666+
sl->state.used_as_loop_entry == 0) {
1667+
loop_entry_sl = state_loop_entry_as_list(&sl->state);
1668+
if (loop_entry_sl)
1669+
loop_entry_sl->state.used_as_loop_entry--;
1670+
list_del(&sl->node);
1671+
free_verifier_state(&sl->state, false);
1672+
kfree(sl);
1673+
env->peak_states--;
1674+
sl = loop_entry_sl;
1675+
}
1676+
}
1677+
16341678
/* copy verifier state from src to dst growing dst stack space
16351679
* when necessary to accommodate larger src stack
16361680
*/
@@ -1848,7 +1892,8 @@ static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
18481892
return topmost;
18491893
}
18501894

1851-
static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
1895+
static void update_loop_entry(struct bpf_verifier_env *env,
1896+
struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
18521897
{
18531898
/* The hdr->branches check decides between cases B and C in
18541899
* comment for get_loop_entry(). If hdr->branches == 0 then
@@ -1857,13 +1902,20 @@ static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifie
18571902
* no need to update cur->loop_entry.
18581903
*/
18591904
if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) {
1905+
if (cur->loop_entry) {
1906+
cur->loop_entry->used_as_loop_entry--;
1907+
maybe_free_verifier_state(env, state_loop_entry_as_list(cur));
1908+
}
18601909
cur->loop_entry = hdr;
1861-
hdr->used_as_loop_entry = true;
1910+
hdr->used_as_loop_entry++;
18621911
}
18631912
}
18641913

18651914
static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
18661915
{
1916+
struct bpf_verifier_state_list *sl = NULL, *parent_sl;
1917+
struct bpf_verifier_state *parent;
1918+
18671919
while (st) {
18681920
u32 br = --st->branches;
18691921

@@ -1873,7 +1925,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
18731925
* This is a part of 'case A' in get_loop_entry() comment.
18741926
*/
18751927
if (br == 0 && st->parent && st->loop_entry)
1876-
update_loop_entry(st->parent, st->loop_entry);
1928+
update_loop_entry(env, st->parent, st->loop_entry);
18771929

18781930
/* WARN_ON(br > 1) technically makes sense here,
18791931
* but see comment in push_stack(), hence:
@@ -1883,7 +1935,12 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
18831935
br);
18841936
if (br)
18851937
break;
1886-
st = st->parent;
1938+
parent = st->parent;
1939+
parent_sl = state_parent_as_list(st);
1940+
if (sl)
1941+
maybe_free_verifier_state(env, sl);
1942+
st = parent;
1943+
sl = parent_sl;
18871944
}
18881945
}
18891946

@@ -18667,7 +18724,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
1866718724
spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
1866818725
iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
1866918726
if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
18670-
update_loop_entry(cur, &sl->state);
18727+
update_loop_entry(env, cur, &sl->state);
1867118728
goto hit;
1867218729
}
1867318730
}
@@ -18676,7 +18733,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
1867618733
if (is_may_goto_insn_at(env, insn_idx)) {
1867718734
if (sl->state.may_goto_depth != cur->may_goto_depth &&
1867818735
states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
18679-
update_loop_entry(cur, &sl->state);
18736+
update_loop_entry(env, cur, &sl->state);
1868018737
goto hit;
1868118738
}
1868218739
}
@@ -18749,7 +18806,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
1874918806
force_exact = loop_entry && loop_entry->branches > 0;
1875018807
if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) {
1875118808
if (force_exact)
18752-
update_loop_entry(cur, loop_entry);
18809+
update_loop_entry(env, cur, loop_entry);
1875318810
hit:
1875418811
sl->hit_cnt++;
1875518812
/* reached equivalent register/stack state,
@@ -18798,24 +18855,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
1879818855
/* the state is unlikely to be useful. Remove it to
1879918856
* speed up verification
1880018857
*/
18858+
sl->in_free_list = true;
1880118859
list_del(&sl->node);
18802-
if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
18803-
!sl->state.used_as_loop_entry) {
18804-
u32 br = sl->state.branches;
18805-
18806-
WARN_ONCE(br,
18807-
"BUG live_done but branches_to_explore %d\n",
18808-
br);
18809-
free_verifier_state(&sl->state, false);
18810-
kfree(sl);
18811-
env->peak_states--;
18812-
} else {
18813-
/* cannot free this state, since parentage chain may
18814-
* walk it later. Add it for free_list instead to
18815-
* be freed at the end of verification
18816-
*/
18817-
list_add(&sl->node, &env->free_list);
18818-
}
18860+
list_add(&sl->node, &env->free_list);
18861+
maybe_free_verifier_state(env, sl);
1881918862
}
1882018863
}
1882118864

0 commit comments

Comments
 (0)