Commit 2793a8b

eddyz87 authored and Alexei Starovoitov committed
bpf: exact states comparison for iterator convergence checks
Convergence for open coded iterators is computed in is_state_visited()
by examining states with branches count > 1 and using states_equal().
states_equal() computes sub-state relation using read and precision
marks. Read and precision marks are propagated from children states,
thus are not guaranteed to be complete inside a loop when branches
count > 1. This could be demonstrated using the following unsafe
program:

     1. r7 = -16
     2. r6 = bpf_get_prandom_u32()
     3. while (bpf_iter_num_next(&fp[-8])) {
     4.   if (r6 != 42) {
     5.     r7 = -32
     6.     r6 = bpf_get_prandom_u32()
     7.     continue
     8.   }
     9.   r0 = r10
    10.   r0 += r7
    11.   r8 = *(u64 *)(r0 + 0)
    12.   r6 = bpf_get_prandom_u32()
    13. }

Here verifier would first visit path 1-3, create a checkpoint at 3
with r7=-16, then continue to 4-7,3 with r7=-32. Because instructions
9-12 had not been visited yet, the existing checkpoint at 3 does not
have a read or precision mark for r7. Thus states_equal() would return
true, verifier would discard the current state, and the unsafe memory
access at 11 would not be caught.

This commit fixes this loophole by introducing exact state comparisons
for iterator convergence logic:
- registers are compared using regs_exact() regardless of read or
  precision marks;
- stack slots have to have identical type.

Unfortunately, this is too strict even for simple programs like below:

    i = 0;
    while (iter_next(&it))
      i++;

At each iteration step i++ would produce a new distinct state and
eventually the instruction processing limit would be reached.

To avoid such behavior, speculatively forget (widen) the range of
imprecise scalar registers if those registers were not precise at the
end of the previous iteration and do not match exactly. This is a
conservative heuristic that allows verification of a wide range of
programs; however, it precludes verification of programs that conjure
an imprecise value on the first loop iteration and use it as precise
on the second. Test case iter_task_vma_for_each() presents one such
case:

    unsigned int seen = 0;
    ...
    bpf_for_each(task_vma, vma, task, 0) {
        if (seen >= 1000)
            break;
        ...
        seen++;
    }

Here clang generates the following code:

  <LBB0_4>:
        24: r8 = r6                            ; stash current value of
        ... body ...                             'seen'
        29: r1 = r10
        30: r1 += -0x8
        31: call bpf_iter_task_vma_next
        32: r6 += 0x1                          ; seen++;
        33: if r0 == 0x0 goto +0x2 <LBB0_6>    ; exit on next() == NULL
        34: r7 += 0x10
        35: if r8 < 0x3e7 goto -0xc <LBB0_4>   ; loop on seen < 1000

  <LBB0_6>:
        ... exit ...

Note that the counter in r6 is copied to r8 and then incremented,
while the conditional jump is done using r8. Because of this, the
precision mark for r6 lags one state behind the precision mark on r8,
and the widening logic kicks in. Adding barrier_var(seen) after the
conditional is sufficient to force clang to use the same register for
both the counter and the conditional jump.

This issue was discussed in the thread [1], which was started by
Andrew Werner <[email protected]> demonstrating a similar bug in callback
function handling. Callbacks will be addressed in a followup patch.

[1] https://lore.kernel.org/bpf/[email protected]/

Co-developed-by: Andrii Nakryiko <[email protected]>
Co-developed-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Eduard Zingerman <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 4c97259 commit 2793a8b
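
As context for the widening heuristic above, here is a minimal sketch of the
simple counting loop it is meant to let converge, written as a complete BPF
program. The scaffolding (section name, program name, kfunc externs) is
illustrative and not part of the commit; the bpf_iter_num_* signatures follow
kernel/bpf/bpf_iter.c.

/* Counting loop from the commit message ("i = 0; while(iter_next(&it)) i++;").
 * With exact-only state comparison every i++ would create a distinct state;
 * widening of the imprecise scalar i lets verification converge.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;

SEC("raw_tp/sys_enter")
int count_iterations(const void *ctx)
{
	struct bpf_iter_num it;
	int i = 0;

	bpf_iter_num_new(&it, 0, 10);
	while (bpf_iter_num_next(&it))
		i++;	/* imprecise scalar: widened on subsequent loop entries */
	bpf_iter_num_destroy(&it);
	return i;
}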

File tree: 3 files changed, +189 −31 lines

include/linux/bpf_verifier.h

Lines changed: 1 addition & 0 deletions
@@ -384,6 +384,7 @@ struct bpf_verifier_state {
 	 */
 	struct bpf_idx_pair *jmp_history;
 	u32 jmp_history_cnt;
+	u32 dfs_depth;
 };
 
 #define bpf_get_spilled_reg(slot, frame, mask) \

kernel/bpf/verifier.c

Lines changed: 187 additions & 31 deletions
@@ -1802,6 +1802,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 	dst_state->parent = src->parent;
 	dst_state->first_insn_idx = src->first_insn_idx;
 	dst_state->last_insn_idx = src->last_insn_idx;
+	dst_state->dfs_depth = src->dfs_depth;
 	for (i = 0; i <= src->curframe; i++) {
 		dst = dst_state->frame[i];
 		if (!dst) {
@@ -7723,6 +7724,81 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
 	return 0;
 }
 
+/* Look for a previous loop entry at insn_idx: nearest parent state
+ * stopped at insn_idx with callsites matching those in cur->frame.
+ */
+static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
+						  struct bpf_verifier_state *cur,
+						  int insn_idx)
+{
+	struct bpf_verifier_state_list *sl;
+	struct bpf_verifier_state *st;
+
+	/* Explored states are pushed in stack order, most recent states come first */
+	sl = *explored_state(env, insn_idx);
+	for (; sl; sl = sl->next) {
+		/* If st->branches != 0 state is a part of current DFS verification path,
+		 * hence cur & st form a loop.
+		 */
+		st = &sl->state;
+		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
+		    st->dfs_depth < cur->dfs_depth)
+			return st;
+	}
+
+	return NULL;
+}
+
+static void reset_idmap_scratch(struct bpf_verifier_env *env);
+static bool regs_exact(const struct bpf_reg_state *rold,
+		       const struct bpf_reg_state *rcur,
+		       struct bpf_idmap *idmap);
+
+static void maybe_widen_reg(struct bpf_verifier_env *env,
+			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
+			    struct bpf_idmap *idmap)
+{
+	if (rold->type != SCALAR_VALUE)
+		return;
+	if (rold->type != rcur->type)
+		return;
+	if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
+		return;
+	__mark_reg_unknown(env, rcur);
+}
+
+static int widen_imprecise_scalars(struct bpf_verifier_env *env,
+				   struct bpf_verifier_state *old,
+				   struct bpf_verifier_state *cur)
+{
+	struct bpf_func_state *fold, *fcur;
+	int i, fr;
+
+	reset_idmap_scratch(env);
+	for (fr = old->curframe; fr >= 0; fr--) {
+		fold = old->frame[fr];
+		fcur = cur->frame[fr];
+
+		for (i = 0; i < MAX_BPF_REG; i++)
+			maybe_widen_reg(env,
+					&fold->regs[i],
+					&fcur->regs[i],
+					&env->idmap_scratch);
+
+		for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
+			if (!is_spilled_reg(&fold->stack[i]) ||
+			    !is_spilled_reg(&fcur->stack[i]))
+				continue;
+
+			maybe_widen_reg(env,
+					&fold->stack[i].spilled_ptr,
+					&fcur->stack[i].spilled_ptr,
+					&env->idmap_scratch);
+		}
+	}
+	return 0;
+}
+
 /* process_iter_next_call() is called when verifier gets to iterator's next
  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
  * to it as just "iter_next()" in comments below.
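
To make the decision rule in maybe_widen_reg() above concrete, here is a toy
user-space model of it (all names hypothetical, not kernel code): a tracked
range is forgotten only when neither the old nor the current value is precise
and the two do not match exactly.

/* Toy model of the widening decision, mirroring maybe_widen_reg(). */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_scalar {
	long min, max;	/* tracked value range */
	bool precise;	/* does correctness depend on the exact range? */
};

static bool toy_exact(const struct toy_scalar *a, const struct toy_scalar *b)
{
	return a->min == b->min && a->max == b->max;
}

static void toy_maybe_widen(const struct toy_scalar *old, struct toy_scalar *cur)
{
	if (old->precise || cur->precise || toy_exact(old, cur))
		return;
	cur->min = LONG_MIN;	/* analogous to __mark_reg_unknown() */
	cur->max = LONG_MAX;
}

int main(void)
{
	struct toy_scalar prev = { .min = 0, .max = 0, .precise = false };
	struct toy_scalar cur = { .min = 1, .max = 1, .precise = false };

	/* A loop counter changed between iterations: neither value is
	 * precise and they differ, so the range is widened.
	 */
	toy_maybe_widen(&prev, &cur);
	printf("cur range after widening: [%ld, %ld]\n", cur.min, cur.max);
	return 0;
}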
@@ -7764,25 +7840,47 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
  * is some statically known limit on number of iterations (e.g., if there is
  * an explicit `if n > 100 then break;` statement somewhere in the loop).
  *
- * One very subtle but very important aspect is that we *always* simulate NULL
- * condition first (as the current state) before we simulate non-NULL case.
- * This has to do with intricacies of scalar precision tracking. By simulating
- * "exit condition" of iter_next() returning NULL first, we make sure all the
- * relevant precision marks *that will be set **after** we exit iterator loop*
- * are propagated backwards to common parent state of NULL and non-NULL
- * branches. Thanks to that, state equivalence checks done later in forked
- * state, when reaching iter_next() for ACTIVE iterator, can assume that
- * precision marks are finalized and won't change. Because simulating another
- * ACTIVE iterator iteration won't change them (because given same input
- * states we'll end up with exactly same output states which we are currently
- * comparing; and verification after the loop already propagated back what
- * needs to be **additionally** tracked as precise). It's subtle, grok
- * precision tracking for more intuitive understanding.
+ * Iteration convergence logic in is_state_visited() relies on exact
+ * states comparison, which ignores read and precision marks.
+ * This is necessary because read and precision marks are not finalized
+ * while in the loop. Exact comparison might preclude convergence for
+ * simple programs like below:
+ *
+ *     i = 0;
+ *     while (iter_next(&it))
+ *       i++;
+ *
+ * At each iteration step i++ would produce a new distinct state and
+ * eventually instruction processing limit would be reached.
+ *
+ * To avoid such behavior speculatively forget (widen) range for
+ * imprecise scalar registers, if those registers were not precise at the
+ * end of the previous iteration and do not match exactly.
+ *
+ * This is a conservative heuristic that allows verification of a wide
+ * range of programs, however it precludes verification of programs that
+ * conjure an imprecise value on the first loop iteration and use it as
+ * precise on the second. For example, the following safe program would
+ * fail to verify:
+ *
+ *     struct bpf_iter_num it;
+ *     int arr[10];
+ *     int i = 0, a = 0;
+ *     bpf_iter_num_new(&it, 0, 10);
+ *     while (bpf_iter_num_next(&it)) {
+ *       if (a == 0) {
+ *         a = 1;
+ *         i = 7; // Because i changed, verifier would forget
+ *                // its range on second loop entry.
+ *       } else {
+ *         arr[i] = 42; // This would fail to verify.
+ *       }
+ *     }
+ *     bpf_iter_num_destroy(&it);
  */
 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
 				  struct bpf_kfunc_call_arg_meta *meta)
 {
-	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st;
+	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
 	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
 	struct bpf_reg_state *cur_iter, *queued_iter;
 	int iter_frameno = meta->iter.frameno;
@@ -7800,6 +7898,19 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
 	}
 
 	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
+		/* Because iter_next() call is a checkpoint, is_state_visited()
+		 * should guarantee parent state with same call sites and insn_idx.
+		 */
+		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
+		    !same_callsites(cur_st->parent, cur_st)) {
+			verbose(env, "bug: bad parent state for iter next call");
+			return -EFAULT;
+		}
+		/* Note cur_st->parent in the call below, it is necessary to skip
+		 * checkpoint created for cur_st by is_state_visited()
+		 * right at this instruction.
+		 */
+		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
 		/* branch out active iter state */
 		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
 		if (!queued_st)
@@ -7808,6 +7919,8 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
 	queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
 	queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
 	queued_iter->iter.depth++;
+	if (prev_st)
+		widen_imprecise_scalars(env, prev_st, queued_st);
 
 	queued_fr = queued_st->frame[queued_st->curframe];
 	mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
@@ -15948,8 +16061,11 @@ static bool regs_exact(const struct bpf_reg_state *rold,
 
 /* Returns true if (rold safe implies rcur safe) */
 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
-		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
+		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact)
 {
+	if (exact)
+		return regs_exact(rold, rcur, idmap);
+
 	if (!(rold->live & REG_LIVE_READ))
 		/* explored state didn't use this */
 		return true;
@@ -16066,7 +16182,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
 }
 
 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
-		      struct bpf_func_state *cur, struct bpf_idmap *idmap)
+		      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
 {
 	int i, spi;
 
@@ -16079,7 +16195,12 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
 
 		spi = i / BPF_REG_SIZE;
 
-		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
+		if (exact &&
+		    old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
+			return false;
+
+		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) {
 			i += BPF_REG_SIZE - 1;
 			/* explored state didn't use this */
 			continue;
@@ -16129,7 +16250,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
 			 * return false to continue verification of this path
 			 */
 			if (!regsafe(env, &old->stack[spi].spilled_ptr,
-				     &cur->stack[spi].spilled_ptr, idmap))
+				     &cur->stack[spi].spilled_ptr, idmap, exact))
 				return false;
 			break;
 		case STACK_DYNPTR:
@@ -16211,16 +16332,16 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
  * the current state will reach 'bpf_exit' instruction safely
  */
 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
-			      struct bpf_func_state *cur)
+			      struct bpf_func_state *cur, bool exact)
 {
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
-			     &env->idmap_scratch))
+			     &env->idmap_scratch, exact))
 			return false;
 
-	if (!stacksafe(env, old, cur, &env->idmap_scratch))
+	if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
 		return false;
 
 	if (!refsafe(old, cur, &env->idmap_scratch))
@@ -16229,17 +16350,23 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
 	return true;
 }
 
+static void reset_idmap_scratch(struct bpf_verifier_env *env)
+{
+	env->idmap_scratch.tmp_id_gen = env->id_gen;
+	memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
+}
+
 static bool states_equal(struct bpf_verifier_env *env,
 			 struct bpf_verifier_state *old,
-			 struct bpf_verifier_state *cur)
+			 struct bpf_verifier_state *cur,
+			 bool exact)
 {
 	int i;
 
 	if (old->curframe != cur->curframe)
 		return false;
 
-	env->idmap_scratch.tmp_id_gen = env->id_gen;
-	memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
+	reset_idmap_scratch(env);
 
 	/* Verification state from speculative execution simulation
 	 * must never prune a non-speculative execution one.
@@ -16269,7 +16396,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 	for (i = 0; i <= old->curframe; i++) {
 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
 			return false;
-		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
+		if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
 			return false;
 	}
 	return true;
@@ -16524,7 +16651,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	struct bpf_verifier_state_list *new_sl;
 	struct bpf_verifier_state_list *sl, **pprev;
 	struct bpf_verifier_state *cur = env->cur_state, *new;
-	int i, j, err, states_cnt = 0;
+	int i, j, n, err, states_cnt = 0;
 	bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
 	bool add_new_state = force_new_state;
 
@@ -16579,9 +16706,33 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	 * It's safe to assume that iterator loop will finish, taking into
 	 * account iter_next() contract of eventually returning
 	 * sticky NULL result.
+	 *
+	 * Note that states have to be compared exactly in this case because
+	 * read and precision marks might not be finalized inside the loop.
+	 * E.g. as in the program below:
+	 *
+	 *     1. r7 = -16
+	 *     2. r6 = bpf_get_prandom_u32()
+	 *     3. while (bpf_iter_num_next(&fp[-8])) {
+	 *     4.   if (r6 != 42) {
+	 *     5.     r7 = -32
+	 *     6.     r6 = bpf_get_prandom_u32()
+	 *     7.     continue
+	 *     8.   }
+	 *     9.   r0 = r10
+	 *    10.   r0 += r7
+	 *    11.   r8 = *(u64 *)(r0 + 0)
+	 *    12.   r6 = bpf_get_prandom_u32()
+	 *    13. }
+	 *
+	 * Here verifier would first visit path 1-3, create a checkpoint at 3
+	 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
+	 * not have read or precision mark for r7 yet, thus inexact states
+	 * comparison would discard current state with r7=-32
+	 * => unsafe memory access at 11 would not be caught.
 	 */
 	if (is_iter_next_insn(env, insn_idx)) {
-		if (states_equal(env, &sl->state, cur)) {
+		if (states_equal(env, &sl->state, cur, true)) {
 			struct bpf_func_state *cur_frame;
 			struct bpf_reg_state *iter_state, *iter_reg;
 			int spi;
@@ -16604,7 +16755,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		}
 		/* attempt to detect infinite loop to avoid unnecessary doomed work */
 		if (states_maybe_looping(&sl->state, cur) &&
-		    states_equal(env, &sl->state, cur) &&
+		    states_equal(env, &sl->state, cur, false) &&
 		    !iter_active_depths_differ(&sl->state, cur)) {
 			verbose_linfo(env, insn_idx, "; ");
 			verbose(env, "infinite loop detected at insn %d\n", insn_idx);
@@ -16629,7 +16780,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			add_new_state = false;
 			goto miss;
 		}
-		if (states_equal(env, &sl->state, cur)) {
+		if (states_equal(env, &sl->state, cur, false)) {
 hit:
 			sl->hit_cnt++;
 			/* reached equivalent register/stack state,
@@ -16668,8 +16819,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		 * to keep checking from state equivalence point of view.
 		 * Higher numbers increase max_states_per_insn and verification time,
 		 * but do not meaningfully decrease insn_processed.
+		 * 'n' controls how many times a state can miss before eviction.
+		 * Use bigger 'n' for checkpoints because evicting checkpoint states
+		 * too early would hinder iterator convergence.
 		 */
-		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
+		n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
+		if (sl->miss_cnt > sl->hit_cnt * n + n) {
 			/* the state is unlikely to be useful. Remove it to
 			 * speed up verification
 			 */
@@ -16743,6 +16898,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 
 	cur->parent = new;
 	cur->first_insn_idx = insn_idx;
+	cur->dfs_depth = new->dfs_depth + 1;
 	clear_jmp_history(cur);
 	new_sl->next = *explored_state(env, insn_idx);
 	*explored_state(env, insn_idx) = new_sl;
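
For convenience, here is the would-fail-to-verify example from the
process_iter_next_call() comment above, wrapped into a complete program. The
wrapping (includes, license, section, kfunc externs) is my addition and
illustrative only; per that comment, the verifier is expected to reject this
program even though it is safe.

/* Safe program rejected by the widening heuristic: i = 7 is conjured
 * imprecisely on the first iteration, so its range is forgotten on the
 * second loop entry and arr[i] = 42 cannot be proven in-bounds.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;

SEC("raw_tp/sys_enter")
int conjured_precise(const void *ctx)
{
	struct bpf_iter_num it;
	int arr[10] = {0};
	int i = 0, a = 0;

	bpf_iter_num_new(&it, 0, 10);
	while (bpf_iter_num_next(&it)) {
		if (a == 0) {
			a = 1;
			i = 7;		/* imprecise here, widened on next entry */
		} else {
			arr[i] = 42;	/* rejected: i's range is unknown */
		}
	}
	bpf_iter_num_destroy(&it);
	return arr[7];
}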

tools/testing/selftests/bpf/progs/iters_task_vma.c

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@ int iter_task_vma_for_each(const void *ctx)
 	bpf_for_each(task_vma, vma, task, 0) {
 		if (seen >= 1000)
 			break;
+		barrier_var(seen);
 
 		vm_ranges[seen].vm_start = vma->vm_start;
 		vm_ranges[seen].vm_end = vma->vm_end;
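
For context, barrier_var() used above is libbpf's per-variable optimization
barrier. To the best of my knowledge it is defined in
tools/lib/bpf/bpf_helpers.h along the following lines (check the header for
the authoritative definition):

/* Approximate definition, paraphrased from libbpf's bpf_helpers.h:
 * an empty asm statement with `var` as an in/out register operand. It
 * stops the compiler from carrying the value in a second register across
 * this point, which is exactly the r6/r8 split the commit message
 * describes.
 */
#define barrier_var(var) asm volatile("" : "+r"(var))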
