@@ -1802,6 +1802,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 	dst_state->parent = src->parent;
 	dst_state->first_insn_idx = src->first_insn_idx;
 	dst_state->last_insn_idx = src->last_insn_idx;
+	dst_state->dfs_depth = src->dfs_depth;
 	for (i = 0; i <= src->curframe; i++) {
 		dst = dst_state->frame[i];
 		if (!dst) {
@@ -7723,6 +7724,81 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
 	return 0;
 }

+/* Look for a previous loop entry at insn_idx: nearest parent state
+ * stopped at insn_idx with callsites matching those in cur->frame.
+ */
+static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
+						  struct bpf_verifier_state *cur,
+						  int insn_idx)
+{
+	struct bpf_verifier_state_list *sl;
+	struct bpf_verifier_state *st;
+
+	/* Explored states are pushed in stack order, most recent states come first */
+	sl = *explored_state(env, insn_idx);
+	for (; sl; sl = sl->next) {
+		/* If st->branches != 0 state is a part of current DFS verification path,
+		 * hence cur & st for a loop.
+		 */
+		st = &sl->state;
+		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
+		    st->dfs_depth < cur->dfs_depth)
+			return st;
+	}
+
+	return NULL;
+}
+
+static void reset_idmap_scratch(struct bpf_verifier_env *env);
+static bool regs_exact(const struct bpf_reg_state *rold,
+		       const struct bpf_reg_state *rcur,
+		       struct bpf_idmap *idmap);
+
+static void maybe_widen_reg(struct bpf_verifier_env *env,
+			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
+			    struct bpf_idmap *idmap)
+{
+	if (rold->type != SCALAR_VALUE)
+		return;
+	if (rold->type != rcur->type)
+		return;
+	if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
+		return;
+	__mark_reg_unknown(env, rcur);
+}
+
+static int widen_imprecise_scalars(struct bpf_verifier_env *env,
+				   struct bpf_verifier_state *old,
+				   struct bpf_verifier_state *cur)
+{
+	struct bpf_func_state *fold, *fcur;
+	int i, fr;
+
+	reset_idmap_scratch(env);
+	for (fr = old->curframe; fr >= 0; fr--) {
+		fold = old->frame[fr];
+		fcur = cur->frame[fr];
+
+		for (i = 0; i < MAX_BPF_REG; i++)
+			maybe_widen_reg(env,
+					&fold->regs[i],
+					&fcur->regs[i],
+					&env->idmap_scratch);
+
+		for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
+			if (!is_spilled_reg(&fold->stack[i]) ||
+			    !is_spilled_reg(&fcur->stack[i]))
+				continue;
+
+			maybe_widen_reg(env,
+					&fold->stack[i].spilled_ptr,
+					&fcur->stack[i].spilled_ptr,
+					&env->idmap_scratch);
+		}
+	}
+	return 0;
+}
+
 /* process_iter_next_call() is called when verifier gets to iterator's next
  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
  * to it as just "iter_next()" in comments below.
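To make the helpers added above concrete, here is a minimal stand-alone sketch of the widening step in plain C. The struct scalar type, scalar_exact() and maybe_widen() are illustrative stand-ins for the kernel's bpf_reg_state, regs_exact() and maybe_widen_reg(), not the real definitions: a scalar that is not marked precise in either state and whose ranges differ has its range forgotten, which is what lets successive loop states converge.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a verifier scalar register: a value range
 * plus a precision flag (illustrative, not the kernel layout). */
struct scalar {
        int64_t min, max;
        bool precise;
};

static bool scalar_exact(const struct scalar *a, const struct scalar *b)
{
        return a->min == b->min && a->max == b->max;
}

/* Mirror of the maybe_widen_reg() idea: if neither copy of the register
 * is precise and the ranges differ, forget the range entirely. */
static void maybe_widen(const struct scalar *old, struct scalar *cur)
{
        if (old->precise || cur->precise || scalar_exact(old, cur))
                return;
        cur->min = INT64_MIN;
        cur->max = INT64_MAX;
}

int main(void)
{
        struct scalar prev = { .min = 0, .max = 0, .precise = false }; /* i == 0 on loop entry */
        struct scalar cur  = { .min = 1, .max = 1, .precise = false }; /* i == 1 after i++ */

        maybe_widen(&prev, &cur);
        printf("widened range: [%lld, %lld]\n", (long long)cur.min, (long long)cur.max);
        return 0;
}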
@@ -7764,25 +7840,47 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
  * is some statically known limit on number of iterations (e.g., if there is
  * an explicit `if n > 100 then break;` statement somewhere in the loop).
  *
- * One very subtle but very important aspect is that we *always* simulate NULL
- * condition first (as the current state) before we simulate non-NULL case.
- * This has to do with intricacies of scalar precision tracking. By simulating
- * "exit condition" of iter_next() returning NULL first, we make sure all the
- * relevant precision marks *that will be set **after** we exit iterator loop*
- * are propagated backwards to common parent state of NULL and non-NULL
- * branches. Thanks to that, state equivalence checks done later in forked
- * state, when reaching iter_next() for ACTIVE iterator, can assume that
- * precision marks are finalized and won't change. Because simulating another
- * ACTIVE iterator iteration won't change them (because given same input
- * states we'll end up with exactly same output states which we are currently
- * comparing; and verification after the loop already propagated back what
- * needs to be **additionally** tracked as precise). It's subtle, grok
- * precision tracking for more intuitive understanding.
+ * Iteration convergence logic in is_state_visited() relies on exact
+ * states comparison, which ignores read and precision marks.
+ * This is necessary because read and precision marks are not finalized
+ * while in the loop. Exact comparison might preclude convergence for
+ * simple programs like below:
+ *
+ *     i = 0;
+ *     while (iter_next(&it))
+ *             i++;
+ *
+ * At each iteration step i++ would produce a new distinct state and
+ * eventually the instruction processing limit would be reached.
+ *
+ * To avoid such behavior speculatively forget (widen) the range of
+ * imprecise scalar registers, if those registers were not precise at the
+ * end of the previous iteration and do not match exactly.
+ *
+ * This is a conservative heuristic that allows verification of a wide range
+ * of programs; however, it precludes verification of programs that conjure an
+ * imprecise value on the first loop iteration and use it as precise on the second.
+ * For example, the following safe program would fail to verify:
+ *
+ *     struct bpf_num_iter it;
+ *     int arr[10];
+ *     int i = 0, a = 0;
+ *     bpf_iter_num_new(&it, 0, 10);
+ *     while (bpf_iter_num_next(&it)) {
+ *             if (a == 0) {
+ *                     a = 1;
+ *                     i = 7;       // Because i changed, the verifier forgets
+ *                                  // its range on the second loop entry.
+ *             } else {
+ *                     arr[i] = 42; // This would fail to verify.
+ *             }
+ *     }
+ *     bpf_iter_num_destroy(&it);
 */
 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
 				  struct bpf_kfunc_call_arg_meta *meta)
 {
-	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st;
+	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
 	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
 	struct bpf_reg_state *cur_iter, *queued_iter;
 	int iter_frameno = meta->iter.frameno;
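For contrast with the failing example in the comment above, the pattern below stays verifiable under the widening heuristic because the index bounds are re-established before the array access. This is only a sketch: it assumes libbpf's vmlinux.h / bpf_helpers.h and the bpf_iter_num_* kfunc declarations, and the program and section names are illustrative.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;

char _license[] SEC("license") = "GPL";

int arr[10];

SEC("socket")
int widen_then_recheck(void *ctx)
{
        struct bpf_iter_num it;
        int i = 0;

        bpf_iter_num_new(&it, 0, 10);
        while (bpf_iter_num_next(&it)) {
                i++;                    /* i's range is widened on loop re-entry ... */
                if (i < 0 || i >= 10)   /* ... so its bounds must be re-checked      */
                        break;          /* before it is used as an array index.     */
                arr[i] = 42;
        }
        bpf_iter_num_destroy(&it);
        return 0;
}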
@@ -7800,6 +7898,19 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
 	}

 	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
+		/* Because the iter_next() call is a checkpoint, is_state_visited()
+		 * should guarantee a parent state with the same call sites and insn_idx.
+		 */
+		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
+		    !same_callsites(cur_st->parent, cur_st)) {
+			verbose(env, "bug: bad parent state for iter next call");
+			return -EFAULT;
+		}
+		/* Note cur_st->parent in the call below: it is necessary to skip
+		 * the checkpoint created for cur_st by is_state_visited()
+		 * right at this instruction.
+		 */
+		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
 		/* branch out active iter state */
 		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
 		if (!queued_st)
@@ -7808,6 +7919,8 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
 		queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
 		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
 		queued_iter->iter.depth++;
+		if (prev_st)
+			widen_imprecise_scalars(env, prev_st, queued_st);

 		queued_fr = queued_st->frame[queued_st->curframe];
 		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
@@ -15948,8 +16061,11 @@ static bool regs_exact(const struct bpf_reg_state *rold,

 /* Returns true if (rold safe implies rcur safe) */
 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
-		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
+		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact)
 {
+	if (exact)
+		return regs_exact(rold, rcur, idmap);
+
 	if (!(rold->live & REG_LIVE_READ))
 		/* explored state didn't use this */
 		return true;
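The exact flag short-circuits the usual "old range subsumes current range" reasoning that regsafe() otherwise applies to scalars. A rough stand-alone illustration of the difference, using simplified types (struct scalar, range_within() and ranges_identical() are invented for the example, not the kernel helpers):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified scalar register: just a value range (illustrative type). */
struct scalar {
        int64_t min, max;
};

/* Pruning-style check: the old (already verified) range must subsume
 * the current one for the old state to "cover" the new one. */
static bool range_within(const struct scalar *old, const struct scalar *cur)
{
        return old->min <= cur->min && old->max >= cur->max;
}

/* Exact check used while loop states are still converging: read and
 * precision marks are not trusted, so only identical ranges match. */
static bool ranges_identical(const struct scalar *old, const struct scalar *cur)
{
        return old->min == cur->min && old->max == cur->max;
}

int main(void)
{
        struct scalar old = { .min = 0, .max = 10 };
        struct scalar cur = { .min = 2, .max = 5 };

        printf("range check: %d, exact check: %d\n",
               range_within(&old, &cur), ranges_identical(&old, &cur));
        return 0;
}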
@@ -16066,7 +16182,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
 }

 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
-		      struct bpf_func_state *cur, struct bpf_idmap *idmap)
+		      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
 {
 	int i, spi;

@@ -16079,7 +16195,12 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,

 		spi = i / BPF_REG_SIZE;

-		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
+		if (exact &&
+		    old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
+			return false;
+
+		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) {
 			i += BPF_REG_SIZE - 1;
 			/* explored state didn't use this */
 			continue;
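In exact mode the per-byte stack slot types must also match before any spilled register contents are compared. A small illustrative sketch of that byte-by-byte check; enum slot_tag and struct stack_slot are invented for the example (the kernel tags bytes of bpf_stack_state with STACK_INVALID/STACK_SPILL/STACK_MISC/STACK_ZERO and more):

#include <stdbool.h>
#include <stdio.h>

#define BPF_REG_SIZE 8

/* Illustrative stand-ins for the verifier's per-byte stack slot tags. */
enum slot_tag { SLOT_INVALID, SLOT_SPILL, SLOT_MISC, SLOT_ZERO };

struct stack_slot {
        enum slot_tag type[BPF_REG_SIZE]; /* one tag per byte of the 8-byte slot */
};

/* In exact mode every byte tag must match; in range mode slots the old
 * state never read could simply be skipped (not shown here). */
static bool slots_exact(const struct stack_slot *old, const struct stack_slot *cur)
{
        for (int i = 0; i < BPF_REG_SIZE; i++)
                if (old->type[i] != cur->type[i])
                        return false;
        return true;
}

int main(void)
{
        struct stack_slot a = { .type = { SLOT_SPILL, SLOT_SPILL, SLOT_SPILL, SLOT_SPILL,
                                          SLOT_SPILL, SLOT_SPILL, SLOT_SPILL, SLOT_SPILL } };
        struct stack_slot b = a;

        b.type[0] = SLOT_MISC; /* one byte differs -> exact comparison fails */
        printf("exact match: %d\n", slots_exact(&a, &b));
        return 0;
}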
@@ -16129,7 +16250,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
			 * return false to continue verification of this path
			 */
			if (!regsafe(env, &old->stack[spi].spilled_ptr,
-				     &cur->stack[spi].spilled_ptr, idmap))
+				     &cur->stack[spi].spilled_ptr, idmap, exact))
				return false;
			break;
		case STACK_DYNPTR:
@@ -16211,16 +16332,16 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
  * the current state will reach 'bpf_exit' instruction safely
  */
 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
-			      struct bpf_func_state *cur)
+			      struct bpf_func_state *cur, bool exact)
 {
 	int i;

 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
-			     &env->idmap_scratch))
+			     &env->idmap_scratch, exact))
			return false;

-	if (!stacksafe(env, old, cur, &env->idmap_scratch))
+	if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
 		return false;

 	if (!refsafe(old, cur, &env->idmap_scratch))
@@ -16229,17 +16350,23 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
 	return true;
 }

+static void reset_idmap_scratch(struct bpf_verifier_env *env)
+{
+	env->idmap_scratch.tmp_id_gen = env->id_gen;
+	memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
+}
+
 static bool states_equal(struct bpf_verifier_env *env,
			 struct bpf_verifier_state *old,
-			 struct bpf_verifier_state *cur)
+			 struct bpf_verifier_state *cur,
+			 bool exact)
 {
 	int i;

 	if (old->curframe != cur->curframe)
 		return false;

-	env->idmap_scratch.tmp_id_gen = env->id_gen;
-	memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
+	reset_idmap_scratch(env);

 	/* Verification state from speculative execution simulation
	 * must never prune a non-speculative execution one.
@@ -16269,7 +16396,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 	for (i = 0; i <= old->curframe; i++) {
 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
 			return false;
-		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
+		if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
 			return false;
 	}
 	return true;
@@ -16524,7 +16651,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	struct bpf_verifier_state_list *new_sl;
 	struct bpf_verifier_state_list *sl, **pprev;
 	struct bpf_verifier_state *cur = env->cur_state, *new;
-	int i, j, err, states_cnt = 0;
+	int i, j, n, err, states_cnt = 0;
 	bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
 	bool add_new_state = force_new_state;

@@ -16579,9 +16706,33 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
			 * It's safe to assume that iterator loop will finish, taking into
			 * account iter_next() contract of eventually returning
			 * sticky NULL result.
+			 *
+			 * Note that states have to be compared exactly in this case because
+			 * read and precision marks might not be finalized inside the loop.
+			 * E.g. as in the program below:
+			 *
+			 *     1. r7 = -16
+			 *     2. r6 = bpf_get_prandom_u32()
+			 *     3. while (bpf_iter_num_next(&fp[-8])) {
+			 *     4.   if (r6 != 42) {
+			 *     5.     r7 = -32
+			 *     6.     r6 = bpf_get_prandom_u32()
+			 *     7.     continue
+			 *     8.   }
+			 *     9.   r0 = r10
+			 *    10.   r0 += r7
+			 *    11.   r8 = *(u64 *)(r0 + 0)
+			 *    12.   r6 = bpf_get_prandom_u32()
+			 *    13. }
+			 *
+			 * Here the verifier would first visit path 1-3, create a checkpoint at 3
+			 * with r7=-16, then continue to 4-7,3. The existing checkpoint at 3 does
+			 * not have a read or precision mark for r7 yet, thus inexact states
+			 * comparison would discard the current state with r7=-32
+			 * => the unsafe memory access at 11 would not be caught.
			 */
			if (is_iter_next_insn(env, insn_idx)) {
-				if (states_equal(env, &sl->state, cur)) {
+				if (states_equal(env, &sl->state, cur, true)) {
					struct bpf_func_state *cur_frame;
					struct bpf_reg_state *iter_state, *iter_reg;
					int spi;
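The same point in stand-alone form: with a simplified register model (struct reg and both comparison helpers below are invented for the example), an inexact comparator that trusts a missing read mark would treat the r7=-16 checkpoint as covering the r7=-32 state, while the exact comparator forces the new state to be explored.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative register state: value plus a "was this read?" liveness mark. */
struct reg {
        int64_t val;
        bool live_read; /* read mark, possibly not yet propagated inside a loop */
};

/* Inexact (pruning) comparison: registers the old state never read are ignored. */
static bool reg_safe_inexact(const struct reg *old, const struct reg *cur)
{
        if (!old->live_read)
                return true; /* old state didn't use this register */
        return old->val == cur->val;
}

/* Exact comparison: liveness marks are not trusted, values must match. */
static bool reg_safe_exact(const struct reg *old, const struct reg *cur)
{
        return old->val == cur->val;
}

int main(void)
{
        /* Checkpoint made before the loop body read r7: no read mark yet. */
        struct reg old_r7 = { .val = -16, .live_read = false };
        struct reg cur_r7 = { .val = -32, .live_read = false };

        printf("inexact: %d (state wrongly considered covered)\n",
               reg_safe_inexact(&old_r7, &cur_r7));
        printf("exact:   %d (state explored, OOB access caught later)\n",
               reg_safe_exact(&old_r7, &cur_r7));
        return 0;
}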
@@ -16604,7 +16755,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
			}
			/* attempt to detect infinite loop to avoid unnecessary doomed work */
			if (states_maybe_looping(&sl->state, cur) &&
-			    states_equal(env, &sl->state, cur) &&
+			    states_equal(env, &sl->state, cur, false) &&
			    !iter_active_depths_differ(&sl->state, cur)) {
				verbose_linfo(env, insn_idx, "; ");
				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
@@ -16629,7 +16780,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
				add_new_state = false;
			goto miss;
		}
-		if (states_equal(env, &sl->state, cur)) {
+		if (states_equal(env, &sl->state, cur, false)) {
hit:
			sl->hit_cnt++;
			/* reached equivalent register/stack state,
@@ -16668,8 +16819,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
		 * to keep checking from state equivalence point of view.
		 * Higher numbers increase max_states_per_insn and verification time,
		 * but do not meaningfully decrease insn_processed.
+		 * 'n' controls how many times a state can miss before eviction.
+		 * Use a bigger 'n' for checkpoints because evicting checkpoint states
+		 * too early would hinder iterator convergence.
		 */
-		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
+		n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
+		if (sl->miss_cnt > sl->hit_cnt * n + n) {
			/* the state is unlikely to be useful. Remove it to
			 * speed up verification
			 */
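A tiny stand-alone rendering of the eviction rule above (should_evict() is invented for the illustration; only the threshold arithmetic matches the patch): a state that keeps missing is dropped once miss_cnt exceeds hit_cnt * n + n, and loop checkpoints get n = 64 instead of 3 so they survive long enough for the iterator states to converge.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative version of the eviction rule: a cached state is dropped once
 * its miss count exceeds hit_cnt * n + n; checkpoints that may still anchor
 * an iterator loop get the larger n so they are not evicted too early. */
static bool should_evict(unsigned int hit_cnt, unsigned int miss_cnt, bool loop_checkpoint)
{
        unsigned int n = loop_checkpoint ? 64 : 3;

        return miss_cnt > hit_cnt * n + n;
}

int main(void)
{
        /* A never-hit state: evicted after 4 misses normally, only after 65 for checkpoints. */
        printf("plain state, 4 misses:      %d\n", should_evict(0, 4, false));
        printf("loop checkpoint, 4 misses:  %d\n", should_evict(0, 4, true));
        printf("loop checkpoint, 65 misses: %d\n", should_evict(0, 65, true));
        return 0;
}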
@@ -16743,6 +16898,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)

 	cur->parent = new;
 	cur->first_insn_idx = insn_idx;
+	cur->dfs_depth = new->dfs_depth + 1;
 	clear_jmp_history(cur);
 	new_sl->next = *explored_state(env, insn_idx);
 	*explored_state(env, insn_idx) = new_sl;