From 1cabd6e848c07fbc7cdb96c23da60bea355af9fe Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 6 Mar 2025 21:07:31 -0800 Subject: [PATCH 1/6] bpf: compute SCCs in program control flow graph Compute strongly connected components in the program CFG. Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc. Use Tarjan's algorithm for SCC computation adapted to run non-recursively. For debug purposes print out computed SCCs as a part of full program dump in compute_live_registers() at log level 2, e.g.: func#0 @0 Live regs before insn: 0: .......... (b4) w6 = 10 2 1: ......6... (18) r1 = 0xffff88810bbb5565 2 3: .1....6... (b4) w2 = 2 2 4: .12...6... (85) call bpf_trace_printk#6 2 5: ......6... (04) w6 += -1 2 6: ......6... (56) if w6 != 0x0 goto pc-6 7: .......... (b4) w6 = 5 1 8: ......6... (18) r1 = 0xffff88810bbb5567 1 10: .1....6... (b4) w2 = 2 1 11: .12...6... (85) call bpf_trace_printk#6 1 12: ......6... (04) w6 += -1 1 13: ......6... (56) if w6 != 0x0 goto pc-6 14: .......... (b4) w0 = 0 15: 0......... (95) exit ^^^ SCC number for the instruction Signed-off-by: Eduard Zingerman --- include/linux/bpf_verifier.h | 5 + kernel/bpf/verifier.c | 182 +++++++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9734544b6957c..5f6808d5a154b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -604,6 +604,11 @@ struct bpf_insn_aux_data { * accepts callback function as a parameter. */ bool calls_callback; + /* + * CFG strongly connected component this instruction belongs to, + * zero if it is a singleton SCC. + */ + u32 scc; /* registers alive before this instruction. */ u16 live_regs_before; }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 54c6953a8b84c..fa0707367e7d7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -23880,6 +23880,10 @@ static int compute_live_registers(struct bpf_verifier_env *env) if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "Live regs before insn:\n"); for (i = 0; i < insn_cnt; ++i) { + if (env->insn_aux_data[i].scc) + verbose(env, "%3d ", env->insn_aux_data[i].scc); + else + verbose(env, " "); verbose(env, "%3d: ", i); for (j = BPF_REG_0; j < BPF_REG_10; ++j) if (insn_aux[i].live_regs_before & BIT(j)) @@ -23901,6 +23905,180 @@ static int compute_live_registers(struct bpf_verifier_env *env) return err; } +/* + * Compute strongly connected components (SCCs) on the CFG. + * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc. + * If instruction is a sole member of its SCC and there are no self edges, + * assign it SCC number of zero. + * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation. + */ +static int compute_scc(struct bpf_verifier_env *env) +{ + const u32 NOT_ON_STACK = U32_MAX; + + struct bpf_insn_aux_data *aux = env->insn_aux_data; + const u32 insn_cnt = env->prog->len; + int stack_sz, dfs_sz, err = 0; + u32 *stack, *pre, *low, *dfs; + u32 succ_cnt, i, j, t, w; + u32 next_preorder_num; + u32 next_scc_id; + bool assign_scc; + u32 succ[2]; + + next_preorder_num = 1; + next_scc_id = 1; + /* + * - 'stack' accumulates vertices in DFS order, see invariant comment below; + * - 'pre[t] == p' => preorder number of vertex 't' is 'p'; + * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n'; + * - 'dfs' DFS traversal stack, used to emulate explicit recursion. 
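+	 * - 'low[t] == NOT_ON_STACK' additionally means that vertex 't' was
+	 *   popped from 'stack' and the SCC containing 't' is fully computed.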
+	 */
+	stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+	pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+	low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+	dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL);
+	if (!stack || !pre || !low || !dfs) {
+		err = -ENOMEM;
+		goto exit;
+	}
+	/*
+	 * References:
+	 * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
+	 * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
+	 *
+	 * The algorithm maintains the following invariant:
+	 * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
+	 * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
+	 *
+	 * Consequently:
+	 * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
+	 *   such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
+	 *   and thus there is an SCC (loop) containing both 'u' and 'v'.
+	 * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
+	 *   and 'v' can be considered the root of some SCC.
+	 *
+	 * Here is a pseudo-code for an explicitly recursive version of the algorithm:
+	 *
+	 *    NOT_ON_STACK = insn_cnt + 1
+	 *    pre = [0] * insn_cnt
+	 *    low = [0] * insn_cnt
+	 *    scc = [0] * insn_cnt
+	 *    stack = []
+	 *
+	 *    next_preorder_num = 1
+	 *    next_scc_id = 1
+	 *
+	 *    def recur(w):
+	 *        nonlocal next_preorder_num
+	 *        nonlocal next_scc_id
+	 *
+	 *        pre[w] = next_preorder_num
+	 *        low[w] = next_preorder_num
+	 *        next_preorder_num += 1
+	 *        stack.append(w)
+	 *        for s in successors(w):
+	 *            # Note: for the classic algorithm the block below would look like:
+	 *            #
+	 *            #   if pre[s] == 0:
+	 *            #       recur(s)
+	 *            #       low[w] = min(low[w], low[s])
+	 *            #   elif low[s] != NOT_ON_STACK:
+	 *            #       low[w] = min(low[w], pre[s])
+	 *            #
+	 *            # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
+	 *            # does not break the invariant and makes the iterative version of the
+	 *            # algorithm simpler. See 'Algorithm #3' from [2].
+	 *
+	 *            # 's' not yet visited
+	 *            if pre[s] == 0:
+	 *                recur(s)
+	 *            # if 's' is on stack, pick lowest reachable preorder number from it;
+	 *            # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
+	 *            # so 'min' would be a noop.
+	 *            low[w] = min(low[w], low[s])
+	 *
+	 *        if low[w] == pre[w]:
+	 *            # 'w' is the root of an SCC, pop all vertices
+	 *            # below 'w' on stack and assign same SCC to them.
+	 *            while True:
+	 *                t = stack.pop()
+	 *                low[t] = NOT_ON_STACK
+	 *                scc[t] = next_scc_id
+	 *                if t == w:
+	 *                    break
+	 *            next_scc_id += 1
+	 *
+	 *    for i in range(0, insn_cnt):
+	 *        if pre[i] == 0:
+	 *            recur(i)
+	 *
+	 * The implementation below replaces explicit recursion with the 'dfs' array.
+	 */
+	for (i = 0; i < insn_cnt; i++) {
+		if (pre[i])
+			continue;
+		stack_sz = 0;
+		dfs_sz = 1;
+		dfs[0] = i;
+dfs_continue:
+		while (dfs_sz) {
+			w = dfs[dfs_sz - 1];
+			if (pre[w] == 0) {
+				low[w] = next_preorder_num;
+				pre[w] = next_preorder_num;
+				next_preorder_num++;
+				stack[stack_sz++] = w;
+			}
+			/* Visit 'w' successors */
+			succ_cnt = insn_successors(env->prog, w, succ);
+			for (j = 0; j < succ_cnt; ++j) {
+				if (pre[succ[j]]) {
+					low[w] = min(low[w], low[succ[j]]);
+				} else {
+					dfs[dfs_sz++] = succ[j];
+					goto dfs_continue;
+				}
+			}
+			/*
+			 * Preserve the invariant: if some vertex above in the stack
+			 * is reachable from 'w', keep 'w' on the stack.
+			 */
+			if (low[w] < pre[w]) {
+				dfs_sz--;
+				goto dfs_continue;
+			}
+			/*
+			 * Assign SCC number only if component has two or more elements,
+			 * or if component has a self reference.
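+			 * A singleton SCC without a self reference keeps aux[t].scc == 0,
+			 * matching the "zero for singleton" convention documented above.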
+			 */
+			assign_scc = stack[stack_sz - 1] != w;
+			for (j = 0; j < succ_cnt; ++j) {
+				if (succ[j] == w) {
+					assign_scc = true;
+					break;
+				}
+			}
+			/* Pop component elements from stack */
+			do {
+				t = stack[--stack_sz];
+				low[t] = NOT_ON_STACK;
+				if (assign_scc)
+					aux[t].scc = next_scc_id;
+			} while (t != w);
+			if (assign_scc)
+				next_scc_id++;
+			dfs_sz--;
+		}
+	}
+exit:
+	kvfree(stack);
+	kvfree(pre);
+	kvfree(low);
+	kvfree(dfs);
+	return err;
+}
+
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
 {
 	u64 start_time = ktime_get_ns();
@@ -24022,6 +24200,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret)
 		goto skip_full_check;
 
+	ret = compute_scc(env);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = compute_live_registers(env);
 	if (ret < 0)
 		goto skip_full_check;

From 4ccdc3b8f7b5e0d3ad2277639e5a6605689416c5 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 26 Apr 2025 02:37:20 -0700
Subject: [PATCH 2/6] bpf: frame_insn_idx() utility function

A function to return IP for a given frame in a call stack of a state.
Will be used by the next patch.

Signed-off-by: Eduard Zingerman
---
 kernel/bpf/verifier.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fa0707367e7d7..6fa9cfcc12804 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1798,6 +1798,14 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
 	return true;
 }
 
+/* Return IP for a given frame in a call stack */
+static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
+{
+	return frame == st->curframe
+	       ? st->insn_idx
+	       : st->frame[frame + 1]->callsite;
+}
+
 /* Open coded iterators allow back-edges in the state graph in order to
  * check unbounded loops that iterators.
 *
@@ -18731,9 +18739,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 	 * and all frame states need to be equivalent
 	 */
 	for (i = 0; i <= old->curframe; i++) {
-		insn_idx = i == old->curframe
-			   ? env->insn_idx
-			   : old->frame[i + 1]->callsite;
+		insn_idx = frame_insn_idx(old, i);
 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
 			return false;
 		if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))

From c365b17bf410a326f34acfd490dfc497d5efa074 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Tue, 29 Apr 2025 15:28:04 -0700
Subject: [PATCH 3/6] bpf: move REG_LIVE_DONE check to clean_live_states()

The next patch will add a relatively heavy-weight operation to
clean_live_states(); this operation can be skipped if REG_LIVE_DONE is
set. Move the check from clean_verifier_state() to clean_live_states()
as a small refactoring commit.

Signed-off-by: Eduard Zingerman --- kernel/bpf/verifier.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6fa9cfcc12804..4698e7794779f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18235,10 +18235,6 @@ static void clean_verifier_state(struct bpf_verifier_env *env, { int i; - if (st->frame[0]->regs[0].live & REG_LIVE_DONE) - /* all regs in this state in all frames were already marked */ - return; - for (i = 0; i <= st->curframe; i++) clean_func_state(env, st->frame[i]); } @@ -18293,6 +18289,9 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, if (sl->state.insn_idx != insn || !same_callsites(&sl->state, cur)) continue; + if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) + /* all regs in this state in all frames were already marked */ + continue; clean_verifier_state(env, &sl->state); } } From 49f23f4a0729aab9dcabd8e193822fa3d8f2127e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 29 Apr 2025 16:59:49 -0700 Subject: [PATCH 4/6] bpf: use SCC info instead of loop_entry Replace loop_entry-based exact states comparison logic. Instead, for states within an iterator based loop, mark all registers as read and precise. Use control flow graph strongly connected components information to detect states that are members of a loop. See comments for mark_all_regs_read_and_precise() for a detailed explanation. This change addresses the cases described in [1]. These cases can be illustrated with the following diagram: .-> A --. Assume the states are visited in the order A, B, C. | | | Assume that state B reaches a state equivalent to state A. | v v At this point, state C is not processed yet, so state A '-- B C has not received any read or precision marks from C. As a result, these marks won't be propagated to B. If B has incomplete marks, it is unsafe to use it in states_equal() checks. See selftests later in a series for examples of unsafe programs that are not detected without this change. [1] https://lore.kernel.org/bpf/3c6ac16b7578406e2ddd9ba889ce955748fe636b.camel@gmail.com/ Fixes: 2a0992829ea3 ("bpf: correct loop detection for iterators convergence") Signed-off-by: Eduard Zingerman --- include/linux/bpf_verifier.h | 26 ++++ kernel/bpf/verifier.c | 257 ++++++++++++++++++++++++++++------- 2 files changed, 233 insertions(+), 50 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5f6808d5a154b..07a65db598172 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -471,6 +471,11 @@ struct bpf_verifier_state { * while they are still in use. */ u32 used_as_loop_entry; + /* + * If this state is a checkpoint at insn_idx that belongs to an SCC, + * record the SCC epoch at the time of checkpoint creation. + */ + u32 scc_epoch; }; #define bpf_get_spilled_reg(slot, frame, mask) \ @@ -718,6 +723,25 @@ struct bpf_idset { u32 ids[BPF_ID_MAP_SIZE]; }; +/* Information tracked for CFG strongly connected components */ +struct bpf_scc_info { + /* + * Set to true when is_state_visited() detected convergence of + * open coded iterator for a state with insn_idx within this SCC. + * Indicates that read and precision marks are incomplete for + * states with insn_idx in this SCC. + */ + bool state_loops_possible; + /* + * Number of times this SCC was entered by some verifier state + * and that state was fully explored. + * In other words, number of times .branches became non-zero + * and then zero again. 
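+	 * Maintained by update_branch_counts() once the entry state
+	 * recorded by is_state_visited() is fully explored.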
+ */ + u32 scc_epoch; + struct bpf_verifier_state *entry_state; +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -810,6 +834,8 @@ struct bpf_verifier_env { u64 prev_log_pos, prev_insn_print_pos; /* buffer used to temporary hold constants as scalar registers */ struct bpf_reg_state fake_reg[2]; + struct bpf_scc_info *scc_info; + u32 num_sccs; /* buffer used to generate temporary string representations, * e.g., in reg_type_str() to generate reg_type string */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4698e7794779f..3d3f32053ce7d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1756,6 +1756,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->used_as_loop_entry = src->used_as_loop_entry; dst_state->may_goto_depth = src->may_goto_depth; dst_state->loop_entry = src->loop_entry; + dst_state->scc_epoch = src->scc_epoch; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -1806,6 +1807,57 @@ static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame) : st->frame[frame + 1]->callsite; } +static struct bpf_scc_info *insn_scc(struct bpf_verifier_env *env, int insn_idx) +{ + u32 scc; + + scc = env->insn_aux_data[insn_idx].scc; + return scc ? &env->scc_info[scc] : NULL; +} + +/* + * Returns true iff: + * - verifier is currently exploring states with origins in some CFG SCCs; + * - st->insn_idx belongs to one of these SCCs; + * - st->scc_epoch is the current SCC epoch, indicating that some parent + * of st started current SCC exploration epoch. + * + * When above conditions are true, mark_all_regs_read_and_precise() + * has not yet been called for st, meaning that read and precision + * marks can't be relied upon. + * + * See comments for mark_all_regs_read_and_precise(). + */ +static bool incomplete_read_marks(struct bpf_verifier_env *env, + struct bpf_verifier_state *st, + bool same_epoch) +{ + struct bpf_scc_info *scc_info; + u32 insn_idx, i; + + for (i = 0; i <= st->curframe; i++) { + insn_idx = frame_insn_idx(st, i); + scc_info = insn_scc(env, insn_idx); + if (scc_info && + scc_info->state_loops_possible && + (same_epoch ? scc_info->scc_epoch == st->scc_epoch + : scc_info->scc_epoch > st->scc_epoch)) + return true; + } + + return false; +} + +static void mark_state_loops_possible(struct bpf_verifier_env *env, + struct bpf_verifier_state *st) +{ + struct bpf_scc_info *scc_info; + + scc_info = insn_scc(env, st->insn_idx); + if (scc_info) + scc_info->state_loops_possible = 1; +} + /* Open coded iterators allow back-edges in the state graph in order to * check unbounded loops that iterators. * @@ -1964,18 +2016,12 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi { struct bpf_verifier_state_list *sl = NULL, *parent_sl; struct bpf_verifier_state *parent; + struct bpf_scc_info *scc_info; + u32 insn_idx, i; while (st) { u32 br = --st->branches; - /* br == 0 signals that DFS exploration for 'st' is finished, - * thus it is necessary to update parent's loop entry if it - * turned out that st is a part of some loop. - * This is a part of 'case A' in get_loop_entry() comment. 
-		 */
-		if (br == 0 && st->parent && st->loop_entry)
-			update_loop_entry(env, st->parent, st->loop_entry);
-
 		/* WARN_ON(br > 1) technically makes sense here,
 		 * but see comment in push_stack(), hence:
 		 */
@@ -1984,6 +2030,14 @@
 			br);
 		if (br)
 			break;
+		for (i = 0; i <= st->curframe; i++) {
+			insn_idx = frame_insn_idx(st, i);
+			scc_info = insn_scc(env, insn_idx);
+			if (scc_info && scc_info->entry_state == st) {
+				scc_info->entry_state = NULL;
+				scc_info->scc_epoch++;
+			}
+		}
 		parent = st->parent;
 		parent_sl = state_parent_as_list(st);
 		if (sl)
@@ -2242,6 +2296,18 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
 	reg->u32_max_value = U32_MAX;
 }
 
+static bool is_reg_unbounded(struct bpf_reg_state *reg)
+{
+	return reg->smin_value == S64_MIN &&
+	       reg->smax_value == S64_MAX &&
+	       reg->umin_value == 0 &&
+	       reg->umax_value == U64_MAX &&
+	       reg->s32_min_value == S32_MIN &&
+	       reg->s32_max_value == S32_MAX &&
+	       reg->u32_min_value == 0 &&
+	       reg->u32_max_value == U32_MAX;
+}
+
 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
 {
 	reg->smin_value = S64_MIN;
@@ -18239,6 +18305,116 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
 	clean_func_state(env, st->frame[i]);
 }
 
+/*
+ * Open coded iterators introduce loops in the verifier state graph.
+ * State graph loops can result in incomplete read and precision marks
+ * on individual states. E.g. consider the following states graph:
+ *
+ *   .-> A --.  Assume the states are visited in the order A, B, C.
+ *   |   |   |  Assume that state B reaches a state equivalent to state A.
+ *   |   v   v  At this point, state C has not been processed yet,
+ *   '-- B   C  so state A does not have any read or precision marks from C yet.
+ *              As a result, these marks won't be propagated to B.
+ *
+ * If the marks on B are incomplete, it would be unsafe to use it in
+ * states_equal(NOT_EXACT) checks, states_equal(RANGE_WITHIN) should
+ * be used instead.
+ *
+ * To avoid this safety issue, and since states with incomplete read
+ * marks can only occur within control flow graph loops, the verifier
+ * assumes that any state with bpf_verifier_state->insn_idx residing
+ * in a strongly connected component (SCC) has read and precision
+ * marks for all registers. This assumption is enforced by the
+ * function mark_all_regs_read_and_precise(), which assigns
+ * corresponding marks.
+ *
+ * An intuitive point to call mark_all_regs_read_and_precise() would
+ * be when a new state is created in is_state_visited().
+ * However, doing so would interfere with widen_imprecise_scalars(),
+ * which widens scalars in the current state after checking registers in a
+ * parent state. Registers are not widened if they are marked as precise
+ * in the parent state.
+ *
+ * To avoid interfering with the widening logic, the call to
+ * mark_all_regs_read_and_precise() for a state S is postponed
+ * until no widening is possible in any descendant of S.
+ *
+ * Another intuitive spot to call mark_all_regs_read_and_precise()
+ * would be in update_branch_counts() when S's branches counter
+ * reaches 0. However, this falls short in the following case:
+ *
+ *     sum = 0
+ *     bpf_repeat(10) {                           // a
+ *         if (unlikely(bpf_get_prandom_u32()))   // b
+ *             sum += 1;
+ *         if (bpf_get_prandom_u32())             // c
+ *             asm volatile ("");
+ *         asm volatile ("goto +0;");             // d
+ *     }
+ *
+ * Here a checkpoint is created at (d) with {sum=0} and the branch counter
+ * for (d) reaches 0, so 'sum' would be marked precise.
+ * When the second branch of (c) reaches (d), the checkpoint is hit,
+ * and the precision mark for 'sum' is propagated to (a).
+ * When the second branch of (b) reaches (a), the state would be {sum=1},
+ * no widening would occur, causing verification to continue forever.
+ *
+ * To avoid such premature precision markings, the verifier postpones
+ * the call to mark_all_regs_read_and_precise() for state S even further.
+ * Suppose state P is a [grand]parent of state S and is the first state
+ * in the current state chain with state->insn_idx within the current SCC.
+ * mark_all_regs_read_and_precise() for state S is only called once P
+ * is fully explored.
+ *
+ * The struct 'bpf_scc_info' is used to track this condition:
+ * - bpf_scc_info->entry_state points to the first checkpoint state
+ *   created with insn_idx inside this SCC that is still being
+ *   explored (state P above);
+ * - bpf_scc_info->scc_epoch counts how many times such an entry
+ *   state has been fully explored;
+ * - bpf_verifier_state->scc_epoch records the epoch of the SCC
+ *   corresponding to bpf_verifier_state->insn_idx at the moment
+ *   of state creation.
+ *
+ * is_state_visited() records the entry state, update_branch_counts()
+ * maintains bpf_scc_info->scc_epoch.
+ *
+ * The entry state's branch counter reaching zero indicates that state P
+ * is fully explored. Its descendants residing in the same SCC have
+ * state->scc_epoch == scc_info->scc_epoch. update_branch_counts()
+ * then increments scc_info->scc_epoch, allowing clean_live_states() to
+ * detect these states and apply mark_all_regs_read_and_precise().
+ */
+static void mark_all_regs_read_and_precise(struct bpf_verifier_env *env,
+					   struct bpf_verifier_state *st)
+{
+	struct bpf_func_state *func;
+	struct bpf_reg_state *reg;
+	u16 live_regs;
+	u32 insn_idx;
+	int i, j;
+
+	for (i = 0; i <= st->curframe; i++) {
+		insn_idx = frame_insn_idx(st, i);
+		live_regs = env->insn_aux_data[insn_idx].live_regs_before;
+		func = st->frame[i];
+		for (j = 0; j < BPF_REG_FP; j++) {
+			reg = &func->regs[j];
+			if (!(BIT(j) & live_regs) || reg->type == NOT_INIT)
+				continue;
+			reg->live |= REG_LIVE_READ64;
+			if (reg->type == SCALAR_VALUE && !is_reg_unbounded(reg))
+				reg->precise = true;
+		}
+		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+			reg = &func->stack[j].spilled_ptr;
+			reg->live |= REG_LIVE_READ64;
+			if (is_spilled_reg(&func->stack[j]) &&
+			    reg->type == SCALAR_VALUE && !is_reg_unbounded(reg))
+				reg->precise = true;
+		}
+	}
+}
+
 /* the parentage chains form a tree.
  * the verifier states are added to state lists at given insn and
  * pushed into state stack for future exploration.
@@ -18274,7 +18450,6 @@ static void clean_verifier_state(struct bpf_verifier_env *env, static void clean_live_states(struct bpf_verifier_env *env, int insn, struct bpf_verifier_state *cur) { - struct bpf_verifier_state *loop_entry; struct bpf_verifier_state_list *sl; struct list_head *pos, *head; @@ -18283,15 +18458,16 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, sl = container_of(pos, struct bpf_verifier_state_list, node); if (sl->state.branches) continue; - loop_entry = get_loop_entry(env, &sl->state); - if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches) - continue; if (sl->state.insn_idx != insn || !same_callsites(&sl->state, cur)) continue; if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) /* all regs in this state in all frames were already marked */ continue; + if (incomplete_read_marks(env, &sl->state, true)) + continue; + if (incomplete_read_marks(env, &sl->state, false)) + mark_all_regs_read_and_precise(env, &sl->state); clean_verifier_state(env, &sl->state); } } @@ -18995,10 +19171,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; - struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry; + struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, n, err, states_cnt = 0; bool force_new_state, add_new_state, force_exact; struct list_head *pos, *tmp, *head; + struct bpf_scc_info *scc_info; force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) || /* Avoid accumulating infinitely long jmp history */ @@ -19098,7 +19275,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) spi = __get_spi(iter_reg->off + iter_reg->var_off.value); iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr; if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) { - update_loop_entry(env, cur, &sl->state); + mark_state_loops_possible(env, &sl->state); goto hit; } } @@ -19107,7 +19284,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) if (is_may_goto_insn_at(env, insn_idx)) { if (sl->state.may_goto_depth != cur->may_goto_depth && states_equal(env, &sl->state, cur, RANGE_WITHIN)) { - update_loop_entry(env, cur, &sl->state); + mark_state_loops_possible(env, &sl->state); goto hit; } } @@ -19149,38 +19326,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) add_new_state = false; goto miss; } - /* If sl->state is a part of a loop and this loop's entry is a part of - * current verification path then states have to be compared exactly. - * 'force_exact' is needed to catch the following case: - * - * initial Here state 'succ' was processed first, - * | it was eventually tracked to produce a - * V state identical to 'hdr'. - * .---------> hdr All branches from 'succ' had been explored - * | | and thus 'succ' has its .branches == 0. - * | V - * | .------... Suppose states 'cur' and 'succ' correspond - * | | | to the same instruction + callsites. - * | V V In such case it is necessary to check - * | ... ... if 'succ' and 'cur' are states_equal(). - * | | | If 'succ' and 'cur' are a part of the - * | V V same loop exact flag has to be set. - * | succ <- cur To check if that is the case, verify - * | | if loop entry of 'succ' is in current - * | V DFS path. - * | ... - * | | - * '----' - * - * Additional details are in the comment before get_loop_entry(). 
- */ - loop_entry = get_loop_entry(env, &sl->state); - if (IS_ERR(loop_entry)) - return PTR_ERR(loop_entry); - force_exact = loop_entry && loop_entry->branches > 0; + /* See comments for mark_all_regs_read_and_precise() */ + force_exact = incomplete_read_marks(env, &sl->state, true); if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) { - if (force_exact) - update_loop_entry(env, cur, loop_entry); hit: sl->hit_cnt++; /* reached equivalent register/stack state, @@ -19278,6 +19426,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return err; } new->insn_idx = insn_idx; + scc_info = insn_scc(env, insn_idx); + if (scc_info) { + new->scc_epoch = scc_info->scc_epoch; + if (!scc_info->entry_state) + scc_info->entry_state = new; + } WARN_ONCE(new->branches != 1, "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); @@ -19664,10 +19818,6 @@ static int do_check(struct bpf_verifier_env *env) return err; break; } else { - if (WARN_ON_ONCE(env->cur_state->loop_entry)) { - verbose(env, "verifier bug: env->cur_state->loop_entry != NULL\n"); - return -EFAULT; - } do_print_state = true; continue; } @@ -24076,6 +24226,12 @@ static int compute_scc(struct bpf_verifier_env *env) dfs_sz--; } } + env->scc_info = kvcalloc(next_scc_id, sizeof(*env->scc_info), GFP_KERNEL); + if (!env->scc_info) { + err = -ENOMEM; + goto exit; + } + env->num_sccs = next_scc_id; exit: kvfree(stack); kvfree(pre); @@ -24352,6 +24508,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 kvfree(env->insn_hist); err_free_env: kvfree(env->cfg.insn_postorder); + kvfree(env->scc_info); kvfree(env); return ret; } From 4d2705e75374941b09bcd7580eeb2d1a3fb2d2d8 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 29 Apr 2025 17:04:42 -0700 Subject: [PATCH 5/6] bpf: remove {update,get}_loop_entry functions Previous patch switched read and precision tracking logic for iterator-based loops from loops tracking on state graph to loops tracking on control flow graph. This patch removes {update,get}_loop_entry functions used for state graph loops tracking. Signed-off-by: Eduard Zingerman --- include/linux/bpf_verifier.h | 15 --- kernel/bpf/verifier.c | 178 +---------------------------------- 2 files changed, 2 insertions(+), 191 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 07a65db598172..2fad2b1195037 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -445,16 +445,6 @@ struct bpf_verifier_state { /* first and last insn idx of this verifier state */ u32 first_insn_idx; u32 last_insn_idx; - /* If this state is a part of states loop this field points to some - * parent of this state such that: - * - it is also a member of the same states loop; - * - DFS states traversal starting from initial state visits loop_entry - * state before this state. - * Used to compute topmost loop entry for state loops. - * State loops might appear because of open coded iterators logic. - * See get_loop_entry() for more information. - */ - struct bpf_verifier_state *loop_entry; /* Sub-range of env->insn_hist[] corresponding to this state's * instruction history. * Backtracking is using it to go from last to first. @@ -466,11 +456,6 @@ struct bpf_verifier_state { u32 dfs_depth; u32 callback_unroll_depth; u32 may_goto_depth; - /* If this state was ever pointed-to by other state's loop_entry field - * this flag would be set to true. 
Used to avoid freeing such states - * while they are still in use. - */ - u32 used_as_loop_entry; /* * If this state is a checkpoint at insn_idx that belongs to an SCC, * record the SCC epoch at the time of checkpoint creation. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3d3f32053ce7d..850cf53b72775 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1672,7 +1672,7 @@ static void free_verifier_state(struct bpf_verifier_state *state, kfree(state); } -/* struct bpf_verifier_state->{parent,loop_entry} refer to states +/* struct bpf_verifier_state->parent refers to states * that are in either of env->{expored_states,free_list}. * In both cases the state is contained in struct bpf_verifier_state_list. */ @@ -1683,36 +1683,18 @@ static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_ return NULL; } -static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st) -{ - if (st->loop_entry) - return container_of(st->loop_entry, struct bpf_verifier_state_list, state); - return NULL; -} - /* A state can be freed if it is no longer referenced: * - is in the env->free_list; * - has no children states; - * - is not used as loop_entry. - * - * Freeing a state can make it's loop_entry free-able. */ static void maybe_free_verifier_state(struct bpf_verifier_env *env, struct bpf_verifier_state_list *sl) { - struct bpf_verifier_state_list *loop_entry_sl; - - while (sl && sl->in_free_list && - sl->state.branches == 0 && - sl->state.used_as_loop_entry == 0) { - loop_entry_sl = state_loop_entry_as_list(&sl->state); - if (loop_entry_sl) - loop_entry_sl->state.used_as_loop_entry--; + if (sl->in_free_list && sl->state.branches == 0) { list_del(&sl->node); free_verifier_state(&sl->state, false); kfree(sl); env->free_list_size--; - sl = loop_entry_sl; } } @@ -1753,9 +1735,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->insn_hist_end = src->insn_hist_end; dst_state->dfs_depth = src->dfs_depth; dst_state->callback_unroll_depth = src->callback_unroll_depth; - dst_state->used_as_loop_entry = src->used_as_loop_entry; dst_state->may_goto_depth = src->may_goto_depth; - dst_state->loop_entry = src->loop_entry; dst_state->scc_epoch = src->scc_epoch; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; @@ -1858,160 +1838,6 @@ static void mark_state_loops_possible(struct bpf_verifier_env *env, scc_info->state_loops_possible = 1; } -/* Open coded iterators allow back-edges in the state graph in order to - * check unbounded loops that iterators. - * - * In is_state_visited() it is necessary to know if explored states are - * part of some loops in order to decide whether non-exact states - * comparison could be used: - * - non-exact states comparison establishes sub-state relation and uses - * read and precision marks to do so, these marks are propagated from - * children states and thus are not guaranteed to be final in a loop; - * - exact states comparison just checks if current and explored states - * are identical (and thus form a back-edge). - * - * Paper "A New Algorithm for Identifying Loops in Decompilation" - * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient - * algorithm for loop structure detection and gives an overview of - * relevant terminology. It also has helpful illustrations. 
- * - * [1] https://api.semanticscholar.org/CorpusID:15784067 - * - * We use a similar algorithm but because loop nested structure is - * irrelevant for verifier ours is significantly simpler and resembles - * strongly connected components algorithm from Sedgewick's textbook. - * - * Define topmost loop entry as a first node of the loop traversed in a - * depth first search starting from initial state. The goal of the loop - * tracking algorithm is to associate topmost loop entries with states - * derived from these entries. - * - * For each step in the DFS states traversal algorithm needs to identify - * the following situations: - * - * initial initial initial - * | | | - * V V V - * ... ... .---------> hdr - * | | | | - * V V | V - * cur .-> succ | .------... - * | | | | | | - * V | V | V V - * succ '-- cur | ... ... - * | | | - * | V V - * | succ <- cur - * | | - * | V - * | ... - * | | - * '----' - * - * (A) successor state of cur (B) successor state of cur or it's entry - * not yet traversed are in current DFS path, thus cur and succ - * are members of the same outermost loop - * - * initial initial - * | | - * V V - * ... ... - * | | - * V V - * .------... .------... - * | | | | - * V V V V - * .-> hdr ... ... ... - * | | | | | - * | V V V V - * | succ <- cur succ <- cur - * | | | - * | V V - * | ... ... - * | | | - * '----' exit - * - * (C) successor state of cur is a part of some loop but this loop - * does not include cur or successor state is not in a loop at all. - * - * Algorithm could be described as the following python code: - * - * traversed = set() # Set of traversed nodes - * entries = {} # Mapping from node to loop entry - * depths = {} # Depth level assigned to graph node - * path = set() # Current DFS path - * - * # Find outermost loop entry known for n - * def get_loop_entry(n): - * h = entries.get(n, None) - * while h in entries: - * h = entries[h] - * return h - * - * # Update n's loop entry if h comes before n in current DFS path. - * def update_loop_entry(n, h): - * if h in path and depths[entries.get(n, n)] < depths[n]: - * entries[n] = h1 - * - * def dfs(n, depth): - * traversed.add(n) - * path.add(n) - * depths[n] = depth - * for succ in G.successors(n): - * if succ not in traversed: - * # Case A: explore succ and update cur's loop entry - * # only if succ's entry is in current DFS path. - * dfs(succ, depth + 1) - * h = entries.get(succ, None) - * update_loop_entry(n, h) - * else: - * # Case B or C depending on `h1 in path` check in update_loop_entry(). - * update_loop_entry(n, succ) - * path.remove(n) - * - * To adapt this algorithm for use with verifier: - * - use st->branch == 0 as a signal that DFS of succ had been finished - * and cur's loop entry has to be updated (case A), handle this in - * update_branch_counts(); - * - use st->branch > 0 as a signal that st is in the current DFS path; - * - handle cases B and C in is_state_visited(). 
- */
-static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
-						 struct bpf_verifier_state *st)
-{
-	struct bpf_verifier_state *topmost = st->loop_entry;
-	u32 steps = 0;
-
-	while (topmost && topmost->loop_entry) {
-		if (steps++ > st->dfs_depth) {
-			WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n");
-			verbose(env, "verifier bug: infinite loop in get_loop_entry()\n");
-			return ERR_PTR(-EFAULT);
-		}
-		topmost = topmost->loop_entry;
-	}
-	return topmost;
-}
-
-static void update_loop_entry(struct bpf_verifier_env *env,
-			      struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
-{
-	/* The hdr->branches check decides between cases B and C in
-	 * comment for get_loop_entry(). If hdr->branches == 0 then
-	 * head's topmost loop entry is not in current DFS path,
-	 * hence 'cur' and 'hdr' are not in the same loop and there is
-	 * no need to update cur->loop_entry.
-	 */
-	if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) {
-		if (cur->loop_entry) {
-			cur->loop_entry->used_as_loop_entry--;
-			maybe_free_verifier_state(env, state_loop_entry_as_list(cur));
-		}
-		cur->loop_entry = hdr;
-		hdr->used_as_loop_entry++;
-	}
-}
-
 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 {
 	struct bpf_verifier_state_list *sl = NULL, *parent_sl;

From fc2f684ab8e4374a60264d6ed6e200d16334be9b Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Mon, 10 Mar 2025 18:14:54 -0700
Subject: [PATCH 6/6] selftests/bpf: tests with a loop state missing read/precision mark

The test case absent_mark_in_the_middle_state is equivalent to the
following C program:

   1: r8 = bpf_get_prandom_u32();
   2: r6 = -32;
   3: bpf_iter_num_new(&fp[-8], 0, 10);
   4: if (unlikely(bpf_get_prandom_u32()))
   5:     r6 = -31;
   6: for (;;) {
   7:     if (!bpf_iter_num_next(&fp[-8]))
   8:         break;
   9:     if (unlikely(bpf_get_prandom_u32()))
  10:         *(u64 *)(fp + r6) = 7;
  11: }
  12: bpf_iter_num_destroy(&fp[-8]);
  13: return 0;

Without the fix, verification proceeds as follows:
- 1-4, state is {r6=-32,fp-8=active};
- 6, checkpoint A is created with {r6=-32,fp-8=active};
- 7, checkpoint B is created with {r6=-32,fp-8=active},
  push state {r6=-32,fp-8=active} from 7 to 9;
- 8,12,13, {r6=-32,fp-8=drained}, exit;
- pop state with {r6=-32,fp-8=active} from 7 to 9;
- 9, push state {r6=-32,fp-8=active} from 9 to 10;
- 6, checkpoint C is created with {r6=-32,fp-8=active};
- 7, checkpoint A is hit, no precision propagated for r6 to C;
- pop state {r6=-32,fp-8=active} from 9 to 10;
- 10, state is {r6=-31,fp-8=active}, r6 is marked as read and precise,
  these marks are propagated to checkpoints A and B (but not C, as it
  is not the parent of the current state);
- 6, {r6=-31,fp-8=active} checkpoint C is hit, because r6 is not
  marked precise for this checkpoint;
- the program is accepted, despite the possibility of an unaligned u64
  stack access at offset -31.

With the fix, comparisons against checkpoints residing in the same SCC
epoch as the current state are forced to be exact, so the {r6=-31}
state does not match checkpoint C and the misaligned stack access is
reported.

The test case absent_mark_in_the_middle_state2 is similar except the following change: r8 = bpf_get_prandom_u32(); r6 = -32; bpf_iter_num_new(&fp[-8], 0, 10); if (unlikely(bpf_get_prandom_u32())) { r6 = -31; + jump_into_loop: + goto +0; + goto loop; + } + if (unlikely(bpf_get_prandom_u32())) + goto jump_into_loop; + loop: for (;;) { if (!bpf_iter_num_next(&fp[-8])) break; if (unlikely(bpf_get_prandom_u32())) *(u64 *)(fp + r6) = 7; } bpf_iter_num_destroy(&fp[-8]) return 0 The goal is to check that read/precision marks are propagated to checkpoint created at 'goto +0' that resides outside of the loop. Signed-off-by: Eduard Zingerman --- tools/testing/selftests/bpf/progs/iters.c | 151 ++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 76adf4a8f2dad..12030aa340b2d 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1649,4 +1649,155 @@ int clean_live_states(const void *ctx) return 0; } +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state(void) +{ + /* This is equivalent to C program below. + * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "call noop;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int noop(void) +{ + asm volatile ( + "r0 = 0;" + "exit;" + ); +} + +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state2(void) +{ + /* This is equivalent to C program below. 
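+	 * (Same as absent_mark_in_the_middle_state, except that a second
+	 * path enters the loop through the 'goto +0' at jump_into_loop.)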
+ * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) { + * r6 = -31; + * jump_into_loop: + * goto +0; + * goto loop; + * } + * if (unlikely(bpf_get_prandom_u32())) + * goto jump_into_loop; + * loop: + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto jump_into_loop_%=;" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "jump_into_loop_%=: " + "goto +0;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + char _license[] SEC("license") = "GPL";