2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/kernel/bpf/verifier.c
@@ -562,6 +562,20 @@
 		btf_type_by_id(btf_vmlinux, id)->name_off);
 }
 
+/* The reg state of a pointer or a bounded scalar was saved when
+ * it was spilled to the stack.
+ */
+static bool is_spilled_reg(const struct bpf_stack_state *stack)
+{
+	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
+}
+
+static void scrub_spilled_slot(u8 *stype)
+{
+	if (*stype != STACK_INVALID)
+		*stype = STACK_MISC;
+}
+
 static void print_verifier_state(struct bpf_verifier_env *env,
 				 const struct bpf_func_state *state)
 {
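For readers following the new helpers: every 8-byte stack slot carries a per-byte slot_type[] array, and a (possibly partial) spill marks bytes from the top of the slot downward, which is why is_spilled_reg() only needs to look at the last byte. A small user-space model of that layout (illustrative only, hypothetical names, not part of the patch):

	#include <stdbool.h>
	#include <stdio.h>

	#define BPF_REG_SIZE 8
	enum { STACK_INVALID, STACK_SPILL, STACK_MISC, STACK_ZERO };

	struct stack_slot { unsigned char slot_type[BPF_REG_SIZE]; };

	/* mirrors is_spilled_reg(): only the most-significant byte of the slot
	 * needs to be STACK_SPILL for the slot to count as a spilled register */
	static bool slot_is_spilled(const struct stack_slot *s)
	{
		return s->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
	}

	int main(void)
	{
		struct stack_slot s = { { 0 } };
		int size = 4, i;

		/* a 4-byte spill marks the top 4 bytes STACK_SPILL and leaves the
		 * rest MISC/INVALID, which is what save_register_state() does
		 * after the patch */
		for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
			s.slot_type[i - 1] = STACK_SPILL;
		printf("spilled: %d\n", slot_is_spilled(&s));	/* prints 1 */
		return 0;
	}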
@@ -666,7 +680,7 @@
 			continue;
 		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
 		print_liveness(env, state->stack[i].spilled_ptr.live);
-		if (state->stack[i].slot_type[0] == STACK_SPILL) {
+		if (is_spilled_reg(&state->stack[i])) {
 			reg = &state->stack[i].spilled_ptr;
 			t = reg->type;
 			verbose(env, "=%s", reg_type_str[t]);
@@ -1345,7 +1359,7 @@
 	reg->type = SCALAR_VALUE;
 	reg->var_off = tnum_unknown;
 	reg->frameno = 0;
-	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
+	reg->precise = !env->bpf_capable;
 	__mark_reg_unbounded(reg);
 }
 
@@ -1868,8 +1882,6 @@
 	 */
 	if (insn->src_reg != BPF_REG_FP)
 		return 0;
-	if (BPF_SIZE(insn->code) != BPF_DW)
-		return 0;
 
 	/* dreg = *(u64 *)[fp - off] was a fill from the stack.
 	 * that [fp - off] slot contains scalar that needs to be
@@ -1891,8 +1903,6 @@
 			return -ENOTSUPP;
 		/* scalars can only be spilled into stack */
 		if (insn->dst_reg != BPF_REG_FP)
-			return 0;
-		if (BPF_SIZE(insn->code) != BPF_DW)
 			return 0;
 		spi = (-insn->off - 1) / BPF_REG_SIZE;
 		if (spi >= 64) {
@@ -1921,6 +1931,21 @@
 			}
 		} else if (opcode == BPF_EXIT) {
 			return -ENOTSUPP;
+		} else if (BPF_SRC(insn->code) == BPF_X) {
+			if (!(*reg_mask & (dreg | sreg)))
+				return 0;
+			/* dreg <cond> sreg
+			 * Both dreg and sreg need precision before
+			 * this insn. If only sreg was marked precise
+			 * before it would be equally necessary to
+			 * propagate it to dreg.
+			 */
+			*reg_mask |= (sreg | dreg);
+			/* else dreg <cond> K
+			 * Only dreg still needs precision before
+			 * this insn, so for the K-based conditional
+			 * there is nothing new to be marked.
+			 */
 		}
 	} else if (class == BPF_LD) {
 		if (!(*reg_mask & dreg))
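To illustrate the register masks used above (hypothetical values, not from the patch): bit i of reg_mask stands for register Ri, and walking back over a register-register conditional jump widens the set of registers that must remain precise:

	#include <stdio.h>

	int main(void)
	{
		unsigned int reg_mask = 1u << 2;		/* r2 already needs precision */
		unsigned int dreg = 1u << 2, sreg = 1u << 3;

		/* walking back over "if r2 <cond> r3 goto ..." (BPF_JMP | BPF_X):
		 * the comparison outcome depended on both registers, so both must
		 * now be tracked precisely before this instruction */
		if (reg_mask & (dreg | sreg))
			reg_mask |= dreg | sreg;

		printf("reg_mask = 0x%x\n", reg_mask);		/* 0xc: r2 and r3 */
		return 0;
	}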
@@ -1998,8 +2023,11 @@
 
 	/* big hammer: mark all scalars precise in this path.
 	 * pop_stack may still get !precise scalars.
+	 * We also skip current state and go straight to first parent state,
+	 * because precision markings in current non-checkpointed state are
+	 * not needed. See why in the comment in __mark_chain_precision below.
 	 */
-	for (; st; st = st->parent)
+	for (st = st->parent; st; st = st->parent) {
 		for (i = 0; i <= st->curframe; i++) {
 			func = st->frame[i];
 			for (j = 0; j < BPF_REG_FP; j++) {
@@ -2009,7 +2037,7 @@
 				reg->precise = true;
 			}
 			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
-				if (func->stack[j].slot_type[0] != STACK_SPILL)
+				if (!is_spilled_reg(&func->stack[j]))
 					continue;
 				reg = &func->stack[j].spilled_ptr;
 				if (reg->type != SCALAR_VALUE)
@@ -2017,9 +2045,122 @@
 				reg->precise = true;
 			}
 		}
+	}
 }
 
-static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
+static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+	struct bpf_func_state *func;
+	struct bpf_reg_state *reg;
+	int i, j;
+
+	for (i = 0; i <= st->curframe; i++) {
+		func = st->frame[i];
+		for (j = 0; j < BPF_REG_FP; j++) {
+			reg = &func->regs[j];
+			if (reg->type != SCALAR_VALUE)
+				continue;
+			reg->precise = false;
+		}
+		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+			if (!is_spilled_reg(&func->stack[j]))
+				continue;
+			reg = &func->stack[j].spilled_ptr;
+			if (reg->type != SCALAR_VALUE)
+				continue;
+			reg->precise = false;
+		}
+	}
+}
+
+/*
+ * __mark_chain_precision() backtracks BPF program instruction sequence and
+ * chain of verifier states making sure that register *regno* (if regno >= 0)
+ * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
+ * SCALARS, as well as any other registers and slots that contribute to
+ * a tracked state of given registers/stack slots, depending on specific BPF
+ * assembly instructions (see backtrack_insn() for exact instruction handling
+ * logic). This backtracking relies on recorded jmp_history and is able to
+ * traverse entire chain of parent states. This process ends only when all the
+ * necessary registers/slots and their transitive dependencies are marked as
+ * precise.
+ *
+ * One important and subtle aspect is that precise marks *do not matter* in
+ * the currently verified state (current state). It is important to understand
+ * why this is the case.
+ *
+ * First, note that current state is the state that is not yet "checkpointed",
+ * i.e., it is not yet put into env->explored_states, and it has no children
+ * states as well. It's ephemeral, and can end up either a) being discarded if
+ * compatible explored state is found at some point or BPF_EXIT instruction is
+ * reached or b) checkpointed and put into env->explored_states, branching out
+ * into one or more children states.
+ *
+ * In the former case, precise markings in current state are completely
+ * ignored by state comparison code (see regsafe() for details). Only
+ * checkpointed ("old") state precise markings are important, and if old
+ * state's register/slot is precise, regsafe() assumes current state's
+ * register/slot as precise and checks value ranges exactly and precisely. If
+ * states turn out to be compatible, current state's necessary precise
+ * markings and any required parent states' precise markings are enforced
+ * after the fact with propagate_precision() logic. But it's
+ * important to realize that in this case, even after marking current state
+ * registers/slots as precise, we immediately discard current state. So what
+ * actually matters is any of the precise markings propagated into current
+ * state's parent states, which are always checkpointed (due to b) case above).
+ * As such, for scenario a) it doesn't matter if current state has precise
+ * markings set or not.
+ *
+ * Now, for the scenario b), checkpointing and forking into child(ren)
+ * state(s). Note that before current state gets to checkpointing step, any
+ * processed instruction always assumes precise SCALAR register/slot
+ * knowledge: if precise value or range is useful to prune jump branch, BPF
+ * verifier takes this opportunity enthusiastically. Similarly, when
+ * register's value is used to calculate offset or memory address, exact
+ * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
+ * what we mentioned above about state comparison ignoring precise markings
+ * during state comparison, BPF verifier ignores and also assumes precise
+ * markings *at will* during instruction verification process. But as verifier
+ * assumes precision, it also propagates any precision dependencies across
+ * parent states, which are not yet finalized, so can be further restricted
+ * based on new knowledge gained from restrictions enforced by their children
+ * states. This is so that once those parent states are finalized, i.e., when
+ * they have no more active children state, state comparison logic in
+ * is_state_visited() would enforce strict and precise SCALAR ranges, if
+ * required for correctness.
+ *
+ * To build a bit more intuition, note also that once a state is checkpointed,
+ * the path we took to get to that state is not important. This is a crucial
+ * property for state pruning. When state is checkpointed and finalized at
+ * some instruction index, it can be correctly and safely used to "short
+ * circuit" any *compatible* state that reaches exactly the same instruction
+ * index. I.e., if we jumped to that instruction from a completely different
+ * code path than original finalized state was derived from, it doesn't
+ * matter, current state can be discarded because from that instruction
+ * forward having a compatible state will ensure we will safely reach the
+ * exit. States describe preconditions for further exploration, but completely
+ * forget the history of how we got here.
+ *
+ * This also means that even if we needed precise SCALAR range to get to
+ * finalized state, but from that point forward *that same* SCALAR register is
+ * never used in a precise context (i.e., its precise value is not needed for
+ * correctness), it's correct and safe to mark such register as "imprecise"
+ * (i.e., precise marking set to false). This is what we rely on when we do
+ * not set precise marking in current state. If no child state requires
+ * precision for any given SCALAR register, it's safe to dictate that it can
+ * be imprecise. If any child state does require this register to be precise,
+ * we'll mark it precise later retroactively during precise markings
+ * propagation from child state to parent states.
+ *
+ * Skipping precise marking setting in current state is a mild version of
+ * relying on the above observation. But we can utilize this property even
+ * more aggressively by proactively forgetting any precise marking in the
+ * current state (which we inherited from the parent state), right before we
+ * checkpoint it and branch off into new child state. This is done by
+ * mark_all_scalars_imprecise() to hopefully get more permissive and generic
+ * finalized states which help in short circuiting more future states.
+ */
+static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno,
 				  int spi)
 {
 	struct bpf_verifier_state *st = env->cur_state;
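To make the "precise context" notion from the comment concrete, here is a hypothetical BPF-C fragment (illustration only, not part of the patch): a bounded scalar whose exact range the verifier has to rely on, because it feeds an address computation, is the kind of register that ends up marked precise, while a scalar that is only stored as data needs no precise mark and can safely be forgotten at the next checkpoint.

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	char buf[64];

	SEC("socket")
	int precision_demo(struct __sk_buff *skb)
	{
		__u64 idx = skb->len & 0x3f;	/* bounded scalar: 0..63 */
		__u64 data = skb->len + 1;	/* another scalar */

		/* idx is used to form an address, so its exact bounds matter for
		 * safety; this is the kind of use that requires the register (and
		 * whatever it was derived from) to stay precisely tracked in the
		 * checkpointed parent states */
		buf[idx] = 1;

		/* data is only stored as a value; no exact range is ever needed,
		 * so an inherited precise mark on it can be dropped */
		buf[0] = (char)data;
		return 0;
	}

	char _license[] SEC("license") = "GPL";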
@@ -2036,22 +2177,22 @@
 	if (!env->bpf_capable)
 		return 0;
 
-	func = st->frame[st->curframe];
+	/* Do sanity checks against current state of register and/or stack
+	 * slot, but don't set precise flag in current state, as precision
+	 * tracking in the current state is unnecessary.
+	 */
+	func = st->frame[frame];
 	if (regno >= 0) {
 		reg = &func->regs[regno];
 		if (reg->type != SCALAR_VALUE) {
 			WARN_ONCE(1, "backtracing misuse");
 			return -EFAULT;
 		}
-		if (!reg->precise)
-			new_marks = true;
-		else
-			reg_mask = 0;
-		reg->precise = true;
+		new_marks = true;
 	}
 
 	while (spi >= 0) {
-		if (func->stack[spi].slot_type[0] != STACK_SPILL) {
+		if (!is_spilled_reg(&func->stack[spi])) {
 			stack_mask = 0;
 			break;
 		}
@@ -2060,11 +2201,7 @@
 			stack_mask = 0;
 			break;
 		}
-		if (!reg->precise)
-			new_marks = true;
-		else
-			stack_mask = 0;
-		reg->precise = true;
+		new_marks = true;
 		break;
 	}
 
@@ -2072,12 +2209,42 @@
 		return 0;
 	if (!reg_mask && !stack_mask)
 		return 0;
+
 	for (;;) {
 		DECLARE_BITMAP(mask, 64);
 		u32 history = st->jmp_history_cnt;
 
 		if (env->log.level & BPF_LOG_LEVEL)
 			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
+
+		if (last_idx < 0) {
+			/* we are at the entry into subprog, which
+			 * is expected for global funcs, but only if
+			 * requested precise registers are R1-R5
+			 * (which are global func's input arguments)
+			 */
+			if (st->curframe == 0 &&
+			    st->frame[0]->subprogno > 0 &&
+			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
+			    stack_mask == 0 && (reg_mask & ~0x3e) == 0) {
+				bitmap_from_u64(mask, reg_mask);
+				for_each_set_bit(i, mask, 32) {
+					reg = &st->frame[0]->regs[i];
+					if (reg->type != SCALAR_VALUE) {
+						reg_mask &= ~(1u << i);
+						continue;
+					}
+					reg->precise = true;
+				}
+				return 0;
+			}
+
+			verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n",
+				st->frame[0]->subprogno, reg_mask, stack_mask);
+			WARN_ONCE(1, "verifier backtracking bug");
+			return -EFAULT;
+		}
+
 		for (i = last_idx;;) {
 			if (skip_first) {
 				err = 0;
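A short worked note on the (reg_mask & ~0x3e) == 0 test above (illustrative, not patch text): bit i of reg_mask stands for Ri, and 0x3e selects exactly the argument registers R1-R5 of a global function, which are the only registers that may legitimately still need precision at function entry.

	#include <stdio.h>

	int main(void)
	{
		unsigned int r1_to_r5 = 0x3e;	/* 0b111110: bits 1..5 = R1..R5 */
		unsigned int ok  = (1u << 1) | (1u << 2);	/* R1, R2 pending */
		unsigned int bad = (1u << 0) | (1u << 6);	/* R0, R6 pending */

		printf("%d %d\n",
		       (ok & ~r1_to_r5) == 0,	/* 1: only input args remain */
		       (bad & ~r1_to_r5) == 0);	/* 0: treated as a backtracking bug */
		return 0;
	}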
@@ -2117,7 +2284,7 @@
 			break;
 
 		new_marks = false;
-		func = st->frame[st->curframe];
+		func = st->frame[frame];
 		bitmap_from_u64(mask, reg_mask);
 		for_each_set_bit(i, mask, 32) {
 			reg = &func->regs[i];
@@ -2150,7 +2317,7 @@
 				return 0;
 			}
 
-			if (func->stack[i].slot_type[0] != STACK_SPILL) {
+			if (!is_spilled_reg(&func->stack[i])) {
 				stack_mask &= ~(1ull << i);
 				continue;
 			}
@@ -2183,12 +2350,17 @@
 
 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
 {
-	return __mark_chain_precision(env, regno, -1);
+	return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
 }
 
-static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
+static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
 {
-	return __mark_chain_precision(env, -1, spi);
+	return __mark_chain_precision(env, frame, regno, -1);
+}
+
+static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
+{
+	return __mark_chain_precision(env, frame, -1, spi);
 }
 
 static bool is_spillable_regtype(enum bpf_reg_type type)
@@ -2259,16 +2431,38 @@
 	return reg->type != SCALAR_VALUE;
 }
 
+/* Copy src state preserving dst->parent and dst->live fields */
+static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
+{
+	struct bpf_reg_state *parent = dst->parent;
+	enum bpf_reg_liveness live = dst->live;
+
+	*dst = *src;
+	dst->parent = parent;
+	dst->live = live;
+}
+
 static void save_register_state(struct bpf_func_state *state,
-				int spi, struct bpf_reg_state *reg)
+				int spi, struct bpf_reg_state *reg,
+				int size)
 {
 	int i;
 
-	state->stack[spi].spilled_ptr = *reg;
-	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+	copy_register_state(&state->stack[spi].spilled_ptr, reg);
+	if (size == BPF_REG_SIZE)
+		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 
-	for (i = 0; i < BPF_REG_SIZE; i++)
-		state->stack[spi].slot_type[i] = STACK_SPILL;
+	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
+		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
+
+	/* size < 8 bytes spill */
+	for (; i; i--)
+		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
+}
+
+static bool is_bpf_st_mem(struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
 }
 
 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
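Why copy_register_state() is not a plain struct assignment: the destination's parent pointer and liveness flags belong to the destination state's own read/write tracking and must survive the copy. A minimal user-space model of the idea (hypothetical struct, not the kernel one):

	struct toy_reg {
		int type;
		long long value;		/* stands in for bounds/var_off/id/... */
		struct toy_reg *parent;		/* link into the parent verifier state */
		unsigned int live;		/* liveness marks owned by *this* state */
	};

	/* a plain "*dst = *src" would also overwrite dst->parent and dst->live,
	 * silently detaching dst from its liveness chain; preserving them is
	 * the whole point of the helper */
	void toy_copy_register_state(struct toy_reg *dst, const struct toy_reg *src)
	{
		struct toy_reg *parent = dst->parent;
		unsigned int live = dst->live;

		*dst = *src;
		dst->parent = parent;
		dst->live = live;
	}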
@@ -2282,8 +2476,9 @@
 {
 	struct bpf_func_state *cur; /* state of the current function */
 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
-	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
+	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
 	struct bpf_reg_state *reg = NULL;
+	u32 dst_reg = insn->dst_reg;
 
 	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
 				 state->acquired_refs, true);
@@ -2306,7 +2501,9 @@
 		bool sanitize = reg && is_spillable_regtype(reg->type);
 
 		for (i = 0; i < size; i++) {
-			if (state->stack[spi].slot_type[i] == STACK_INVALID) {
+			u8 type = state->stack[spi].slot_type[i];
+
+			if (type != STACK_MISC && type != STACK_ZERO) {
 				sanitize = true;
 				break;
 			}
@@ -2316,7 +2513,7 @@
 			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
 	}
 
-	if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
+	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
 	    !register_is_null(reg) && env->bpf_capable) {
 		if (dst_reg != BPF_REG_FP) {
 			/* The backtracking logic can only recognize explicit
@@ -2329,7 +2526,17 @@
 			if (err)
 				return err;
 		}
-		save_register_state(state, spi, reg);
+		save_register_state(state, spi, reg, size);
+		/* Break the relation on a narrowing spill. */
+		if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
+			state->stack[spi].spilled_ptr.id = 0;
+	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
+		   insn->imm != 0 && env->bpf_capable) {
+		struct bpf_reg_state fake_reg = {};
+
+		__mark_reg_known(&fake_reg, (u32)insn->imm);
+		fake_reg.type = SCALAR_VALUE;
+		save_register_state(state, spi, &fake_reg, size);
 	} else if (reg && is_spillable_regtype(reg->type)) {
 		/* register containing pointer is being spilled into stack */
 		if (size != BPF_REG_SIZE) {
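The new is_bpf_st_mem() branch lets the verifier track a constant stored to the stack by a BPF_ST instruction the same way it tracks a register spill. A hypothetical instruction sequence that benefits from it, written with the insn macros from the kernel's include/linux/filter.h (also mirrored under tools/include):

	struct bpf_insn prog[] = {
		/* *(u64 *)(r10 - 8) = 42 : with the patch this slot is tracked as
		 * a known SCALAR_VALUE (via fake_reg), not just STACK_MISC */
		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 42),
		/* r0 = *(u64 *)(r10 - 8) : the fill restores the known value 42 */
		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
		BPF_EXIT_INSN(),
	};

The imm == 0 case is handled separately further down, where the slot is marked STACK_ZERO instead.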
@@ -2341,16 +2548,16 @@
 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
 			return -EINVAL;
 		}
-		save_register_state(state, spi, reg);
+		save_register_state(state, spi, reg, size);
 	} else {
 		u8 type = STACK_MISC;
 
 		/* regular write of data into stack destroys any spilled ptr */
 		state->stack[spi].spilled_ptr.type = NOT_INIT;
 		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
-		if (state->stack[spi].slot_type[0] == STACK_SPILL)
+		if (is_spilled_reg(&state->stack[spi]))
 			for (i = 0; i < BPF_REG_SIZE; i++)
-				state->stack[spi].slot_type[i] = STACK_MISC;
+				scrub_spilled_slot(&state->stack[spi].slot_type[i]);
 
 		/* only mark the slot as written if all 8 bytes were written
 		 * otherwise read propagation may incorrectly stop too soon
@@ -2364,7 +2571,8 @@
 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 
 		/* when we zero initialize stack slots mark them as such */
-		if (reg && register_is_null(reg)) {
+		if ((reg && register_is_null(reg)) ||
+		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
 			/* backtracking doesn't work for STACK_ZERO yet. */
 			err = mark_chain_precision(env, value_regno);
 			if (err)
@@ -2439,14 +2647,17 @@
 		spi = slot / BPF_REG_SIZE;
 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
 
-		if (!env->allow_ptr_leaks
-		    && *stype != NOT_INIT
-		    && *stype != SCALAR_VALUE) {
-			/* Reject the write if there's are spilled pointers in
-			 * range. If we didn't reject here, the ptr status
-			 * would be erased below (even though not all slots are
-			 * actually overwritten), possibly opening the door to
-			 * leaks.
+		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
+			/* Reject the write if range we may write to has not
+			 * been initialized beforehand. If we didn't reject
+			 * here, the ptr status would be erased below (even
+			 * though not all slots are actually overwritten),
+			 * possibly opening the door to leaks.
+			 *
+			 * We do however catch STACK_INVALID case below, and
+			 * only allow reading possibly uninitialized memory
+			 * later for CAP_PERFMON, as the write may not happen to
+			 * that slot.
 			 */
 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
 				insn_idx, i);
@@ -2554,35 +2765,56 @@
 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
 	struct bpf_reg_state *reg;
-	u8 *stype;
+	u8 *stype, type;
 
 	stype = reg_state->stack[spi].slot_type;
 	reg = &reg_state->stack[spi].spilled_ptr;
 
-	if (stype[0] == STACK_SPILL) {
-		if (size != BPF_REG_SIZE) {
+	if (is_spilled_reg(&reg_state->stack[spi])) {
+		u8 spill_size = 1;
+
+		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
+			spill_size++;
+
+		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
 			if (reg->type != SCALAR_VALUE) {
 				verbose_linfo(env, env->insn_idx, "; ");
 				verbose(env, "invalid size of register fill\n");
 				return -EACCES;
 			}
-			if (dst_regno >= 0) {
-				mark_reg_unknown(env, state->regs, dst_regno);
-				state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
-			}
+
 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
-			return 0;
-		}
-		for (i = 1; i < BPF_REG_SIZE; i++) {
-			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
-				verbose(env, "corrupted spill memory\n");
-				return -EACCES;
+			if (dst_regno < 0)
+				return 0;
+
+			if (!(off % BPF_REG_SIZE) && size == spill_size) {
+				/* The earlier check_reg_arg() has decided the
+				 * subreg_def for this insn. Save it first.
+				 */
+				s32 subreg_def = state->regs[dst_regno].subreg_def;
+
+				copy_register_state(&state->regs[dst_regno], reg);
+				state->regs[dst_regno].subreg_def = subreg_def;
+			} else {
+				for (i = 0; i < size; i++) {
+					type = stype[(slot - i) % BPF_REG_SIZE];
+					if (type == STACK_SPILL)
+						continue;
+					if (type == STACK_MISC)
+						continue;
+					verbose(env, "invalid read from stack off %d+%d size %d\n",
						off, i, size);
+					return -EACCES;
+				}
+				mark_reg_unknown(env, state->regs, dst_regno);
 			}
+			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
+			return 0;
 		}
 
 		if (dst_regno >= 0) {
 			/* restore register state from stack */
-			state->regs[dst_regno] = *reg;
+			copy_register_state(&state->regs[dst_regno], reg);
 			/* mark reg as written since spilled pointer state likely
 			 * has its liveness marks cleared by is_state_visited()
 			 * which resets stack/reg liveness for state transitions
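With the per-byte spill tracking above, a sub-8-byte spill/fill round trip can keep the register's known state instead of degrading to an unknown scalar. A hypothetical sequence (same insn macros as above) that exercises the new size == spill_size path:

	struct bpf_insn prog[] = {
		BPF_MOV32_IMM(BPF_REG_1, 7),
		/* 4-byte spill to an aligned slot: the top four slot_type[] bytes
		 * become STACK_SPILL and spilled_ptr remembers the bounded scalar */
		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -8),
		/* 4-byte fill of the same slot: size == spill_size, so the verifier
		 * can copy the saved register state instead of mark_reg_unknown() */
		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
		BPF_EXIT_INSN(),
	};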
@@ -2601,8 +2833,6 @@
 		}
 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 	} else {
-		u8 type;
-
 		for (i = 0; i < size; i++) {
 			type = stype[(slot - i) % BPF_REG_SIZE];
 			if (type == STACK_MISC)
@@ -2704,17 +2934,13 @@
 	}
 	/* Variable offset is prohibited for unprivileged mode for simplicity
 	 * since it requires corresponding support in Spectre masking for stack
-	 * ALU. See also retrieve_ptr_limit().
+	 * ALU. See also retrieve_ptr_limit(). The check in
+	 * check_stack_access_for_ptr_arithmetic() called by
+	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
+	 * with variable offsets, therefore no check is required here. Further,
+	 * just checking it here would be insufficient as speculative stack
+	 * writes could still lead to unsafe speculative behaviour.
 	 */
-	if (!env->bypass_spec_v1 && var_off) {
-		char tn_buf[48];
-
-		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-		verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
-			ptr_regno, tn_buf);
-		return -EACCES;
-	}
-
 	if (!var_off) {
 		off += reg->var_off.value;
 		err = check_stack_read_fixed_off(env, state, off, size,
@@ -4078,17 +4304,17 @@
 			goto mark;
 		}
 
-		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+		if (is_spilled_reg(&state->stack[spi]) &&
 		    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
 			goto mark;
 
-		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+		if (is_spilled_reg(&state->stack[spi]) &&
 		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
 		     env->allow_ptr_leaks)) {
 			if (clobber) {
 				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
 				for (j = 0; j < BPF_REG_SIZE; j++)
-					state->stack[spi].slot_type[j] = STACK_MISC;
+					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
 			}
 			goto mark;
 		}
@@ -5845,7 +6071,7 @@
 	 */
 	if (!ptr_is_dst_reg) {
 		tmp = *dst_reg;
-		*dst_reg = *ptr_reg;
+		copy_register_state(dst_reg, ptr_reg);
 	}
 	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
 					env->insn_idx);
@@ -6989,6 +7215,11 @@
 				return err;
 			return adjust_ptr_min_max_vals(env, insn,
 						       dst_reg, src_reg);
+		} else if (dst_reg->precise) {
+			/* if dst_reg is precise, src_reg should be precise as well */
+			err = mark_chain_precision(env, insn->src_reg);
+			if (err)
+				return err;
 		}
 	} else {
 		/* Pretend the src is a reg with a known value, since we only
@@ -7094,7 +7325,7 @@
 			 * to propagate min/max range.
 			 */
 			src_reg->id = ++env->id_gen;
-			*dst_reg = *src_reg;
+			copy_register_state(dst_reg, src_reg);
 			dst_reg->live |= REG_LIVE_WRITTEN;
 			dst_reg->subreg_def = DEF_NOT_SUBREG;
 		} else {
@@ -7105,7 +7336,7 @@
 				insn->src_reg);
 			return -EACCES;
 		} else if (src_reg->type == SCALAR_VALUE) {
-			*dst_reg = *src_reg;
+			copy_register_state(dst_reg, src_reg);
 			/* Make sure ID is cleared otherwise
 			 * dst_reg min/max could be incorrectly
 			 * propagated into src_reg by find_equal_scalars()
@@ -7925,7 +8156,7 @@
 
 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
 		if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
-			*reg = *known_reg;
+			copy_register_state(reg, known_reg);
 	}));
 }
 
@@ -9144,7 +9375,7 @@
 		if (env->explore_alu_limits)
 			return false;
 		if (rcur->type == SCALAR_VALUE) {
-			if (!rold->precise && !rcur->precise)
+			if (!rold->precise)
 				return true;
 			/* new val must satisfy old val knowledge */
 			return range_within(rold, rcur) &&
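The regsafe() change follows from the long comment earlier in the patch: only the checkpointed (old) state's precise flag is meaningful, the current state's flag is noise. A simplified user-space sketch of that pruning decision for two scalars, ignoring the tnum and 32-bit bounds that the real regsafe() also checks (names are hypothetical):

	#include <stdbool.h>

	struct toy_scalar {
		bool precise;
		long long smin, smax;
	};

	bool toy_scalar_safe(const struct toy_scalar *rold, const struct toy_scalar *rcur)
	{
		/* the old state never depended on the exact value, so any current
		 * scalar can be pruned against it */
		if (!rold->precise)
			return true;
		/* otherwise the current range must fit inside the old one
		 * (the range_within() part of regsafe()) */
		return rcur->smin >= rold->smin && rcur->smax <= rold->smax;
	}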
@@ -9274,9 +9505,9 @@
 			 * return false to continue verification of this path
 			 */
 			return false;
-		if (i % BPF_REG_SIZE)
+		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
 			continue;
-		if (old->stack[spi].slot_type[0] != STACK_SPILL)
+		if (!is_spilled_reg(&old->stack[spi]))
 			continue;
 		if (!regsafe(env, &old->stack[spi].spilled_ptr,
 			     &cur->stack[spi].spilled_ptr, idmap))
@@ -9467,34 +9698,38 @@
 {
 	struct bpf_reg_state *state_reg;
 	struct bpf_func_state *state;
-	int i, err = 0;
+	int i, err = 0, fr;
 
-	state = old->frame[old->curframe];
-	state_reg = state->regs;
-	for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
-		if (state_reg->type != SCALAR_VALUE ||
-		    !state_reg->precise)
-			continue;
-		if (env->log.level & BPF_LOG_LEVEL2)
-			verbose(env, "propagating r%d\n", i);
-		err = mark_chain_precision(env, i);
-		if (err < 0)
-			return err;
-	}
+	for (fr = old->curframe; fr >= 0; fr--) {
+		state = old->frame[fr];
+		state_reg = state->regs;
+		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
+			if (state_reg->type != SCALAR_VALUE ||
+			    !state_reg->precise ||
+			    !(state_reg->live & REG_LIVE_READ))
+				continue;
+			if (env->log.level & BPF_LOG_LEVEL2)
+				verbose(env, "frame %d: propagating r%d\n", fr, i);
+			err = mark_chain_precision_frame(env, fr, i);
+			if (err < 0)
+				return err;
+		}
 
-	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
-		if (state->stack[i].slot_type[0] != STACK_SPILL)
-			continue;
-		state_reg = &state->stack[i].spilled_ptr;
-		if (state_reg->type != SCALAR_VALUE ||
-		    !state_reg->precise)
-			continue;
-		if (env->log.level & BPF_LOG_LEVEL2)
-			verbose(env, "propagating fp%d\n",
-				(-i - 1) * BPF_REG_SIZE);
-		err = mark_chain_precision_stack(env, i);
-		if (err < 0)
-			return err;
+		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+			if (!is_spilled_reg(&state->stack[i]))
+				continue;
+			state_reg = &state->stack[i].spilled_ptr;
+			if (state_reg->type != SCALAR_VALUE ||
+			    !state_reg->precise ||
+			    !(state_reg->live & REG_LIVE_READ))
+				continue;
+			if (env->log.level & BPF_LOG_LEVEL2)
+				verbose(env, "frame %d: propagating fp%d\n",
+					fr, (-i - 1) * BPF_REG_SIZE);
+			err = mark_chain_precision_stack_frame(env, fr, i);
+			if (err < 0)
+				return err;
+		}
 	}
 	return 0;
 }
@@ -9672,6 +9907,10 @@
 	env->peak_states++;
 	env->prev_jmps_processed = env->jmps_processed;
 	env->prev_insn_processed = env->insn_processed;
+
+	/* forget precise markings we inherited, see __mark_chain_precision */
+	if (env->bpf_capable)
+		mark_all_scalars_imprecise(env, cur);
 
 	/* add new state to the head of linked list */
 	new = &new_sl->state;
@@ -11070,7 +11309,7 @@
 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
 								insn->dst_reg,
 								shift);
-			insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
+			insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
 							(1ULL << size * 8) - 1);
 		}
 	}
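A short note on why the masking instruction is switched to its 32-bit form (my reading of the change, not text from the patch): the BPF immediate field is a signed 32-bit value, so for a 4-byte narrow load the mask (1ULL << 32) - 1 has to be encoded as -1. A 64-bit AND sign-extends that immediate and masks nothing, while the 32-bit AND operates on the low 32 bits and zero-extends the result, which is the truncation the rewritten ctx load actually needs. A small user-space check of the arithmetic:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long dst = 0x123456789abcdef0ULL;
		int imm = (int)((1ULL << 32) - 1);	/* encodes as s32 -1 */

		/* BPF_ALU64_IMM(BPF_AND, dst, imm): imm sign-extends to all ones */
		unsigned long long alu64 = dst & (unsigned long long)(long long)imm;
		/* BPF_ALU32_IMM(BPF_AND, dst, imm): low 32 bits, zero-extended */
		unsigned long long alu32 = (unsigned int)dst & (unsigned int)imm;

		printf("%#llx\n", alu64);	/* 0x123456789abcdef0: no masking */
		printf("%#llx\n", alu32);	/* 0x9abcdef0: truncated as intended */
		return 0;
	}

For 1- and 2-byte accesses both forms compute the same value; the 32-bit form is simply correct for every narrow size.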
@@ -11771,6 +12010,9 @@
 			0 /* frameno */,
 			subprog);
 
+	state->first_insn_idx = env->subprog_info[subprog].start;
+	state->last_insn_idx = -1;
+
 	regs = state->frame[state->curframe]->regs;
 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
 		ret = btf_prepare_func_args(env, subprog, regs);