.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com |
---|
2 | 3 | * Copyright (c) 2016 Facebook |
---|
3 | | - * |
---|
4 | | - * This program is free software; you can redistribute it and/or |
---|
5 | | - * modify it under the terms of version 2 of the GNU General Public |
---|
6 | | - * License as published by the Free Software Foundation. |
---|
7 | | - * |
---|
8 | | - * This program is distributed in the hope that it will be useful, but |
---|
9 | | - * WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
10 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
11 | | - * General Public License for more details. |
---|
| 4 | + * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io |
---|
12 | 5 | */ |
---|
| 6 | +#include <uapi/linux/btf.h> |
---|
13 | 7 | #include <linux/kernel.h> |
---|
14 | 8 | #include <linux/types.h> |
---|
15 | 9 | #include <linux/slab.h> |
---|
16 | 10 | #include <linux/bpf.h> |
---|
| 11 | +#include <linux/btf.h> |
---|
17 | 12 | #include <linux/bpf_verifier.h> |
---|
18 | 13 | #include <linux/filter.h> |
---|
19 | 14 | #include <net/netlink.h> |
---|
.. | .. |
---|
23 | 18 | #include <linux/bsearch.h> |
---|
24 | 19 | #include <linux/sort.h> |
---|
25 | 20 | #include <linux/perf_event.h> |
---|
| 21 | +#include <linux/ctype.h> |
---|
| 22 | +#include <linux/error-injection.h> |
---|
| 23 | +#include <linux/bpf_lsm.h> |
---|
| 24 | +#include <linux/btf_ids.h> |
---|
26 | 25 | |
---|
27 | 26 | #include "disasm.h" |
---|
28 | 27 | |
---|
/* Table of verifier ops indexed by program type id.
 *
 * <linux/bpf_types.h> is included with BPF_PROG_TYPE() defined so that each
 * program-type entry expands to one designated initializer pointing at the
 * matching <name>_verifier_ops object.  BPF_MAP_TYPE() and BPF_LINK_TYPE()
 * are defined empty so those entries contribute nothing to this table.
 * All three helper macros are undefined again right after the include.
 */
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
---|
37 | 38 | |
---|
38 | 39 | /* bpf_check() is a static code analyzer that walks eBPF program |
---|
.. | .. |
---|
80 | 81 | * (like pointer plus pointer becomes SCALAR_VALUE type) |
---|
81 | 82 | * |
---|
82 | 83 | * When verifier sees load or store instructions the type of base register |
---|
83 | | - * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer |
---|
84 | | - * types recognized by check_mem_access() function. |
---|
| 84 | + * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are |
---|
| 85 | + * four pointer types recognized by check_mem_access() function. |
---|
85 | 86 | * |
---|
86 | 87 | * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value' |
---|
87 | 88 | * and the range of [ptr, ptr + map's value_size) is accessible. |
---|
.. | .. |
---|
140 | 141 | * |
---|
141 | 142 | * After the call R0 is set to return type of the function and registers R1-R5 |
---|
142 | 143 | * are set to NOT_INIT to indicate that they are no longer readable. |
---|
| 144 | + * |
---|
| 145 | + * The following reference types represent a potential reference to a kernel |
---|
| 146 | + * resource which, after first being allocated, must be checked and freed by |
---|
| 147 | + * the BPF program: |
---|
| 148 | + * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET |
---|
| 149 | + * |
---|
| 150 | + * When the verifier sees a helper call return a reference type, it allocates a |
---|
| 151 | + * pointer id for the reference and stores it in the current function state. |
---|
| 152 | + * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into |
---|
| 153 | + * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type |
---|
| 154 | + * passes through a NULL-check conditional. For the branch wherein the state is |
---|
| 155 | + * changed to CONST_IMM, the verifier releases the reference. |
---|
| 156 | + * |
---|
| 157 | + * For each helper function that allocates a reference, such as |
---|
| 158 | + * bpf_sk_lookup_tcp(), there is a corresponding release function, such as |
---|
| 159 | + * bpf_sk_release(). When a reference type passes into the release function, |
---|
| 160 | + * the verifier also releases the reference. If any unchecked or unreleased |
---|
| 161 | + * reference remains at the end of the program, the verifier rejects it. |
---|
143 | 162 | */ |
---|
144 | 163 | |
---|
145 | 164 | /* verifier_state + insn_idx are pushed to stack when branch is encountered */ |
---|
.. | .. |
---|
152 | 171 | int insn_idx; |
---|
153 | 172 | int prev_insn_idx; |
---|
154 | 173 | struct bpf_verifier_stack_elem *next; |
---|
| 174 | + /* length of verifier log at the time this state was pushed on stack */ |
---|
| 175 | + u32 log_pos; |
---|
155 | 176 | }; |
---|
156 | 177 | |
---|
157 | | -#define BPF_COMPLEXITY_LIMIT_INSNS 131072 |
---|
158 | | -#define BPF_COMPLEXITY_LIMIT_STACK 1024 |
---|
| 178 | +#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192 |
---|
159 | 179 | #define BPF_COMPLEXITY_LIMIT_STATES 64 |
---|
| 180 | + |
---|
| 181 | +#define BPF_MAP_KEY_POISON (1ULL << 63) |
---|
| 182 | +#define BPF_MAP_KEY_SEEN (1ULL << 62) |
---|
160 | 183 | |
---|
161 | 184 | #define BPF_MAP_PTR_UNPRIV 1UL |
---|
162 | 185 | #define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ |
---|
.. | .. |
---|
165 | 188 | |
---|
166 | 189 | static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) |
---|
167 | 190 | { |
---|
168 | | - return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON; |
---|
| 191 | + return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON; |
---|
169 | 192 | } |
---|
170 | 193 | |
---|
171 | 194 | static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux) |
---|
172 | 195 | { |
---|
173 | | - return aux->map_state & BPF_MAP_PTR_UNPRIV; |
---|
| 196 | + return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV; |
---|
174 | 197 | } |
---|
175 | 198 | |
---|
176 | 199 | static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, |
---|
.. | .. |
---|
178 | 201 | { |
---|
179 | 202 | BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV); |
---|
180 | 203 | unpriv |= bpf_map_ptr_unpriv(aux); |
---|
181 | | - aux->map_state = (unsigned long)map | |
---|
182 | | - (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL); |
---|
| 204 | + aux->map_ptr_state = (unsigned long)map | |
---|
| 205 | + (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL); |
---|
| 206 | +} |
---|
| 207 | + |
---|
| 208 | +static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux) |
---|
| 209 | +{ |
---|
| 210 | + return aux->map_key_state & BPF_MAP_KEY_POISON; |
---|
| 211 | +} |
---|
| 212 | + |
---|
| 213 | +static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux) |
---|
| 214 | +{ |
---|
| 215 | + return !(aux->map_key_state & BPF_MAP_KEY_SEEN); |
---|
| 216 | +} |
---|
| 217 | + |
---|
| 218 | +static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux) |
---|
| 219 | +{ |
---|
| 220 | + return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON); |
---|
| 221 | +} |
---|
| 222 | + |
---|
| 223 | +static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state) |
---|
| 224 | +{ |
---|
| 225 | + bool poisoned = bpf_map_key_poisoned(aux); |
---|
| 226 | + |
---|
| 227 | + aux->map_key_state = state | BPF_MAP_KEY_SEEN | |
---|
| 228 | + (poisoned ? BPF_MAP_KEY_POISON : 0ULL); |
---|
183 | 229 | } |
---|
184 | 230 | |
---|
185 | 231 | struct bpf_call_arg_meta { |
---|
.. | .. |
---|
188 | 234 | bool pkt_access; |
---|
189 | 235 | int regno; |
---|
190 | 236 | int access_size; |
---|
| 237 | + int mem_size; |
---|
191 | 238 | u64 msize_max_value; |
---|
| 239 | + int ref_obj_id; |
---|
| 240 | + int func_id; |
---|
| 241 | + u32 btf_id; |
---|
| 242 | + u32 ret_btf_id; |
---|
192 | 243 | }; |
---|
193 | 244 | |
---|
| 245 | +struct btf *btf_vmlinux; |
---|
| 246 | + |
---|
194 | 247 | static DEFINE_MUTEX(bpf_verifier_lock); |
---|
| 248 | + |
---|
| 249 | +static const struct bpf_line_info * |
---|
| 250 | +find_linfo(const struct bpf_verifier_env *env, u32 insn_off) |
---|
| 251 | +{ |
---|
| 252 | + const struct bpf_line_info *linfo; |
---|
| 253 | + const struct bpf_prog *prog; |
---|
| 254 | + u32 i, nr_linfo; |
---|
| 255 | + |
---|
| 256 | + prog = env->prog; |
---|
| 257 | + nr_linfo = prog->aux->nr_linfo; |
---|
| 258 | + |
---|
| 259 | + if (!nr_linfo || insn_off >= prog->len) |
---|
| 260 | + return NULL; |
---|
| 261 | + |
---|
| 262 | + linfo = prog->aux->linfo; |
---|
| 263 | + for (i = 1; i < nr_linfo; i++) |
---|
| 264 | + if (insn_off < linfo[i].insn_off) |
---|
| 265 | + break; |
---|
| 266 | + |
---|
| 267 | + return &linfo[i - 1]; |
---|
| 268 | +} |
---|
195 | 269 | |
---|
196 | 270 | void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, |
---|
197 | 271 | va_list args) |
---|
.. | .. |
---|
206 | 280 | n = min(log->len_total - log->len_used - 1, n); |
---|
207 | 281 | log->kbuf[n] = '\0'; |
---|
208 | 282 | |
---|
| 283 | + if (log->level == BPF_LOG_KERNEL) { |
---|
| 284 | + pr_err("BPF:%s\n", log->kbuf); |
---|
| 285 | + return; |
---|
| 286 | + } |
---|
209 | 287 | if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) |
---|
210 | 288 | log->len_used += n; |
---|
211 | 289 | else |
---|
| 290 | + log->ubuf = NULL; |
---|
| 291 | +} |
---|
| 292 | + |
---|
| 293 | +static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos) |
---|
| 294 | +{ |
---|
| 295 | + char zero = 0; |
---|
| 296 | + |
---|
| 297 | + if (!bpf_verifier_log_needed(log)) |
---|
| 298 | + return; |
---|
| 299 | + |
---|
| 300 | + log->len_used = new_pos; |
---|
| 301 | + if (put_user(zero, log->ubuf + new_pos)) |
---|
212 | 302 | log->ubuf = NULL; |
---|
213 | 303 | } |
---|
214 | 304 | |
---|
.. | .. |
---|
243 | 333 | va_end(args); |
---|
244 | 334 | } |
---|
245 | 335 | |
---|
| 336 | +__printf(2, 3) void bpf_log(struct bpf_verifier_log *log, |
---|
| 337 | + const char *fmt, ...) |
---|
| 338 | +{ |
---|
| 339 | + va_list args; |
---|
| 340 | + |
---|
| 341 | + if (!bpf_verifier_log_needed(log)) |
---|
| 342 | + return; |
---|
| 343 | + |
---|
| 344 | + va_start(args, fmt); |
---|
| 345 | + bpf_verifier_vlog(log, fmt, args); |
---|
| 346 | + va_end(args); |
---|
| 347 | +} |
---|
| 348 | + |
---|
/* Skip leading whitespace: return a pointer to the first character of @s for
 * which isspace() is false (possibly the terminating NUL).
 */
static const char *ltrim(const char *s)
{
	const char *cursor;

	for (cursor = s; isspace(*cursor); cursor++)
		;

	return cursor;
}
---|
| 356 | + |
---|
| 357 | +__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, |
---|
| 358 | + u32 insn_off, |
---|
| 359 | + const char *prefix_fmt, ...) |
---|
| 360 | +{ |
---|
| 361 | + const struct bpf_line_info *linfo; |
---|
| 362 | + |
---|
| 363 | + if (!bpf_verifier_log_needed(&env->log)) |
---|
| 364 | + return; |
---|
| 365 | + |
---|
| 366 | + linfo = find_linfo(env, insn_off); |
---|
| 367 | + if (!linfo || linfo == env->prev_linfo) |
---|
| 368 | + return; |
---|
| 369 | + |
---|
| 370 | + if (prefix_fmt) { |
---|
| 371 | + va_list args; |
---|
| 372 | + |
---|
| 373 | + va_start(args, prefix_fmt); |
---|
| 374 | + bpf_verifier_vlog(&env->log, prefix_fmt, args); |
---|
| 375 | + va_end(args); |
---|
| 376 | + } |
---|
| 377 | + |
---|
| 378 | + verbose(env, "%s\n", |
---|
| 379 | + ltrim(btf_name_by_offset(env->prog->aux->btf, |
---|
| 380 | + linfo->line_off))); |
---|
| 381 | + |
---|
| 382 | + env->prev_linfo = linfo; |
---|
| 383 | +} |
---|
| 384 | + |
---|
246 | 385 | static bool type_is_pkt_pointer(enum bpf_reg_type type) |
---|
247 | 386 | { |
---|
248 | 387 | return type == PTR_TO_PACKET || |
---|
249 | 388 | type == PTR_TO_PACKET_META; |
---|
| 389 | +} |
---|
| 390 | + |
---|
| 391 | +static bool type_is_sk_pointer(enum bpf_reg_type type) |
---|
| 392 | +{ |
---|
| 393 | + return type == PTR_TO_SOCKET || |
---|
| 394 | + type == PTR_TO_SOCK_COMMON || |
---|
| 395 | + type == PTR_TO_TCP_SOCK || |
---|
| 396 | + type == PTR_TO_XDP_SOCK; |
---|
| 397 | +} |
---|
| 398 | + |
---|
| 399 | +static bool reg_type_not_null(enum bpf_reg_type type) |
---|
| 400 | +{ |
---|
| 401 | + return type == PTR_TO_SOCKET || |
---|
| 402 | + type == PTR_TO_TCP_SOCK || |
---|
| 403 | + type == PTR_TO_MAP_VALUE || |
---|
| 404 | + type == PTR_TO_SOCK_COMMON; |
---|
| 405 | +} |
---|
| 406 | + |
---|
| 407 | +static bool reg_type_may_be_null(enum bpf_reg_type type) |
---|
| 408 | +{ |
---|
| 409 | + return type == PTR_TO_MAP_VALUE_OR_NULL || |
---|
| 410 | + type == PTR_TO_SOCKET_OR_NULL || |
---|
| 411 | + type == PTR_TO_SOCK_COMMON_OR_NULL || |
---|
| 412 | + type == PTR_TO_TCP_SOCK_OR_NULL || |
---|
| 413 | + type == PTR_TO_BTF_ID_OR_NULL || |
---|
| 414 | + type == PTR_TO_MEM_OR_NULL || |
---|
| 415 | + type == PTR_TO_RDONLY_BUF_OR_NULL || |
---|
| 416 | + type == PTR_TO_RDWR_BUF_OR_NULL; |
---|
| 417 | +} |
---|
| 418 | + |
---|
| 419 | +static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) |
---|
| 420 | +{ |
---|
| 421 | + return reg->type == PTR_TO_MAP_VALUE && |
---|
| 422 | + map_value_has_spin_lock(reg->map_ptr); |
---|
| 423 | +} |
---|
| 424 | + |
---|
| 425 | +static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) |
---|
| 426 | +{ |
---|
| 427 | + return type == PTR_TO_SOCKET || |
---|
| 428 | + type == PTR_TO_SOCKET_OR_NULL || |
---|
| 429 | + type == PTR_TO_TCP_SOCK || |
---|
| 430 | + type == PTR_TO_TCP_SOCK_OR_NULL || |
---|
| 431 | + type == PTR_TO_MEM || |
---|
| 432 | + type == PTR_TO_MEM_OR_NULL; |
---|
| 433 | +} |
---|
| 434 | + |
---|
| 435 | +static bool arg_type_may_be_refcounted(enum bpf_arg_type type) |
---|
| 436 | +{ |
---|
| 437 | + return type == ARG_PTR_TO_SOCK_COMMON; |
---|
| 438 | +} |
---|
| 439 | + |
---|
| 440 | +static bool arg_type_may_be_null(enum bpf_arg_type type) |
---|
| 441 | +{ |
---|
| 442 | + return type == ARG_PTR_TO_MAP_VALUE_OR_NULL || |
---|
| 443 | + type == ARG_PTR_TO_MEM_OR_NULL || |
---|
| 444 | + type == ARG_PTR_TO_CTX_OR_NULL || |
---|
| 445 | + type == ARG_PTR_TO_SOCKET_OR_NULL || |
---|
| 446 | + type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; |
---|
| 447 | +} |
---|
| 448 | + |
---|
| 449 | +/* Determine whether the function releases some resources allocated by another |
---|
| 450 | + * function call. The first reference type argument will be assumed to be |
---|
| 451 | + * released by release_reference(). |
---|
| 452 | + */ |
---|
| 453 | +static bool is_release_function(enum bpf_func_id func_id) |
---|
| 454 | +{ |
---|
| 455 | + return func_id == BPF_FUNC_sk_release || |
---|
| 456 | + func_id == BPF_FUNC_ringbuf_submit || |
---|
| 457 | + func_id == BPF_FUNC_ringbuf_discard; |
---|
| 458 | +} |
---|
| 459 | + |
---|
| 460 | +static bool may_be_acquire_function(enum bpf_func_id func_id) |
---|
| 461 | +{ |
---|
| 462 | + return func_id == BPF_FUNC_sk_lookup_tcp || |
---|
| 463 | + func_id == BPF_FUNC_sk_lookup_udp || |
---|
| 464 | + func_id == BPF_FUNC_skc_lookup_tcp || |
---|
| 465 | + func_id == BPF_FUNC_map_lookup_elem || |
---|
| 466 | + func_id == BPF_FUNC_ringbuf_reserve; |
---|
| 467 | +} |
---|
| 468 | + |
---|
| 469 | +static bool is_acquire_function(enum bpf_func_id func_id, |
---|
| 470 | + const struct bpf_map *map) |
---|
| 471 | +{ |
---|
| 472 | + enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC; |
---|
| 473 | + |
---|
| 474 | + if (func_id == BPF_FUNC_sk_lookup_tcp || |
---|
| 475 | + func_id == BPF_FUNC_sk_lookup_udp || |
---|
| 476 | + func_id == BPF_FUNC_skc_lookup_tcp || |
---|
| 477 | + func_id == BPF_FUNC_ringbuf_reserve) |
---|
| 478 | + return true; |
---|
| 479 | + |
---|
| 480 | + if (func_id == BPF_FUNC_map_lookup_elem && |
---|
| 481 | + (map_type == BPF_MAP_TYPE_SOCKMAP || |
---|
| 482 | + map_type == BPF_MAP_TYPE_SOCKHASH)) |
---|
| 483 | + return true; |
---|
| 484 | + |
---|
| 485 | + return false; |
---|
| 486 | +} |
---|
| 487 | + |
---|
| 488 | +static bool is_ptr_cast_function(enum bpf_func_id func_id) |
---|
| 489 | +{ |
---|
| 490 | + return func_id == BPF_FUNC_tcp_sock || |
---|
| 491 | + func_id == BPF_FUNC_sk_fullsock || |
---|
| 492 | + func_id == BPF_FUNC_skc_to_tcp_sock || |
---|
| 493 | + func_id == BPF_FUNC_skc_to_tcp6_sock || |
---|
| 494 | + func_id == BPF_FUNC_skc_to_udp6_sock || |
---|
| 495 | + func_id == BPF_FUNC_skc_to_tcp_timewait_sock || |
---|
| 496 | + func_id == BPF_FUNC_skc_to_tcp_request_sock; |
---|
250 | 497 | } |
---|
251 | 498 | |
---|
252 | 499 | /* string representation of 'enum bpf_reg_type' */ |
---|
.. | .. |
---|
261 | 508 | [PTR_TO_PACKET] = "pkt", |
---|
262 | 509 | [PTR_TO_PACKET_META] = "pkt_meta", |
---|
263 | 510 | [PTR_TO_PACKET_END] = "pkt_end", |
---|
| 511 | + [PTR_TO_FLOW_KEYS] = "flow_keys", |
---|
| 512 | + [PTR_TO_SOCKET] = "sock", |
---|
| 513 | + [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", |
---|
| 514 | + [PTR_TO_SOCK_COMMON] = "sock_common", |
---|
| 515 | + [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", |
---|
| 516 | + [PTR_TO_TCP_SOCK] = "tcp_sock", |
---|
| 517 | + [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", |
---|
| 518 | + [PTR_TO_TP_BUFFER] = "tp_buffer", |
---|
| 519 | + [PTR_TO_XDP_SOCK] = "xdp_sock", |
---|
| 520 | + [PTR_TO_BTF_ID] = "ptr_", |
---|
| 521 | + [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", |
---|
| 522 | + [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", |
---|
| 523 | + [PTR_TO_MEM] = "mem", |
---|
| 524 | + [PTR_TO_MEM_OR_NULL] = "mem_or_null", |
---|
| 525 | + [PTR_TO_RDONLY_BUF] = "rdonly_buf", |
---|
| 526 | + [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", |
---|
| 527 | + [PTR_TO_RDWR_BUF] = "rdwr_buf", |
---|
| 528 | + [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", |
---|
| 529 | +}; |
---|
| 530 | + |
---|
/* One-character tag for each stack slot type, indexed by the STACK_*
 * constant.  Used to render per-byte slot types when a verifier state is
 * printed.
 */
static char slot_type_char[] = {
	[STACK_INVALID]	= '?',
	[STACK_SPILL]	= 'r',
	[STACK_MISC]	= 'm',
	[STACK_ZERO]	= '0',
};
---|
265 | 537 | |
---|
266 | 538 | static void print_liveness(struct bpf_verifier_env *env, |
---|
267 | 539 | enum bpf_reg_liveness live) |
---|
268 | 540 | { |
---|
269 | | - if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN)) |
---|
| 541 | + if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) |
---|
270 | 542 | verbose(env, "_"); |
---|
271 | 543 | if (live & REG_LIVE_READ) |
---|
272 | 544 | verbose(env, "r"); |
---|
273 | 545 | if (live & REG_LIVE_WRITTEN) |
---|
274 | 546 | verbose(env, "w"); |
---|
| 547 | + if (live & REG_LIVE_DONE) |
---|
| 548 | + verbose(env, "D"); |
---|
275 | 549 | } |
---|
276 | 550 | |
---|
277 | 551 | static struct bpf_func_state *func(struct bpf_verifier_env *env, |
---|
.. | .. |
---|
280 | 554 | struct bpf_verifier_state *cur = env->cur_state; |
---|
281 | 555 | |
---|
282 | 556 | return cur->frame[reg->frameno]; |
---|
| 557 | +} |
---|
| 558 | + |
---|
| 559 | +const char *kernel_type_name(u32 id) |
---|
| 560 | +{ |
---|
| 561 | + return btf_name_by_offset(btf_vmlinux, |
---|
| 562 | + btf_type_by_id(btf_vmlinux, id)->name_off); |
---|
| 563 | +} |
---|
| 564 | + |
---|
| 565 | +/* The reg state of a pointer or a bounded scalar was saved when |
---|
| 566 | + * it was spilled to the stack. |
---|
| 567 | + */ |
---|
| 568 | +static bool is_spilled_reg(const struct bpf_stack_state *stack) |
---|
| 569 | +{ |
---|
| 570 | + return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL; |
---|
| 571 | +} |
---|
| 572 | + |
---|
| 573 | +static void scrub_spilled_slot(u8 *stype) |
---|
| 574 | +{ |
---|
| 575 | + if (*stype != STACK_INVALID) |
---|
| 576 | + *stype = STACK_MISC; |
---|
283 | 577 | } |
---|
284 | 578 | |
---|
285 | 579 | static void print_verifier_state(struct bpf_verifier_env *env, |
---|
.. | .. |
---|
299 | 593 | verbose(env, " R%d", i); |
---|
300 | 594 | print_liveness(env, reg->live); |
---|
301 | 595 | verbose(env, "=%s", reg_type_str[t]); |
---|
| 596 | + if (t == SCALAR_VALUE && reg->precise) |
---|
| 597 | + verbose(env, "P"); |
---|
302 | 598 | if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && |
---|
303 | 599 | tnum_is_const(reg->var_off)) { |
---|
304 | 600 | /* reg->off should be 0 for SCALAR_VALUE */ |
---|
305 | 601 | verbose(env, "%lld", reg->var_off.value + reg->off); |
---|
306 | | - if (t == PTR_TO_STACK) |
---|
307 | | - verbose(env, ",call_%d", func(env, reg)->callsite); |
---|
308 | 602 | } else { |
---|
| 603 | + if (t == PTR_TO_BTF_ID || |
---|
| 604 | + t == PTR_TO_BTF_ID_OR_NULL || |
---|
| 605 | + t == PTR_TO_PERCPU_BTF_ID) |
---|
| 606 | + verbose(env, "%s", kernel_type_name(reg->btf_id)); |
---|
309 | 607 | verbose(env, "(id=%d", reg->id); |
---|
| 608 | + if (reg_type_may_be_refcounted_or_null(t)) |
---|
| 609 | + verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); |
---|
310 | 610 | if (t != SCALAR_VALUE) |
---|
311 | 611 | verbose(env, ",off=%d", reg->off); |
---|
312 | 612 | if (type_is_pkt_pointer(t)) |
---|
.. | .. |
---|
344 | 644 | tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
---|
345 | 645 | verbose(env, ",var_off=%s", tn_buf); |
---|
346 | 646 | } |
---|
| 647 | + if (reg->s32_min_value != reg->smin_value && |
---|
| 648 | + reg->s32_min_value != S32_MIN) |
---|
| 649 | + verbose(env, ",s32_min_value=%d", |
---|
| 650 | + (int)(reg->s32_min_value)); |
---|
| 651 | + if (reg->s32_max_value != reg->smax_value && |
---|
| 652 | + reg->s32_max_value != S32_MAX) |
---|
| 653 | + verbose(env, ",s32_max_value=%d", |
---|
| 654 | + (int)(reg->s32_max_value)); |
---|
| 655 | + if (reg->u32_min_value != reg->umin_value && |
---|
| 656 | + reg->u32_min_value != U32_MIN) |
---|
| 657 | + verbose(env, ",u32_min_value=%d", |
---|
| 658 | + (int)(reg->u32_min_value)); |
---|
| 659 | + if (reg->u32_max_value != reg->umax_value && |
---|
| 660 | + reg->u32_max_value != U32_MAX) |
---|
| 661 | + verbose(env, ",u32_max_value=%d", |
---|
| 662 | + (int)(reg->u32_max_value)); |
---|
347 | 663 | } |
---|
348 | 664 | verbose(env, ")"); |
---|
349 | 665 | } |
---|
350 | 666 | } |
---|
351 | 667 | for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { |
---|
352 | | - if (state->stack[i].slot_type[0] == STACK_SPILL) { |
---|
353 | | - verbose(env, " fp%d", |
---|
354 | | - (-i - 1) * BPF_REG_SIZE); |
---|
355 | | - print_liveness(env, state->stack[i].spilled_ptr.live); |
---|
356 | | - verbose(env, "=%s", |
---|
357 | | - reg_type_str[state->stack[i].spilled_ptr.type]); |
---|
| 668 | + char types_buf[BPF_REG_SIZE + 1]; |
---|
| 669 | + bool valid = false; |
---|
| 670 | + int j; |
---|
| 671 | + |
---|
| 672 | + for (j = 0; j < BPF_REG_SIZE; j++) { |
---|
| 673 | + if (state->stack[i].slot_type[j] != STACK_INVALID) |
---|
| 674 | + valid = true; |
---|
| 675 | + types_buf[j] = slot_type_char[ |
---|
| 676 | + state->stack[i].slot_type[j]]; |
---|
358 | 677 | } |
---|
359 | | - if (state->stack[i].slot_type[0] == STACK_ZERO) |
---|
360 | | - verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE); |
---|
| 678 | + types_buf[BPF_REG_SIZE] = 0; |
---|
| 679 | + if (!valid) |
---|
| 680 | + continue; |
---|
| 681 | + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); |
---|
| 682 | + print_liveness(env, state->stack[i].spilled_ptr.live); |
---|
| 683 | + if (is_spilled_reg(&state->stack[i])) { |
---|
| 684 | + reg = &state->stack[i].spilled_ptr; |
---|
| 685 | + t = reg->type; |
---|
| 686 | + verbose(env, "=%s", reg_type_str[t]); |
---|
| 687 | + if (t == SCALAR_VALUE && reg->precise) |
---|
| 688 | + verbose(env, "P"); |
---|
| 689 | + if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) |
---|
| 690 | + verbose(env, "%lld", reg->var_off.value + reg->off); |
---|
| 691 | + } else { |
---|
| 692 | + verbose(env, "=%s", types_buf); |
---|
| 693 | + } |
---|
| 694 | + } |
---|
| 695 | + if (state->acquired_refs && state->refs[0].id) { |
---|
| 696 | + verbose(env, " refs=%d", state->refs[0].id); |
---|
| 697 | + for (i = 1; i < state->acquired_refs; i++) |
---|
| 698 | + if (state->refs[i].id) |
---|
| 699 | + verbose(env, ",%d", state->refs[i].id); |
---|
361 | 700 | } |
---|
362 | 701 | verbose(env, "\n"); |
---|
363 | 702 | } |
---|
364 | 703 | |
---|
365 | | -static int copy_stack_state(struct bpf_func_state *dst, |
---|
366 | | - const struct bpf_func_state *src) |
---|
367 | | -{ |
---|
368 | | - if (!src->stack) |
---|
369 | | - return 0; |
---|
370 | | - if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) { |
---|
371 | | - /* internal bug, make state invalid to reject the program */ |
---|
372 | | - memset(dst, 0, sizeof(*dst)); |
---|
373 | | - return -EFAULT; |
---|
374 | | - } |
---|
375 | | - memcpy(dst->stack, src->stack, |
---|
376 | | - sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE)); |
---|
377 | | - return 0; |
---|
| 704 | +#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ |
---|
| 705 | +static int copy_##NAME##_state(struct bpf_func_state *dst, \ |
---|
| 706 | + const struct bpf_func_state *src) \ |
---|
| 707 | +{ \ |
---|
| 708 | + if (!src->FIELD) \ |
---|
| 709 | + return 0; \ |
---|
| 710 | + if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ |
---|
| 711 | + /* internal bug, make state invalid to reject the program */ \ |
---|
| 712 | + memset(dst, 0, sizeof(*dst)); \ |
---|
| 713 | + return -EFAULT; \ |
---|
| 714 | + } \ |
---|
| 715 | + memcpy(dst->FIELD, src->FIELD, \ |
---|
| 716 | + sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ |
---|
| 717 | + return 0; \ |
---|
378 | 718 | } |
---|
| 719 | +/* copy_reference_state() */ |
---|
| 720 | +COPY_STATE_FN(reference, acquired_refs, refs, 1) |
---|
| 721 | +/* copy_stack_state() */ |
---|
| 722 | +COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) |
---|
| 723 | +#undef COPY_STATE_FN |
---|
| 724 | + |
---|
/* Generator for realloc_<NAME>_state(state, size, copy_old).
 *
 * @size is expressed in the same unit as state->COUNT; size / SIZE is the
 * number of array elements.
 *
 * When @size does not exceed the current COUNT (or is zero):
 *   - with @copy_old set, nothing changes;
 *   - otherwise COUNT is lowered to the new value and, if @size is zero and
 *     there was an old array, that array is freed.
 * Otherwise a new array is allocated (-ENOMEM on failure).  With @copy_old
 * set the old elements are copied over and the added tail is zeroed; without
 * it the new array is left as returned by kmalloc_array().  The old array is
 * freed and replaced.
 *
 * NOTE(review): the tail memset length is computed from (size - old_size),
 * so it presumably relies on callers passing @size as a multiple of SIZE -
 * confirm against callers.
 */
#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
				  bool copy_old)			\
{									\
	u32 old_size = state->COUNT;					\
	struct bpf_##NAME##_state *new_##FIELD;				\
	int slot = size / SIZE;						\
									\
	if (size <= old_size || !size) {				\
		if (copy_old)						\
			return 0;					\
		state->COUNT = slot * SIZE;				\
		if (!size && old_size) {				\
			kfree(state->FIELD);				\
			state->FIELD = NULL;				\
		}							\
		return 0;						\
	}								\
	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
				    GFP_KERNEL);			\
	if (!new_##FIELD)						\
		return -ENOMEM;						\
	if (copy_old) {							\
		if (state->FIELD)					\
			memcpy(new_##FIELD, state->FIELD,		\
			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
		memset(new_##FIELD + old_size / SIZE, 0,		\
		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
	}								\
	state->COUNT = slot * SIZE;					\
	kfree(state->FIELD);						\
	state->FIELD = new_##FIELD;					\
	return 0;							\
}
/* realloc_reference_state() */
REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
/* realloc_stack_state() */
REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef REALLOC_STATE_FN
---|
379 | 764 | |
---|
380 | 765 | /* do_check() starts with zero-sized stack in struct bpf_verifier_state to |
---|
381 | 766 | * make it consume minimal amount of memory. check_stack_write() access from |
---|
382 | 767 | * the program calls into realloc_func_state() to grow the stack size. |
---|
383 | | - * Note there is a non-zero parent pointer inside each reg of bpf_verifier_state |
---|
384 | | - * which this function copies over. It points to corresponding reg in previous |
---|
385 | | - * bpf_verifier_state which is never reallocated |
---|
| 768 | + * Note there is a non-zero 'parent' pointer inside bpf_verifier_state |
---|
| 769 | + * which realloc_stack_state() copies over. It points to previous |
---|
| 770 | + * bpf_verifier_state which is never reallocated. |
---|
386 | 771 | */ |
---|
387 | | -static int realloc_func_state(struct bpf_func_state *state, int size, |
---|
388 | | - bool copy_old) |
---|
| 772 | +static int realloc_func_state(struct bpf_func_state *state, int stack_size, |
---|
| 773 | + int refs_size, bool copy_old) |
---|
389 | 774 | { |
---|
390 | | - u32 old_size = state->allocated_stack; |
---|
391 | | - struct bpf_stack_state *new_stack; |
---|
392 | | - int slot = size / BPF_REG_SIZE; |
---|
| 775 | + int err = realloc_reference_state(state, refs_size, copy_old); |
---|
| 776 | + if (err) |
---|
| 777 | + return err; |
---|
| 778 | + return realloc_stack_state(state, stack_size, copy_old); |
---|
| 779 | +} |
---|
393 | 780 | |
---|
394 | | - if (size <= old_size || !size) { |
---|
395 | | - if (copy_old) |
---|
| 781 | +/* Acquire a pointer id from the env and update the state->refs to include |
---|
| 782 | + * this new pointer reference. |
---|
| 783 | + * On success, returns a valid pointer id to associate with the register |
---|
| 784 | + * On failure, returns a negative errno. |
---|
| 785 | + */ |
---|
| 786 | +static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) |
---|
| 787 | +{ |
---|
| 788 | + struct bpf_func_state *state = cur_func(env); |
---|
| 789 | + int new_ofs = state->acquired_refs; |
---|
| 790 | + int id, err; |
---|
| 791 | + |
---|
| 792 | + err = realloc_reference_state(state, state->acquired_refs + 1, true); |
---|
| 793 | + if (err) |
---|
| 794 | + return err; |
---|
| 795 | + id = ++env->id_gen; |
---|
| 796 | + state->refs[new_ofs].id = id; |
---|
| 797 | + state->refs[new_ofs].insn_idx = insn_idx; |
---|
| 798 | + |
---|
| 799 | + return id; |
---|
| 800 | +} |
---|
| 801 | + |
---|
| 802 | +/* release function corresponding to acquire_reference_state(). Idempotent. */ |
---|
| 803 | +static int release_reference_state(struct bpf_func_state *state, int ptr_id) |
---|
| 804 | +{ |
---|
| 805 | + int i, last_idx; |
---|
| 806 | + |
---|
| 807 | + last_idx = state->acquired_refs - 1; |
---|
| 808 | + for (i = 0; i < state->acquired_refs; i++) { |
---|
| 809 | + if (state->refs[i].id == ptr_id) { |
---|
| 810 | + if (last_idx && i != last_idx) |
---|
| 811 | + memcpy(&state->refs[i], &state->refs[last_idx], |
---|
| 812 | + sizeof(*state->refs)); |
---|
| 813 | + memset(&state->refs[last_idx], 0, sizeof(*state->refs)); |
---|
| 814 | + state->acquired_refs--; |
---|
396 | 815 | return 0; |
---|
397 | | - state->allocated_stack = slot * BPF_REG_SIZE; |
---|
398 | | - if (!size && old_size) { |
---|
399 | | - kfree(state->stack); |
---|
400 | | - state->stack = NULL; |
---|
401 | 816 | } |
---|
402 | | - return 0; |
---|
403 | 817 | } |
---|
404 | | - new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state), |
---|
405 | | - GFP_KERNEL); |
---|
406 | | - if (!new_stack) |
---|
407 | | - return -ENOMEM; |
---|
408 | | - if (copy_old) { |
---|
409 | | - if (state->stack) |
---|
410 | | - memcpy(new_stack, state->stack, |
---|
411 | | - sizeof(*new_stack) * (old_size / BPF_REG_SIZE)); |
---|
412 | | - memset(new_stack + old_size / BPF_REG_SIZE, 0, |
---|
413 | | - sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE); |
---|
414 | | - } |
---|
415 | | - state->allocated_stack = slot * BPF_REG_SIZE; |
---|
416 | | - kfree(state->stack); |
---|
417 | | - state->stack = new_stack; |
---|
| 818 | + return -EINVAL; |
---|
| 819 | +} |
---|
| 820 | + |
---|
| 821 | +static int transfer_reference_state(struct bpf_func_state *dst, |
---|
| 822 | + struct bpf_func_state *src) |
---|
| 823 | +{ |
---|
| 824 | + int err = realloc_reference_state(dst, src->acquired_refs, false); |
---|
| 825 | + if (err) |
---|
| 826 | + return err; |
---|
| 827 | + err = copy_reference_state(dst, src); |
---|
| 828 | + if (err) |
---|
| 829 | + return err; |
---|
418 | 830 | return 0; |
---|
419 | 831 | } |
---|
420 | 832 | |
---|
.. | .. |
---|
422 | 834 | { |
---|
423 | 835 | if (!state) |
---|
424 | 836 | return; |
---|
| 837 | + kfree(state->refs); |
---|
425 | 838 | kfree(state->stack); |
---|
426 | 839 | kfree(state); |
---|
| 840 | +} |
---|
| 841 | + |
---|
| 842 | +static void clear_jmp_history(struct bpf_verifier_state *state) |
---|
| 843 | +{ |
---|
| 844 | + kfree(state->jmp_history); |
---|
| 845 | + state->jmp_history = NULL; |
---|
| 846 | + state->jmp_history_cnt = 0; |
---|
427 | 847 | } |
---|
428 | 848 | |
---|
429 | 849 | static void free_verifier_state(struct bpf_verifier_state *state, |
---|
.. | .. |
---|
435 | 855 | free_func_state(state->frame[i]); |
---|
436 | 856 | state->frame[i] = NULL; |
---|
437 | 857 | } |
---|
| 858 | + clear_jmp_history(state); |
---|
438 | 859 | if (free_self) |
---|
439 | 860 | kfree(state); |
---|
440 | 861 | } |
---|
.. | .. |
---|
447 | 868 | { |
---|
448 | 869 | int err; |
---|
449 | 870 | |
---|
450 | | - err = realloc_func_state(dst, src->allocated_stack, false); |
---|
| 871 | + err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, |
---|
| 872 | + false); |
---|
451 | 873 | if (err) |
---|
452 | 874 | return err; |
---|
453 | | - memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack)); |
---|
| 875 | + memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); |
---|
| 876 | + err = copy_reference_state(dst, src); |
---|
| 877 | + if (err) |
---|
| 878 | + return err; |
---|
454 | 879 | return copy_stack_state(dst, src); |
---|
455 | 880 | } |
---|
456 | 881 | |
---|
.. | .. |
---|
458 | 883 | const struct bpf_verifier_state *src) |
---|
459 | 884 | { |
---|
460 | 885 | struct bpf_func_state *dst; |
---|
| 886 | + u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; |
---|
461 | 887 | int i, err; |
---|
| 888 | + |
---|
| 889 | + if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { |
---|
| 890 | + kfree(dst_state->jmp_history); |
---|
| 891 | + dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); |
---|
| 892 | + if (!dst_state->jmp_history) |
---|
| 893 | + return -ENOMEM; |
---|
| 894 | + } |
---|
| 895 | + memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); |
---|
| 896 | + dst_state->jmp_history_cnt = src->jmp_history_cnt; |
---|
462 | 897 | |
---|
463 | 898 | /* if dst has more stack frames than src frame, free them */ |
---|
464 | 899 | for (i = src->curframe + 1; i <= dst_state->curframe; i++) { |
---|
.. | .. |
---|
467 | 902 | } |
---|
468 | 903 | dst_state->speculative = src->speculative; |
---|
469 | 904 | dst_state->curframe = src->curframe; |
---|
| 905 | + dst_state->active_spin_lock = src->active_spin_lock; |
---|
| 906 | + dst_state->branches = src->branches; |
---|
| 907 | + dst_state->parent = src->parent; |
---|
| 908 | + dst_state->first_insn_idx = src->first_insn_idx; |
---|
| 909 | + dst_state->last_insn_idx = src->last_insn_idx; |
---|
470 | 910 | for (i = 0; i <= src->curframe; i++) { |
---|
471 | 911 | dst = dst_state->frame[i]; |
---|
472 | 912 | if (!dst) { |
---|
.. | .. |
---|
482 | 922 | return 0; |
---|
483 | 923 | } |
---|
484 | 924 | |
---|
| 925 | +static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) |
---|
| 926 | +{ |
---|
| 927 | + while (st) { |
---|
| 928 | + u32 br = --st->branches; |
---|
| 929 | + |
---|
| 930 | + /* WARN_ON(br > 1) technically makes sense here, |
---|
| 931 | + * but see comment in push_stack(), hence: |
---|
| 932 | + */ |
---|
| 933 | + WARN_ONCE((int)br < 0, |
---|
| 934 | + "BUG update_branch_counts:branches_to_explore=%d\n", |
---|
| 935 | + br); |
---|
| 936 | + if (br) |
---|
| 937 | + break; |
---|
| 938 | + st = st->parent; |
---|
| 939 | + } |
---|
| 940 | +} |
---|
| 941 | + |
---|
485 | 942 | static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, |
---|
486 | | - int *insn_idx) |
---|
| 943 | + int *insn_idx, bool pop_log) |
---|
487 | 944 | { |
---|
488 | 945 | struct bpf_verifier_state *cur = env->cur_state; |
---|
489 | 946 | struct bpf_verifier_stack_elem *elem, *head = env->head; |
---|
.. | .. |
---|
497 | 954 | if (err) |
---|
498 | 955 | return err; |
---|
499 | 956 | } |
---|
| 957 | + if (pop_log) |
---|
| 958 | + bpf_vlog_reset(&env->log, head->log_pos); |
---|
500 | 959 | if (insn_idx) |
---|
501 | 960 | *insn_idx = head->insn_idx; |
---|
502 | 961 | if (prev_insn_idx) |
---|
.. | .. |
---|
524 | 983 | elem->insn_idx = insn_idx; |
---|
525 | 984 | elem->prev_insn_idx = prev_insn_idx; |
---|
526 | 985 | elem->next = env->head; |
---|
| 986 | + elem->log_pos = env->log.len_used; |
---|
527 | 987 | env->head = elem; |
---|
528 | 988 | env->stack_size++; |
---|
529 | 989 | err = copy_verifier_state(&elem->st, cur); |
---|
530 | 990 | if (err) |
---|
531 | 991 | goto err; |
---|
532 | 992 | elem->st.speculative |= speculative; |
---|
533 | | - if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { |
---|
534 | | - verbose(env, "BPF program is too complex\n"); |
---|
| 993 | + if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { |
---|
| 994 | + verbose(env, "The sequence of %d jumps is too complex.\n", |
---|
| 995 | + env->stack_size); |
---|
535 | 996 | goto err; |
---|
| 997 | + } |
---|
| 998 | + if (elem->st.parent) { |
---|
| 999 | + ++elem->st.parent->branches; |
---|
| 1000 | + /* WARN_ON(branches > 2) technically makes sense here, |
---|
| 1001 | + * but |
---|
| 1002 | + * 1. speculative states will bump 'branches' for non-branch |
---|
| 1003 | + * instructions |
---|
| 1004 | + * 2. is_state_visited() heuristics may decide not to create |
---|
| 1005 | + * a new state for a sequence of branches and all such current |
---|
| 1006 | + * and cloned states will be pointing to a single parent state |
---|
| 1007 | + * which might have large 'branches' count. |
---|
| 1008 | + */ |
---|
536 | 1009 | } |
---|
537 | 1010 | return &elem->st; |
---|
538 | 1011 | err: |
---|
539 | 1012 | free_verifier_state(env->cur_state, true); |
---|
540 | 1013 | env->cur_state = NULL; |
---|
541 | 1014 | /* pop all elements and return */ |
---|
542 | | - while (!pop_stack(env, NULL, NULL)); |
---|
| 1015 | + while (!pop_stack(env, NULL, NULL, false)); |
---|
543 | 1016 | return NULL; |
---|
544 | 1017 | } |
---|
545 | 1018 | |
---|
.. | .. |
---|
548 | 1021 | BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 |
---|
549 | 1022 | }; |
---|
550 | 1023 | |
---|
551 | | -static void __mark_reg_not_init(struct bpf_reg_state *reg); |
---|
| 1024 | +static void __mark_reg_not_init(const struct bpf_verifier_env *env, |
---|
| 1025 | + struct bpf_reg_state *reg); |
---|
| 1026 | + |
---|
| 1027 | +/* This helper doesn't clear reg->id */ |
---|
| 1028 | +static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm) |
---|
| 1029 | +{ |
---|
| 1030 | + reg->var_off = tnum_const(imm); |
---|
| 1031 | + reg->smin_value = (s64)imm; |
---|
| 1032 | + reg->smax_value = (s64)imm; |
---|
| 1033 | + reg->umin_value = imm; |
---|
| 1034 | + reg->umax_value = imm; |
---|
| 1035 | + |
---|
| 1036 | + reg->s32_min_value = (s32)imm; |
---|
| 1037 | + reg->s32_max_value = (s32)imm; |
---|
| 1038 | + reg->u32_min_value = (u32)imm; |
---|
| 1039 | + reg->u32_max_value = (u32)imm; |
---|
| 1040 | +} |
---|
552 | 1041 | |
---|
553 | 1042 | /* Mark the unknown part of a register (variable offset or scalar value) as |
---|
554 | 1043 | * known to have the value @imm. |
---|
.. | .. |
---|
558 | 1047 | /* Clear id, off, and union(map_ptr, range) */ |
---|
559 | 1048 | memset(((u8 *)reg) + sizeof(reg->type), 0, |
---|
560 | 1049 | offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); |
---|
561 | | - reg->var_off = tnum_const(imm); |
---|
562 | | - reg->smin_value = (s64)imm; |
---|
563 | | - reg->smax_value = (s64)imm; |
---|
564 | | - reg->umin_value = imm; |
---|
565 | | - reg->umax_value = imm; |
---|
| 1050 | + ___mark_reg_known(reg, imm); |
---|
| 1051 | +} |
---|
| 1052 | + |
---|
| 1053 | +static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm) |
---|
| 1054 | +{ |
---|
| 1055 | + reg->var_off = tnum_const_subreg(reg->var_off, imm); |
---|
| 1056 | + reg->s32_min_value = (s32)imm; |
---|
| 1057 | + reg->s32_max_value = (s32)imm; |
---|
| 1058 | + reg->u32_min_value = (u32)imm; |
---|
| 1059 | + reg->u32_max_value = (u32)imm; |
---|
566 | 1060 | } |
---|
567 | 1061 | |
---|
568 | 1062 | /* Mark the 'variable offset' part of a register as zero. This should be |
---|
.. | .. |
---|
586 | 1080 | verbose(env, "mark_reg_known_zero(regs, %u)\n", regno); |
---|
587 | 1081 | /* Something bad happened, let's kill all regs */ |
---|
588 | 1082 | for (regno = 0; regno < MAX_BPF_REG; regno++) |
---|
589 | | - __mark_reg_not_init(regs + regno); |
---|
| 1083 | + __mark_reg_not_init(env, regs + regno); |
---|
590 | 1084 | return; |
---|
591 | 1085 | } |
---|
592 | 1086 | __mark_reg_known_zero(regs + regno); |
---|
.. | .. |
---|
617 | 1111 | tnum_equals_const(reg->var_off, 0); |
---|
618 | 1112 | } |
---|
619 | 1113 | |
---|
620 | | -/* Attempts to improve min/max values based on var_off information */ |
---|
621 | | -static void __update_reg_bounds(struct bpf_reg_state *reg) |
---|
| 1114 | +/* Reset the min/max bounds of a register */ |
---|
| 1115 | +static void __mark_reg_unbounded(struct bpf_reg_state *reg) |
---|
| 1116 | +{ |
---|
| 1117 | + reg->smin_value = S64_MIN; |
---|
| 1118 | + reg->smax_value = S64_MAX; |
---|
| 1119 | + reg->umin_value = 0; |
---|
| 1120 | + reg->umax_value = U64_MAX; |
---|
| 1121 | + |
---|
| 1122 | + reg->s32_min_value = S32_MIN; |
---|
| 1123 | + reg->s32_max_value = S32_MAX; |
---|
| 1124 | + reg->u32_min_value = 0; |
---|
| 1125 | + reg->u32_max_value = U32_MAX; |
---|
| 1126 | +} |
---|
| 1127 | + |
---|
| 1128 | +static void __mark_reg64_unbounded(struct bpf_reg_state *reg) |
---|
| 1129 | +{ |
---|
| 1130 | + reg->smin_value = S64_MIN; |
---|
| 1131 | + reg->smax_value = S64_MAX; |
---|
| 1132 | + reg->umin_value = 0; |
---|
| 1133 | + reg->umax_value = U64_MAX; |
---|
| 1134 | +} |
---|
| 1135 | + |
---|
| 1136 | +static void __mark_reg32_unbounded(struct bpf_reg_state *reg) |
---|
| 1137 | +{ |
---|
| 1138 | + reg->s32_min_value = S32_MIN; |
---|
| 1139 | + reg->s32_max_value = S32_MAX; |
---|
| 1140 | + reg->u32_min_value = 0; |
---|
| 1141 | + reg->u32_max_value = U32_MAX; |
---|
| 1142 | +} |
---|
| 1143 | + |
---|
| 1144 | +static void __update_reg32_bounds(struct bpf_reg_state *reg) |
---|
| 1145 | +{ |
---|
| 1146 | + struct tnum var32_off = tnum_subreg(reg->var_off); |
---|
| 1147 | + |
---|
| 1148 | + /* min signed is max(sign bit) | min(other bits) */ |
---|
| 1149 | + reg->s32_min_value = max_t(s32, reg->s32_min_value, |
---|
| 1150 | + var32_off.value | (var32_off.mask & S32_MIN)); |
---|
| 1151 | + /* max signed is min(sign bit) | max(other bits) */ |
---|
| 1152 | + reg->s32_max_value = min_t(s32, reg->s32_max_value, |
---|
| 1153 | + var32_off.value | (var32_off.mask & S32_MAX)); |
---|
| 1154 | + reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value); |
---|
| 1155 | + reg->u32_max_value = min(reg->u32_max_value, |
---|
| 1156 | + (u32)(var32_off.value | var32_off.mask)); |
---|
| 1157 | +} |
---|
| 1158 | + |
---|
| 1159 | +static void __update_reg64_bounds(struct bpf_reg_state *reg) |
---|
622 | 1160 | { |
---|
623 | 1161 | /* min signed is max(sign bit) | min(other bits) */ |
---|
624 | 1162 | reg->smin_value = max_t(s64, reg->smin_value, |
---|
.. | .. |
---|
631 | 1169 | reg->var_off.value | reg->var_off.mask); |
---|
632 | 1170 | } |
---|
633 | 1171 | |
---|
| 1172 | +static void __update_reg_bounds(struct bpf_reg_state *reg) |
---|
| 1173 | +{ |
---|
| 1174 | + __update_reg32_bounds(reg); |
---|
| 1175 | + __update_reg64_bounds(reg); |
---|
| 1176 | +} |
---|
| 1177 | + |
---|
634 | 1178 | /* Uses signed min/max values to inform unsigned, and vice-versa */ |
---|
635 | | -static void __reg_deduce_bounds(struct bpf_reg_state *reg) |
---|
| 1179 | +static void __reg32_deduce_bounds(struct bpf_reg_state *reg) |
---|
| 1180 | +{ |
---|
| 1181 | + /* Learn sign from signed bounds. |
---|
| 1182 | + * If we cannot cross the sign boundary, then signed and unsigned bounds |
---|
| 1183 | + * are the same, so combine. This works even in the negative case, e.g. |
---|
| 1184 | + * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. |
---|
| 1185 | + */ |
---|
| 1186 | + if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) { |
---|
| 1187 | + reg->s32_min_value = reg->u32_min_value = |
---|
| 1188 | + max_t(u32, reg->s32_min_value, reg->u32_min_value); |
---|
| 1189 | + reg->s32_max_value = reg->u32_max_value = |
---|
| 1190 | + min_t(u32, reg->s32_max_value, reg->u32_max_value); |
---|
| 1191 | + return; |
---|
| 1192 | + } |
---|
| 1193 | + /* Learn sign from unsigned bounds. Signed bounds cross the sign |
---|
| 1194 | + * boundary, so we must be careful. |
---|
| 1195 | + */ |
---|
| 1196 | + if ((s32)reg->u32_max_value >= 0) { |
---|
| 1197 | + /* Positive. We can't learn anything from the smin, but smax |
---|
| 1198 | + * is positive, hence safe. |
---|
| 1199 | + */ |
---|
| 1200 | + reg->s32_min_value = reg->u32_min_value; |
---|
| 1201 | + reg->s32_max_value = reg->u32_max_value = |
---|
| 1202 | + min_t(u32, reg->s32_max_value, reg->u32_max_value); |
---|
| 1203 | + } else if ((s32)reg->u32_min_value < 0) { |
---|
| 1204 | + /* Negative. We can't learn anything from the smax, but smin |
---|
| 1205 | + * is negative, hence safe. |
---|
| 1206 | + */ |
---|
| 1207 | + reg->s32_min_value = reg->u32_min_value = |
---|
| 1208 | + max_t(u32, reg->s32_min_value, reg->u32_min_value); |
---|
| 1209 | + reg->s32_max_value = reg->u32_max_value; |
---|
| 1210 | + } |
---|
| 1211 | +} |
---|
| 1212 | + |
---|
| 1213 | +static void __reg64_deduce_bounds(struct bpf_reg_state *reg) |
---|
636 | 1214 | { |
---|
637 | 1215 | /* Learn sign from signed bounds. |
---|
638 | 1216 | * If we cannot cross the sign boundary, then signed and unsigned bounds |
---|
.. | .. |
---|
666 | 1244 | } |
---|
667 | 1245 | } |
---|
668 | 1246 | |
---|
| 1247 | +static void __reg_deduce_bounds(struct bpf_reg_state *reg) |
---|
| 1248 | +{ |
---|
| 1249 | + __reg32_deduce_bounds(reg); |
---|
| 1250 | + __reg64_deduce_bounds(reg); |
---|
| 1251 | +} |
---|
| 1252 | + |
---|
669 | 1253 | /* Attempts to improve var_off based on unsigned min/max information */ |
---|
670 | 1254 | static void __reg_bound_offset(struct bpf_reg_state *reg) |
---|
671 | 1255 | { |
---|
672 | | - reg->var_off = tnum_intersect(reg->var_off, |
---|
673 | | - tnum_range(reg->umin_value, |
---|
674 | | - reg->umax_value)); |
---|
| 1256 | + struct tnum var64_off = tnum_intersect(reg->var_off, |
---|
| 1257 | + tnum_range(reg->umin_value, |
---|
| 1258 | + reg->umax_value)); |
---|
| 1259 | + struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off), |
---|
| 1260 | + tnum_range(reg->u32_min_value, |
---|
| 1261 | + reg->u32_max_value)); |
---|
| 1262 | + |
---|
| 1263 | + reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); |
---|
675 | 1264 | } |
---|
676 | 1265 | |
---|
677 | | -/* Reset the min/max bounds of a register */ |
---|
678 | | -static void __mark_reg_unbounded(struct bpf_reg_state *reg) |
---|
| 1266 | +static void reg_bounds_sync(struct bpf_reg_state *reg) |
---|
679 | 1267 | { |
---|
680 | | - reg->smin_value = S64_MIN; |
---|
681 | | - reg->smax_value = S64_MAX; |
---|
682 | | - reg->umin_value = 0; |
---|
683 | | - reg->umax_value = U64_MAX; |
---|
| 1268 | + /* We might have learned new bounds from the var_off. */ |
---|
| 1269 | + __update_reg_bounds(reg); |
---|
| 1270 | + /* We might have learned something about the sign bit. */ |
---|
| 1271 | + __reg_deduce_bounds(reg); |
---|
| 1272 | + /* We might have learned some bits from the bounds. */ |
---|
| 1273 | + __reg_bound_offset(reg); |
---|
| 1274 | + /* Intersecting with the old var_off might have improved our bounds |
---|
| 1275 | + * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), |
---|
| 1276 | + * then new var_off is (0; 0x7f...fc) which improves our umax. |
---|
| 1277 | + */ |
---|
| 1278 | + __update_reg_bounds(reg); |
---|
| 1279 | +} |
---|
| 1280 | + |
---|
| 1281 | +static bool __reg32_bound_s64(s32 a) |
---|
| 1282 | +{ |
---|
| 1283 | + return a >= 0 && a <= S32_MAX; |
---|
| 1284 | +} |
---|
| 1285 | + |
---|
| 1286 | +static void __reg_assign_32_into_64(struct bpf_reg_state *reg) |
---|
| 1287 | +{ |
---|
| 1288 | + reg->umin_value = reg->u32_min_value; |
---|
| 1289 | + reg->umax_value = reg->u32_max_value; |
---|
| 1290 | + |
---|
| 1291 | + /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must |
---|
| 1292 | + * be positive otherwise set to worst case bounds and refine later |
---|
| 1293 | + * from tnum. |
---|
| 1294 | + */ |
---|
| 1295 | + if (__reg32_bound_s64(reg->s32_min_value) && |
---|
| 1296 | + __reg32_bound_s64(reg->s32_max_value)) { |
---|
| 1297 | + reg->smin_value = reg->s32_min_value; |
---|
| 1298 | + reg->smax_value = reg->s32_max_value; |
---|
| 1299 | + } else { |
---|
| 1300 | + reg->smin_value = 0; |
---|
| 1301 | + reg->smax_value = U32_MAX; |
---|
| 1302 | + } |
---|
| 1303 | +} |
---|
| 1304 | + |
---|
| 1305 | +static void __reg_combine_32_into_64(struct bpf_reg_state *reg) |
---|
| 1306 | +{ |
---|
| 1307 | + /* special case when 64-bit register has upper 32-bit register |
---|
| 1308 | + * zeroed. Typically happens after zext or <<32, >>32 sequence |
---|
| 1309 | + * allowing us to use 32-bit bounds directly. |
---|
| 1310 | + */ |
---|
| 1311 | + if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) { |
---|
| 1312 | + __reg_assign_32_into_64(reg); |
---|
| 1313 | + } else { |
---|
| 1314 | + /* Otherwise the best we can do is push lower 32bit known and |
---|
| 1315 | + * unknown bits into register (var_off set from jmp logic) |
---|
| 1316 | + * then learn as much as possible from the 64-bit tnum |
---|
| 1317 | + * known and unknown bits. The previous smin/smax bounds are |
---|
| 1318 | + * invalid here because of jmp32 compare so mark them unknown |
---|
| 1319 | + * so they do not impact tnum bounds calculation. |
---|
| 1320 | + */ |
---|
| 1321 | + __mark_reg64_unbounded(reg); |
---|
| 1322 | + } |
---|
| 1323 | + reg_bounds_sync(reg); |
---|
| 1324 | +} |
---|
| 1325 | + |
---|
| 1326 | +static bool __reg64_bound_s32(s64 a) |
---|
| 1327 | +{ |
---|
| 1328 | + return a >= S32_MIN && a <= S32_MAX; |
---|
| 1329 | +} |
---|
| 1330 | + |
---|
| 1331 | +static bool __reg64_bound_u32(u64 a) |
---|
| 1332 | +{ |
---|
| 1333 | + return a >= U32_MIN && a <= U32_MAX; |
---|
| 1334 | +} |
---|
| 1335 | + |
---|
| 1336 | +static void __reg_combine_64_into_32(struct bpf_reg_state *reg) |
---|
| 1337 | +{ |
---|
| 1338 | + __mark_reg32_unbounded(reg); |
---|
| 1339 | + if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { |
---|
| 1340 | + reg->s32_min_value = (s32)reg->smin_value; |
---|
| 1341 | + reg->s32_max_value = (s32)reg->smax_value; |
---|
| 1342 | + } |
---|
| 1343 | + if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) { |
---|
| 1344 | + reg->u32_min_value = (u32)reg->umin_value; |
---|
| 1345 | + reg->u32_max_value = (u32)reg->umax_value; |
---|
| 1346 | + } |
---|
| 1347 | + reg_bounds_sync(reg); |
---|
684 | 1348 | } |
---|
685 | 1349 | |
---|
686 | 1350 | /* Mark a register as having a completely unknown (scalar) value. */ |
---|
687 | | -static void __mark_reg_unknown(struct bpf_reg_state *reg) |
---|
| 1351 | +static void __mark_reg_unknown(const struct bpf_verifier_env *env, |
---|
| 1352 | + struct bpf_reg_state *reg) |
---|
688 | 1353 | { |
---|
689 | 1354 | /* |
---|
690 | 1355 | * Clear type, id, off, and union(map_ptr, range) and |
---|
.. | .. |
---|
694 | 1359 | reg->type = SCALAR_VALUE; |
---|
695 | 1360 | reg->var_off = tnum_unknown; |
---|
696 | 1361 | reg->frameno = 0; |
---|
| 1362 | + reg->precise = !env->bpf_capable; |
---|
697 | 1363 | __mark_reg_unbounded(reg); |
---|
698 | 1364 | } |
---|
699 | 1365 | |
---|
.. | .. |
---|
704 | 1370 | verbose(env, "mark_reg_unknown(regs, %u)\n", regno); |
---|
705 | 1371 | /* Something bad happened, let's kill all regs except FP */ |
---|
706 | 1372 | for (regno = 0; regno < BPF_REG_FP; regno++) |
---|
707 | | - __mark_reg_not_init(regs + regno); |
---|
| 1373 | + __mark_reg_not_init(env, regs + regno); |
---|
708 | 1374 | return; |
---|
709 | 1375 | } |
---|
710 | | - __mark_reg_unknown(regs + regno); |
---|
| 1376 | + __mark_reg_unknown(env, regs + regno); |
---|
711 | 1377 | } |
---|
712 | 1378 | |
---|
713 | | -static void __mark_reg_not_init(struct bpf_reg_state *reg) |
---|
| 1379 | +static void __mark_reg_not_init(const struct bpf_verifier_env *env, |
---|
| 1380 | + struct bpf_reg_state *reg) |
---|
714 | 1381 | { |
---|
715 | | - __mark_reg_unknown(reg); |
---|
| 1382 | + __mark_reg_unknown(env, reg); |
---|
716 | 1383 | reg->type = NOT_INIT; |
---|
717 | 1384 | } |
---|
718 | 1385 | |
---|
.. | .. |
---|
723 | 1390 | verbose(env, "mark_reg_not_init(regs, %u)\n", regno); |
---|
724 | 1391 | /* Something bad happened, let's kill all regs except FP */ |
---|
725 | 1392 | for (regno = 0; regno < BPF_REG_FP; regno++) |
---|
726 | | - __mark_reg_not_init(regs + regno); |
---|
| 1393 | + __mark_reg_not_init(env, regs + regno); |
---|
727 | 1394 | return; |
---|
728 | 1395 | } |
---|
729 | | - __mark_reg_not_init(regs + regno); |
---|
| 1396 | + __mark_reg_not_init(env, regs + regno); |
---|
730 | 1397 | } |
---|
731 | 1398 | |
---|
| 1399 | +static void mark_btf_ld_reg(struct bpf_verifier_env *env, |
---|
| 1400 | + struct bpf_reg_state *regs, u32 regno, |
---|
| 1401 | + enum bpf_reg_type reg_type, u32 btf_id) |
---|
| 1402 | +{ |
---|
| 1403 | + if (reg_type == SCALAR_VALUE) { |
---|
| 1404 | + mark_reg_unknown(env, regs, regno); |
---|
| 1405 | + return; |
---|
| 1406 | + } |
---|
| 1407 | + mark_reg_known_zero(env, regs, regno); |
---|
| 1408 | + regs[regno].type = PTR_TO_BTF_ID; |
---|
| 1409 | + regs[regno].btf_id = btf_id; |
---|
| 1410 | +} |
---|
| 1411 | + |
---|
| 1412 | +#define DEF_NOT_SUBREG (0) |
---|
732 | 1413 | static void init_reg_state(struct bpf_verifier_env *env, |
---|
733 | 1414 | struct bpf_func_state *state) |
---|
734 | 1415 | { |
---|
.. | .. |
---|
739 | 1420 | mark_reg_not_init(env, regs, i); |
---|
740 | 1421 | regs[i].live = REG_LIVE_NONE; |
---|
741 | 1422 | regs[i].parent = NULL; |
---|
| 1423 | + regs[i].subreg_def = DEF_NOT_SUBREG; |
---|
742 | 1424 | } |
---|
743 | 1425 | |
---|
744 | 1426 | /* frame pointer */ |
---|
745 | 1427 | regs[BPF_REG_FP].type = PTR_TO_STACK; |
---|
746 | 1428 | mark_reg_known_zero(env, regs, BPF_REG_FP); |
---|
747 | 1429 | regs[BPF_REG_FP].frameno = state->frameno; |
---|
748 | | - |
---|
749 | | - /* 1st arg to a function */ |
---|
750 | | - regs[BPF_REG_1].type = PTR_TO_CTX; |
---|
751 | | - mark_reg_known_zero(env, regs, BPF_REG_1); |
---|
752 | 1430 | } |
---|
753 | 1431 | |
---|
754 | 1432 | #define BPF_MAIN_FUNC (-1) |
---|
.. | .. |
---|
826 | 1504 | continue; |
---|
827 | 1505 | if (insn[i].src_reg != BPF_PSEUDO_CALL) |
---|
828 | 1506 | continue; |
---|
829 | | - if (!env->allow_ptr_leaks) { |
---|
830 | | - verbose(env, "function calls to other bpf functions are allowed for root only\n"); |
---|
| 1507 | + if (!env->bpf_capable) { |
---|
| 1508 | + verbose(env, |
---|
| 1509 | + "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); |
---|
831 | 1510 | return -EPERM; |
---|
832 | | - } |
---|
833 | | - if (bpf_prog_is_dev_bound(env->prog->aux)) { |
---|
834 | | - verbose(env, "function calls in offloaded programs are not supported yet\n"); |
---|
835 | | - return -EINVAL; |
---|
836 | 1511 | } |
---|
837 | 1512 | ret = add_subprog(env, i + insn[i].imm + 1); |
---|
838 | 1513 | if (ret < 0) |
---|
.. | .. |
---|
844 | 1519 | */ |
---|
845 | 1520 | subprog[env->subprog_cnt].start = insn_cnt; |
---|
846 | 1521 | |
---|
847 | | - if (env->log.level > 1) |
---|
| 1522 | + if (env->log.level & BPF_LOG_LEVEL2) |
---|
848 | 1523 | for (i = 0; i < env->subprog_cnt; i++) |
---|
849 | 1524 | verbose(env, "func#%d @%d\n", i, subprog[i].start); |
---|
850 | 1525 | |
---|
.. | .. |
---|
854 | 1529 | for (i = 0; i < insn_cnt; i++) { |
---|
855 | 1530 | u8 code = insn[i].code; |
---|
856 | 1531 | |
---|
857 | | - if (BPF_CLASS(code) != BPF_JMP) |
---|
| 1532 | + if (code == (BPF_JMP | BPF_CALL) && |
---|
| 1533 | + insn[i].imm == BPF_FUNC_tail_call && |
---|
| 1534 | + insn[i].src_reg != BPF_PSEUDO_CALL) |
---|
| 1535 | + subprog[cur_subprog].has_tail_call = true; |
---|
| 1536 | + if (BPF_CLASS(code) == BPF_LD && |
---|
| 1537 | + (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) |
---|
| 1538 | + subprog[cur_subprog].has_ld_abs = true; |
---|
| 1539 | + if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) |
---|
858 | 1540 | goto next; |
---|
859 | 1541 | if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) |
---|
860 | 1542 | goto next; |
---|
.. | .. |
---|
888 | 1570 | */ |
---|
889 | 1571 | static int mark_reg_read(struct bpf_verifier_env *env, |
---|
890 | 1572 | const struct bpf_reg_state *state, |
---|
891 | | - struct bpf_reg_state *parent) |
---|
| 1573 | + struct bpf_reg_state *parent, u8 flag) |
---|
892 | 1574 | { |
---|
893 | 1575 | bool writes = parent == state->parent; /* Observe write marks */ |
---|
| 1576 | + int cnt = 0; |
---|
894 | 1577 | |
---|
895 | 1578 | while (parent) { |
---|
896 | 1579 | /* if read wasn't screened by an earlier write ... */ |
---|
897 | 1580 | if (writes && state->live & REG_LIVE_WRITTEN) |
---|
898 | 1581 | break; |
---|
| 1582 | + if (parent->live & REG_LIVE_DONE) { |
---|
| 1583 | + verbose(env, "verifier BUG type %s var_off %lld off %d\n", |
---|
| 1584 | + reg_type_str[parent->type], |
---|
| 1585 | + parent->var_off.value, parent->off); |
---|
| 1586 | + return -EFAULT; |
---|
| 1587 | + } |
---|
| 1588 | + /* The first condition is more likely to be true than the |
---|
| 1589 | + * second, so check it first. |
---|
| 1590 | + */ |
---|
| 1591 | + if ((parent->live & REG_LIVE_READ) == flag || |
---|
| 1592 | + parent->live & REG_LIVE_READ64) |
---|
| 1593 | + /* The parentage chain never changes and |
---|
| 1594 | + * this parent was already marked as LIVE_READ. |
---|
| 1595 | + * There is no need to keep walking the chain again and |
---|
| 1596 | + * keep re-marking all parents as LIVE_READ. |
---|
| 1597 | + * This case happens when the same register is read |
---|
| 1598 | + * multiple times without writes into it in-between. |
---|
| 1599 | + * Also, if parent has the stronger REG_LIVE_READ64 set, |
---|
| 1600 | + * then no need to set the weak REG_LIVE_READ32. |
---|
| 1601 | + */ |
---|
| 1602 | + break; |
---|
899 | 1603 | /* ... then we depend on parent's value */ |
---|
900 | | - parent->live |= REG_LIVE_READ; |
---|
| 1604 | + parent->live |= flag; |
---|
| 1605 | + /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */ |
---|
| 1606 | + if (flag == REG_LIVE_READ64) |
---|
| 1607 | + parent->live &= ~REG_LIVE_READ32; |
---|
901 | 1608 | state = parent; |
---|
902 | 1609 | parent = state->parent; |
---|
903 | 1610 | writes = true; |
---|
| 1611 | + cnt++; |
---|
904 | 1612 | } |
---|
| 1613 | + |
---|
| 1614 | + if (env->longest_mark_read_walk < cnt) |
---|
| 1615 | + env->longest_mark_read_walk = cnt; |
---|
905 | 1616 | return 0; |
---|
| 1617 | +} |
---|
| 1618 | + |
---|
| 1619 | +/* This function is supposed to be used by the following 32-bit optimization |
---|
| 1620 | + * code only. It returns TRUE if the source or destination register operates |
---|
| 1621 | + * on 64-bit, otherwise return FALSE. |
---|
| 1622 | + */ |
---|
| 1623 | +static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, |
---|
| 1624 | + u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t) |
---|
| 1625 | +{ |
---|
| 1626 | + u8 code, class, op; |
---|
| 1627 | + |
---|
| 1628 | + code = insn->code; |
---|
| 1629 | + class = BPF_CLASS(code); |
---|
| 1630 | + op = BPF_OP(code); |
---|
| 1631 | + if (class == BPF_JMP) { |
---|
| 1632 | + /* BPF_EXIT for "main" will reach here. Return TRUE |
---|
| 1633 | + * conservatively. |
---|
| 1634 | + */ |
---|
| 1635 | + if (op == BPF_EXIT) |
---|
| 1636 | + return true; |
---|
| 1637 | + if (op == BPF_CALL) { |
---|
| 1638 | + /* BPF to BPF call will reach here because of marking |
---|
| 1639 | + * caller saved clobber with DST_OP_NO_MARK for which we |
---|
| 1640 | + * don't care the register def because they are anyway |
---|
| 1641 | + * marked as NOT_INIT already. |
---|
| 1642 | + */ |
---|
| 1643 | + if (insn->src_reg == BPF_PSEUDO_CALL) |
---|
| 1644 | + return false; |
---|
| 1645 | + /* Helper call will reach here because of arg type |
---|
| 1646 | + * check, conservatively return TRUE. |
---|
| 1647 | + */ |
---|
| 1648 | + if (t == SRC_OP) |
---|
| 1649 | + return true; |
---|
| 1650 | + |
---|
| 1651 | + return false; |
---|
| 1652 | + } |
---|
| 1653 | + } |
---|
| 1654 | + |
---|
| 1655 | + if (class == BPF_ALU64 || class == BPF_JMP || |
---|
| 1656 | + /* BPF_END always use BPF_ALU class. */ |
---|
| 1657 | + (class == BPF_ALU && op == BPF_END && insn->imm == 64)) |
---|
| 1658 | + return true; |
---|
| 1659 | + |
---|
| 1660 | + if (class == BPF_ALU || class == BPF_JMP32) |
---|
| 1661 | + return false; |
---|
| 1662 | + |
---|
| 1663 | + if (class == BPF_LDX) { |
---|
| 1664 | + if (t != SRC_OP) |
---|
| 1665 | + return BPF_SIZE(code) == BPF_DW; |
---|
| 1666 | + /* LDX source must be ptr. */ |
---|
| 1667 | + return true; |
---|
| 1668 | + } |
---|
| 1669 | + |
---|
| 1670 | + if (class == BPF_STX) { |
---|
| 1671 | + if (reg->type != SCALAR_VALUE) |
---|
| 1672 | + return true; |
---|
| 1673 | + return BPF_SIZE(code) == BPF_DW; |
---|
| 1674 | + } |
---|
| 1675 | + |
---|
| 1676 | + if (class == BPF_LD) { |
---|
| 1677 | + u8 mode = BPF_MODE(code); |
---|
| 1678 | + |
---|
| 1679 | + /* LD_IMM64 */ |
---|
| 1680 | + if (mode == BPF_IMM) |
---|
| 1681 | + return true; |
---|
| 1682 | + |
---|
| 1683 | + /* Both LD_IND and LD_ABS return 32-bit data. */ |
---|
| 1684 | + if (t != SRC_OP) |
---|
| 1685 | + return false; |
---|
| 1686 | + |
---|
| 1687 | + /* Implicit ctx ptr. */ |
---|
| 1688 | + if (regno == BPF_REG_6) |
---|
| 1689 | + return true; |
---|
| 1690 | + |
---|
| 1691 | + /* Explicit source could be any width. */ |
---|
| 1692 | + return true; |
---|
| 1693 | + } |
---|
| 1694 | + |
---|
| 1695 | + if (class == BPF_ST) |
---|
| 1696 | + /* The only source register for BPF_ST is a ptr. */ |
---|
| 1697 | + return true; |
---|
| 1698 | + |
---|
| 1699 | + /* Conservatively return true by default. */ |
---|
| 1700 | + return true; |
---|
| 1701 | +} |
---|
| 1702 | + |
---|
| 1703 | +/* Return TRUE if INSN doesn't have explicit value define. */ |
---|
| 1704 | +static bool insn_no_def(struct bpf_insn *insn) |
---|
| 1705 | +{ |
---|
| 1706 | + u8 class = BPF_CLASS(insn->code); |
---|
| 1707 | + |
---|
| 1708 | + return (class == BPF_JMP || class == BPF_JMP32 || |
---|
| 1709 | + class == BPF_STX || class == BPF_ST); |
---|
| 1710 | +} |
---|
| 1711 | + |
---|
| 1712 | +/* Return TRUE if INSN has defined any 32-bit value explicitly. */ |
---|
| 1713 | +static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn) |
---|
| 1714 | +{ |
---|
| 1715 | + if (insn_no_def(insn)) |
---|
| 1716 | + return false; |
---|
| 1717 | + |
---|
| 1718 | + return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP); |
---|
| 1719 | +} |
---|
| 1720 | + |
---|
| 1721 | +static void mark_insn_zext(struct bpf_verifier_env *env, |
---|
| 1722 | + struct bpf_reg_state *reg) |
---|
| 1723 | +{ |
---|
| 1724 | + s32 def_idx = reg->subreg_def; |
---|
| 1725 | + |
---|
| 1726 | + if (def_idx == DEF_NOT_SUBREG) |
---|
| 1727 | + return; |
---|
| 1728 | + |
---|
| 1729 | + env->insn_aux_data[def_idx - 1].zext_dst = true; |
---|
| 1730 | + /* The dst will be zero extended, so won't be sub-register anymore. */ |
---|
| 1731 | + reg->subreg_def = DEF_NOT_SUBREG; |
---|
906 | 1732 | } |
---|
907 | 1733 | |
---|
908 | 1734 | static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, |
---|
.. | .. |
---|
910 | 1736 | { |
---|
911 | 1737 | struct bpf_verifier_state *vstate = env->cur_state; |
---|
912 | 1738 | struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
---|
913 | | - struct bpf_reg_state *regs = state->regs; |
---|
| 1739 | + struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; |
---|
| 1740 | + struct bpf_reg_state *reg, *regs = state->regs; |
---|
| 1741 | + bool rw64; |
---|
914 | 1742 | |
---|
915 | 1743 | if (regno >= MAX_BPF_REG) { |
---|
916 | 1744 | verbose(env, "R%d is invalid\n", regno); |
---|
917 | 1745 | return -EINVAL; |
---|
918 | 1746 | } |
---|
919 | 1747 | |
---|
| 1748 | + reg = ®s[regno]; |
---|
| 1749 | + rw64 = is_reg64(env, insn, regno, reg, t); |
---|
920 | 1750 | if (t == SRC_OP) { |
---|
921 | 1751 | /* check whether register used as source operand can be read */ |
---|
922 | | - if (regs[regno].type == NOT_INIT) { |
---|
| 1752 | + if (reg->type == NOT_INIT) { |
---|
923 | 1753 | verbose(env, "R%d !read_ok\n", regno); |
---|
924 | 1754 | return -EACCES; |
---|
925 | 1755 | } |
---|
926 | 1756 | /* We don't need to worry about FP liveness because it's read-only */ |
---|
927 | | - if (regno != BPF_REG_FP) |
---|
928 | | - return mark_reg_read(env, ®s[regno], |
---|
929 | | - regs[regno].parent); |
---|
| 1757 | + if (regno == BPF_REG_FP) |
---|
| 1758 | + return 0; |
---|
| 1759 | + |
---|
| 1760 | + if (rw64) |
---|
| 1761 | + mark_insn_zext(env, reg); |
---|
| 1762 | + |
---|
| 1763 | + return mark_reg_read(env, reg, reg->parent, |
---|
| 1764 | + rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32); |
---|
930 | 1765 | } else { |
---|
931 | 1766 | /* check whether register used as dest operand can be written to */ |
---|
932 | 1767 | if (regno == BPF_REG_FP) { |
---|
933 | 1768 | verbose(env, "frame pointer is read only\n"); |
---|
934 | 1769 | return -EACCES; |
---|
935 | 1770 | } |
---|
936 | | - regs[regno].live |= REG_LIVE_WRITTEN; |
---|
| 1771 | + reg->live |= REG_LIVE_WRITTEN; |
---|
| 1772 | + reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1; |
---|
937 | 1773 | if (t == DST_OP) |
---|
938 | 1774 | mark_reg_unknown(env, regs, regno); |
---|
939 | 1775 | } |
---|
940 | 1776 | return 0; |
---|
| 1777 | +} |
---|
| 1778 | + |
---|
| 1779 | +/* for any branch, call, exit record the history of jmps in the given state */ |
---|
| 1780 | +static int push_jmp_history(struct bpf_verifier_env *env, |
---|
| 1781 | + struct bpf_verifier_state *cur) |
---|
| 1782 | +{ |
---|
| 1783 | + u32 cnt = cur->jmp_history_cnt; |
---|
| 1784 | + struct bpf_idx_pair *p; |
---|
| 1785 | + |
---|
| 1786 | + cnt++; |
---|
| 1787 | + p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); |
---|
| 1788 | + if (!p) |
---|
| 1789 | + return -ENOMEM; |
---|
| 1790 | + p[cnt - 1].idx = env->insn_idx; |
---|
| 1791 | + p[cnt - 1].prev_idx = env->prev_insn_idx; |
---|
| 1792 | + cur->jmp_history = p; |
---|
| 1793 | + cur->jmp_history_cnt = cnt; |
---|
| 1794 | + return 0; |
---|
| 1795 | +} |
---|
| 1796 | + |
---|
| 1797 | +/* Backtrack one insn at a time. If idx is not at the top of recorded |
---|
| 1798 | + * history then previous instruction came from straight line execution. |
---|
| 1799 | + */ |
---|
| 1800 | +static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, |
---|
| 1801 | + u32 *history) |
---|
| 1802 | +{ |
---|
| 1803 | + u32 cnt = *history; |
---|
| 1804 | + |
---|
| 1805 | + if (cnt && st->jmp_history[cnt - 1].idx == i) { |
---|
| 1806 | + i = st->jmp_history[cnt - 1].prev_idx; |
---|
| 1807 | + (*history)--; |
---|
| 1808 | + } else { |
---|
| 1809 | + i--; |
---|
| 1810 | + } |
---|
| 1811 | + return i; |
---|
| 1812 | +} |
---|
| 1813 | + |
---|
| 1814 | +/* For given verifier state backtrack_insn() is called from the last insn to |
---|
| 1815 | + * the first insn. Its purpose is to compute a bitmask of registers and |
---|
| 1816 | + * stack slots that need precision in the parent verifier state. |
---|
| 1817 | + */ |
---|
| 1818 | +static int backtrack_insn(struct bpf_verifier_env *env, int idx, |
---|
| 1819 | + u32 *reg_mask, u64 *stack_mask) |
---|
| 1820 | +{ |
---|
| 1821 | + const struct bpf_insn_cbs cbs = { |
---|
| 1822 | + .cb_print = verbose, |
---|
| 1823 | + .private_data = env, |
---|
| 1824 | + }; |
---|
| 1825 | + struct bpf_insn *insn = env->prog->insnsi + idx; |
---|
| 1826 | + u8 class = BPF_CLASS(insn->code); |
---|
| 1827 | + u8 opcode = BPF_OP(insn->code); |
---|
| 1828 | + u8 mode = BPF_MODE(insn->code); |
---|
| 1829 | + u32 dreg = 1u << insn->dst_reg; |
---|
| 1830 | + u32 sreg = 1u << insn->src_reg; |
---|
| 1831 | + u32 spi; |
---|
| 1832 | + |
---|
| 1833 | + if (insn->code == 0) |
---|
| 1834 | + return 0; |
---|
| 1835 | + if (env->log.level & BPF_LOG_LEVEL) { |
---|
| 1836 | + verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); |
---|
| 1837 | + verbose(env, "%d: ", idx); |
---|
| 1838 | + print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); |
---|
| 1839 | + } |
---|
| 1840 | + |
---|
| 1841 | + if (class == BPF_ALU || class == BPF_ALU64) { |
---|
| 1842 | + if (!(*reg_mask & dreg)) |
---|
| 1843 | + return 0; |
---|
| 1844 | + if (opcode == BPF_MOV) { |
---|
| 1845 | + if (BPF_SRC(insn->code) == BPF_X) { |
---|
| 1846 | + /* dreg = sreg |
---|
| 1847 | + * dreg needs precision after this insn |
---|
| 1848 | + * sreg needs precision before this insn |
---|
| 1849 | + */ |
---|
| 1850 | + *reg_mask &= ~dreg; |
---|
| 1851 | + *reg_mask |= sreg; |
---|
| 1852 | + } else { |
---|
| 1853 | + /* dreg = K |
---|
| 1854 | + * dreg needs precision after this insn. |
---|
| 1855 | + * Corresponding register is already marked |
---|
| 1856 | + * as precise=true in this verifier state. |
---|
| 1857 | + * No further markings in parent are necessary |
---|
| 1858 | + */ |
---|
| 1859 | + *reg_mask &= ~dreg; |
---|
| 1860 | + } |
---|
| 1861 | + } else { |
---|
| 1862 | + if (BPF_SRC(insn->code) == BPF_X) { |
---|
| 1863 | + /* dreg += sreg |
---|
| 1864 | + * both dreg and sreg need precision |
---|
| 1865 | + * before this insn |
---|
| 1866 | + */ |
---|
| 1867 | + *reg_mask |= sreg; |
---|
| 1868 | + } /* else dreg += K |
---|
| 1869 | + * dreg still needs precision before this insn |
---|
| 1870 | + */ |
---|
| 1871 | + } |
---|
| 1872 | + } else if (class == BPF_LDX) { |
---|
| 1873 | + if (!(*reg_mask & dreg)) |
---|
| 1874 | + return 0; |
---|
| 1875 | + *reg_mask &= ~dreg; |
---|
| 1876 | + |
---|
| 1877 | + /* scalars can only be spilled into stack w/o losing precision. |
---|
| 1878 | + * Load from any other memory can be zero extended. |
---|
| 1879 | + * The desire to keep that precision is already indicated |
---|
| 1880 | + * by 'precise' mark in corresponding register of this state. |
---|
| 1881 | + * No further tracking necessary. |
---|
| 1882 | + */ |
---|
| 1883 | + if (insn->src_reg != BPF_REG_FP) |
---|
| 1884 | + return 0; |
---|
| 1885 | + |
---|
| 1886 | + /* dreg = *(u64 *)[fp - off] was a fill from the stack. |
---|
| 1887 | + * that [fp - off] slot contains scalar that needs to be |
---|
| 1888 | + * tracked with precision |
---|
| 1889 | + */ |
---|
| 1890 | + spi = (-insn->off - 1) / BPF_REG_SIZE; |
---|
| 1891 | + if (spi >= 64) { |
---|
| 1892 | + verbose(env, "BUG spi %d\n", spi); |
---|
| 1893 | + WARN_ONCE(1, "verifier backtracking bug"); |
---|
| 1894 | + return -EFAULT; |
---|
| 1895 | + } |
---|
| 1896 | + *stack_mask |= 1ull << spi; |
---|
| 1897 | + } else if (class == BPF_STX || class == BPF_ST) { |
---|
| 1898 | + if (*reg_mask & dreg) |
---|
| 1899 | + /* stx & st shouldn't be using _scalar_ dst_reg |
---|
| 1900 | + * to access memory. It means backtracking |
---|
| 1901 | + * encountered a case of pointer subtraction. |
---|
| 1902 | + */ |
---|
| 1903 | + return -ENOTSUPP; |
---|
| 1904 | + /* scalars can only be spilled into stack */ |
---|
| 1905 | + if (insn->dst_reg != BPF_REG_FP) |
---|
| 1906 | + return 0; |
---|
| 1907 | + spi = (-insn->off - 1) / BPF_REG_SIZE; |
---|
| 1908 | + if (spi >= 64) { |
---|
| 1909 | + verbose(env, "BUG spi %d\n", spi); |
---|
| 1910 | + WARN_ONCE(1, "verifier backtracking bug"); |
---|
| 1911 | + return -EFAULT; |
---|
| 1912 | + } |
---|
| 1913 | + if (!(*stack_mask & (1ull << spi))) |
---|
| 1914 | + return 0; |
---|
| 1915 | + *stack_mask &= ~(1ull << spi); |
---|
| 1916 | + if (class == BPF_STX) |
---|
| 1917 | + *reg_mask |= sreg; |
---|
| 1918 | + } else if (class == BPF_JMP || class == BPF_JMP32) { |
---|
| 1919 | + if (opcode == BPF_CALL) { |
---|
| 1920 | + if (insn->src_reg == BPF_PSEUDO_CALL) |
---|
| 1921 | + return -ENOTSUPP; |
---|
| 1922 | + /* regular helper call sets R0 */ |
---|
| 1923 | + *reg_mask &= ~1; |
---|
| 1924 | + if (*reg_mask & 0x3f) { |
---|
| 1925 | + /* if backtracing was looking for registers R1-R5 |
---|
| 1926 | + * they should have been found already. |
---|
| 1927 | + */ |
---|
| 1928 | + verbose(env, "BUG regs %x\n", *reg_mask); |
---|
| 1929 | + WARN_ONCE(1, "verifier backtracking bug"); |
---|
| 1930 | + return -EFAULT; |
---|
| 1931 | + } |
---|
| 1932 | + } else if (opcode == BPF_EXIT) { |
---|
| 1933 | + return -ENOTSUPP; |
---|
| 1934 | + } else if (BPF_SRC(insn->code) == BPF_X) { |
---|
| 1935 | + if (!(*reg_mask & (dreg | sreg))) |
---|
| 1936 | + return 0; |
---|
| 1937 | + /* dreg <cond> sreg |
---|
| 1938 | + * Both dreg and sreg need precision before |
---|
| 1939 | + * this insn. If only sreg was marked precise |
---|
| 1940 | + * before it would be equally necessary to |
---|
| 1941 | + * propagate it to dreg. |
---|
| 1942 | + */ |
---|
| 1943 | + *reg_mask |= (sreg | dreg); |
---|
| 1944 | + /* else dreg <cond> K |
---|
| 1945 | + * Only dreg still needs precision before |
---|
| 1946 | + * this insn, so for the K-based conditional |
---|
| 1947 | + * there is nothing new to be marked. |
---|
| 1948 | + */ |
---|
| 1949 | + } |
---|
| 1950 | + } else if (class == BPF_LD) { |
---|
| 1951 | + if (!(*reg_mask & dreg)) |
---|
| 1952 | + return 0; |
---|
| 1953 | + *reg_mask &= ~dreg; |
---|
| 1954 | + /* It's ld_imm64 or ld_abs or ld_ind. |
---|
| 1955 | + * For ld_imm64 no further tracking of precision |
---|
| 1956 | + * into parent is necessary |
---|
| 1957 | + */ |
---|
| 1958 | + if (mode == BPF_IND || mode == BPF_ABS) |
---|
| 1959 | + /* to be analyzed */ |
---|
| 1960 | + return -ENOTSUPP; |
---|
| 1961 | + } |
---|
| 1962 | + return 0; |
---|
| 1963 | +} |
---|
| 1964 | + |
---|
| 1965 | +/* the scalar precision tracking algorithm: |
---|
| 1966 | + * . at the start all registers have precise=false. |
---|
| 1967 | + * . scalar ranges are tracked as normal through alu and jmp insns. |
---|
| 1968 | + * . once precise value of the scalar register is used in: |
---|
| 1969 | + * . ptr + scalar alu |
---|
| 1970 | + * . if (scalar cond K|scalar) |
---|
| 1971 | + * . helper_call(.., scalar, ...) where ARG_CONST is expected |
---|
| 1972 | + * backtrack through the verifier states and mark all registers and |
---|
| 1973 | + * stack slots with spilled constants that these scalar registers |
---|
| 1974 | + * should be precise. |
---|
| 1975 | + * . during state pruning two registers (or spilled stack slots) |
---|
| 1976 | + * are equivalent if both are not precise. |
---|
| 1977 | + * |
---|
| 1978 | + * Note the verifier cannot simply walk register parentage chain, |
---|
| 1979 | + * since many different registers and stack slots could have been |
---|
| 1980 | + * used to compute single precise scalar. |
---|
| 1981 | + * |
---|
| 1982 | + * The approach of starting with precise=true for all registers and then |
---|
| 1983 | + * backtrack to mark a register as not precise when the verifier detects |
---|
| 1984 | + * that program doesn't care about specific value (e.g., when helper |
---|
| 1985 | + * takes register as ARG_ANYTHING parameter) is not safe. |
---|
| 1986 | + * |
---|
| 1987 | + * It's ok to walk single parentage chain of the verifier states. |
---|
| 1988 | + * It's possible that this backtracking will go all the way till 1st insn. |
---|
| 1989 | + * All other branches will be explored for needing precision later. |
---|
| 1990 | + * |
---|
| 1991 | + * The backtracking needs to deal with cases like: |
---|
| 1992 | + * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0) |
---|
| 1993 | + * r9 -= r8 |
---|
| 1994 | + * r5 = r9 |
---|
| 1995 | + * if r5 > 0x79f goto pc+7 |
---|
| 1996 | + * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff)) |
---|
| 1997 | + * r5 += 1 |
---|
| 1998 | + * ... |
---|
| 1999 | + * call bpf_perf_event_output#25 |
---|
| 2000 | + * where .arg5_type = ARG_CONST_SIZE_OR_ZERO |
---|
| 2001 | + * |
---|
| 2002 | + * and this case: |
---|
| 2003 | + * r6 = 1 |
---|
| 2004 | + * call foo // uses callee's r6 inside to compute r0 |
---|
| 2005 | + * r0 += r6 |
---|
| 2006 | + * if r0 == 0 goto |
---|
| 2007 | + * |
---|
| 2008 | + * to track above reg_mask/stack_mask needs to be independent for each frame. |
---|
| 2009 | + * |
---|
| 2010 | + * Also if parent's curframe > frame where backtracking started, |
---|
| 2011 | + * the verifier needs to mark registers in both frames, otherwise callees |
---|
| 2012 | + * may incorrectly prune callers. This is similar to |
---|
| 2013 | + * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") |
---|
| 2014 | + * |
---|
| 2015 | + * For now backtracking falls back into conservative marking. |
---|
| 2016 | + */ |
---|
| 2017 | +static void mark_all_scalars_precise(struct bpf_verifier_env *env, |
---|
| 2018 | + struct bpf_verifier_state *st) |
---|
| 2019 | +{ |
---|
| 2020 | + struct bpf_func_state *func; |
---|
| 2021 | + struct bpf_reg_state *reg; |
---|
| 2022 | + int i, j; |
---|
| 2023 | + |
---|
| 2024 | + /* big hammer: mark all scalars precise in this path. |
---|
| 2025 | + * pop_stack may still get !precise scalars. |
---|
| 2026 | + * We also skip current state and go straight to first parent state, |
---|
| 2027 | + * because precision markings in current non-checkpointed state are |
---|
| 2028 | + * not needed. See why in the comment in __mark_chain_precision below. |
---|
| 2029 | + */ |
---|
| 2030 | + for (st = st->parent; st; st = st->parent) { |
---|
| 2031 | + for (i = 0; i <= st->curframe; i++) { |
---|
| 2032 | + func = st->frame[i]; |
---|
| 2033 | + for (j = 0; j < BPF_REG_FP; j++) { |
---|
| 2034 | + reg = &func->regs[j]; |
---|
| 2035 | + if (reg->type != SCALAR_VALUE) |
---|
| 2036 | + continue; |
---|
| 2037 | + reg->precise = true; |
---|
| 2038 | + } |
---|
| 2039 | + for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { |
---|
| 2040 | + if (!is_spilled_reg(&func->stack[j])) |
---|
| 2041 | + continue; |
---|
| 2042 | + reg = &func->stack[j].spilled_ptr; |
---|
| 2043 | + if (reg->type != SCALAR_VALUE) |
---|
| 2044 | + continue; |
---|
| 2045 | + reg->precise = true; |
---|
| 2046 | + } |
---|
| 2047 | + } |
---|
| 2048 | + } |
---|
| 2049 | +} |
---|
| 2050 | + |
---|
| 2051 | +static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st) |
---|
| 2052 | +{ |
---|
| 2053 | + struct bpf_func_state *func; |
---|
| 2054 | + struct bpf_reg_state *reg; |
---|
| 2055 | + int i, j; |
---|
| 2056 | + |
---|
| 2057 | + for (i = 0; i <= st->curframe; i++) { |
---|
| 2058 | + func = st->frame[i]; |
---|
| 2059 | + for (j = 0; j < BPF_REG_FP; j++) { |
---|
| 2060 | + reg = &func->regs[j]; |
---|
| 2061 | + if (reg->type != SCALAR_VALUE) |
---|
| 2062 | + continue; |
---|
| 2063 | + reg->precise = false; |
---|
| 2064 | + } |
---|
| 2065 | + for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { |
---|
| 2066 | + if (!is_spilled_reg(&func->stack[j])) |
---|
| 2067 | + continue; |
---|
| 2068 | + reg = &func->stack[j].spilled_ptr; |
---|
| 2069 | + if (reg->type != SCALAR_VALUE) |
---|
| 2070 | + continue; |
---|
| 2071 | + reg->precise = false; |
---|
| 2072 | + } |
---|
| 2073 | + } |
---|
| 2074 | +} |
---|
| 2075 | + |
---|
| 2076 | +/* |
---|
| 2077 | + * __mark_chain_precision() backtracks BPF program instruction sequence and |
---|
| 2078 | + * chain of verifier states making sure that register *regno* (if regno >= 0) |
---|
| 2079 | + * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked |
---|
| 2080 | + * SCALARS, as well as any other registers and slots that contribute to |
---|
| 2081 | + * a tracked state of given registers/stack slots, depending on specific BPF |
---|
| 2082 | + * assembly instructions (see backtrack_insn() for exact instruction handling |
---|
| 2083 | + * logic). This backtracking relies on recorded jmp_history and is able to |
---|
| 2084 | + * traverse entire chain of parent states. This process ends only when all the |
---|
| 2085 | + * necessary registers/slots and their transitive dependencies are marked as |
---|
| 2086 | + * precise. |
---|
| 2087 | + * |
---|
| 2088 | + * One important and subtle aspect is that precise marks *do not matter* in |
---|
| 2089 | + * the currently verified state (current state). It is important to understand |
---|
| 2090 | + * why this is the case. |
---|
| 2091 | + * |
---|
| 2092 | + * First, note that current state is the state that is not yet "checkpointed", |
---|
| 2093 | + * i.e., it is not yet put into env->explored_states, and it has no children |
---|
| 2094 | + * states as well. It's ephemeral, and can end up either a) being discarded if |
---|
| 2095 | + * compatible explored state is found at some point or BPF_EXIT instruction is |
---|
| 2096 | + * reached or b) checkpointed and put into env->explored_states, branching out |
---|
| 2097 | + * into one or more children states. |
---|
| 2098 | + * |
---|
| 2099 | + * In the former case, precise markings in current state are completely |
---|
| 2100 | + * ignored by state comparison code (see regsafe() for details). Only |
---|
| 2101 | + * checkpointed ("old") state precise markings are important, and if old |
---|
| 2102 | + * state's register/slot is precise, regsafe() assumes current state's |
---|
| 2103 | + * register/slot as precise and checks value ranges exactly and precisely. If |
---|
| 2104 | + * states turn out to be compatible, current state's necessary precise |
---|
| 2105 | + * markings and any required parent states' precise markings are enforced |
---|
| 2106 | + * after the fact with propagate_precision() logic. But it's |
---|
| 2107 | + * important to realize that in this case, even after marking current state |
---|
| 2108 | + * registers/slots as precise, we immediately discard current state. So what |
---|
| 2109 | + * actually matters is any of the precise markings propagated into current |
---|
| 2110 | + * state's parent states, which are always checkpointed (due to b) case above). |
---|
| 2111 | + * As such, for scenario a) it doesn't matter if current state has precise |
---|
| 2112 | + * markings set or not. |
---|
| 2113 | + * |
---|
| 2114 | + * Now, for the scenario b), checkpointing and forking into child(ren) |
---|
| 2115 | + * state(s). Note that before current state gets to checkpointing step, any |
---|
| 2116 | + * processed instruction always assumes precise SCALAR register/slot |
---|
| 2117 | + * knowledge: if precise value or range is useful to prune jump branch, BPF |
---|
| 2118 | + * verifier takes this opportunity enthusiastically. Similarly, when |
---|
| 2119 | + * register's value is used to calculate offset or memory address, exact |
---|
| 2120 | + * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to |
---|
| 2121 | + * what we mentioned above about state comparison ignoring precise markings |
---|
| 2122 | + * during state comparison, BPF verifier ignores and also assumes precise |
---|
| 2123 | + * markings *at will* during instruction verification process. But as verifier |
---|
| 2124 | + * assumes precision, it also propagates any precision dependencies across |
---|
| 2125 | + * parent states, which are not yet finalized, so can be further restricted |
---|
| 2126 | + * based on new knowledge gained from restrictions enforced by their children |
---|
| 2127 | + * states. This is so that once those parent states are finalized, i.e., when |
---|
| 2128 | + * they have no more active children state, state comparison logic in |
---|
| 2129 | + * is_state_visited() would enforce strict and precise SCALAR ranges, if |
---|
| 2130 | + * required for correctness. |
---|
| 2131 | + * |
---|
| 2132 | + * To build a bit more intuition, note also that once a state is checkpointed, |
---|
| 2133 | + * the path we took to get to that state is not important. This is crucial |
---|
| 2134 | + * property for state pruning. When state is checkpointed and finalized at |
---|
| 2135 | + * some instruction index, it can be correctly and safely used to "short |
---|
| 2136 | + * circuit" any *compatible* state that reaches exactly the same instruction |
---|
| 2137 | + * index. I.e., if we jumped to that instruction from a completely different |
---|
| 2138 | + * code path than original finalized state was derived from, it doesn't |
---|
| 2139 | + * matter, current state can be discarded because from that instruction |
---|
| 2140 | + * forward having a compatible state will ensure we will safely reach the |
---|
| 2141 | + * exit. States describe preconditions for further exploration, but completely |
---|
| 2142 | + * forget the history of how we got here. |
---|
| 2143 | + * |
---|
| 2144 | + * This also means that even if we needed precise SCALAR range to get to |
---|
| 2145 | + * finalized state, but from that point forward *that same* SCALAR register is |
---|
| 2146 | + * never used in a precise context (i.e., its precise value is not needed for |
---|
| 2147 | + * correctness), it's correct and safe to mark such register as "imprecise" |
---|
| 2148 | + * (i.e., precise marking set to false). This is what we rely on when we do |
---|
| 2149 | + * not set precise marking in current state. If no child state requires |
---|
| 2150 | + * precision for any given SCALAR register, it's safe to dictate that it can |
---|
| 2151 | + * be imprecise. If any child state does require this register to be precise, |
---|
| 2152 | + * we'll mark it precise later retroactively during precise markings |
---|
| 2153 | + * propagation from child state to parent states. |
---|
| 2154 | + * |
---|
| 2155 | + * Skipping precise marking setting in current state is a mild version of |
---|
| 2156 | + * relying on the above observation. But we can utilize this property even |
---|
| 2157 | + * more aggressively by proactively forgetting any precise marking in the |
---|
| 2158 | + * current state (which we inherited from the parent state), right before we |
---|
| 2159 | + * checkpoint it and branch off into new child state. This is done by |
---|
| 2160 | + * mark_all_scalars_imprecise() to hopefully get more permissive and generic |
---|
| 2161 | + * finalized states which help in short circuiting more future states. |
---|
| 2162 | + */ |
---|
| 2163 | +static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno, |
---|
| 2164 | + int spi) |
---|
| 2165 | +{ |
---|
| 2166 | + struct bpf_verifier_state *st = env->cur_state; |
---|
| 2167 | + int first_idx = st->first_insn_idx; |
---|
| 2168 | + int last_idx = env->insn_idx; |
---|
| 2169 | + struct bpf_func_state *func; |
---|
| 2170 | + struct bpf_reg_state *reg; |
---|
| 2171 | + u32 reg_mask = regno >= 0 ? 1u << regno : 0; |
---|
| 2172 | + u64 stack_mask = spi >= 0 ? 1ull << spi : 0; |
---|
| 2173 | + bool skip_first = true; |
---|
| 2174 | + bool new_marks = false; |
---|
| 2175 | + int i, err; |
---|
| 2176 | + |
---|
| 2177 | + if (!env->bpf_capable) |
---|
| 2178 | + return 0; |
---|
| 2179 | + |
---|
| 2180 | + /* Do sanity checks against current state of register and/or stack |
---|
| 2181 | + * slot, but don't set precise flag in current state, as precision |
---|
| 2182 | + * tracking in the current state is unnecessary. |
---|
| 2183 | + */ |
---|
| 2184 | + func = st->frame[frame]; |
---|
| 2185 | + if (regno >= 0) { |
---|
| 2186 | + reg = &func->regs[regno]; |
---|
| 2187 | + if (reg->type != SCALAR_VALUE) { |
---|
| 2188 | + WARN_ONCE(1, "backtracing misuse"); |
---|
| 2189 | + return -EFAULT; |
---|
| 2190 | + } |
---|
| 2191 | + new_marks = true; |
---|
| 2192 | + } |
---|
| 2193 | + |
---|
| 2194 | + while (spi >= 0) { |
---|
| 2195 | + if (!is_spilled_reg(&func->stack[spi])) { |
---|
| 2196 | + stack_mask = 0; |
---|
| 2197 | + break; |
---|
| 2198 | + } |
---|
| 2199 | + reg = &func->stack[spi].spilled_ptr; |
---|
| 2200 | + if (reg->type != SCALAR_VALUE) { |
---|
| 2201 | + stack_mask = 0; |
---|
| 2202 | + break; |
---|
| 2203 | + } |
---|
| 2204 | + new_marks = true; |
---|
| 2205 | + break; |
---|
| 2206 | + } |
---|
| 2207 | + |
---|
| 2208 | + if (!new_marks) |
---|
| 2209 | + return 0; |
---|
| 2210 | + if (!reg_mask && !stack_mask) |
---|
| 2211 | + return 0; |
---|
| 2212 | + |
---|
| 2213 | + for (;;) { |
---|
| 2214 | + DECLARE_BITMAP(mask, 64); |
---|
| 2215 | + u32 history = st->jmp_history_cnt; |
---|
| 2216 | + |
---|
| 2217 | + if (env->log.level & BPF_LOG_LEVEL) |
---|
| 2218 | + verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); |
---|
| 2219 | + |
---|
| 2220 | + if (last_idx < 0) { |
---|
| 2221 | + /* we are at the entry into subprog, which |
---|
| 2222 | + * is expected for global funcs, but only if |
---|
| 2223 | + * requested precise registers are R1-R5 |
---|
| 2224 | + * (which are global func's input arguments) |
---|
| 2225 | + */ |
---|
| 2226 | + if (st->curframe == 0 && |
---|
| 2227 | + st->frame[0]->subprogno > 0 && |
---|
| 2228 | + st->frame[0]->callsite == BPF_MAIN_FUNC && |
---|
| 2229 | + stack_mask == 0 && (reg_mask & ~0x3e) == 0) { |
---|
| 2230 | + bitmap_from_u64(mask, reg_mask); |
---|
| 2231 | + for_each_set_bit(i, mask, 32) { |
---|
| 2232 | + reg = &st->frame[0]->regs[i]; |
---|
| 2233 | + if (reg->type != SCALAR_VALUE) { |
---|
| 2234 | + reg_mask &= ~(1u << i); |
---|
| 2235 | + continue; |
---|
| 2236 | + } |
---|
| 2237 | + reg->precise = true; |
---|
| 2238 | + } |
---|
| 2239 | + return 0; |
---|
| 2240 | + } |
---|
| 2241 | + |
---|
| 2242 | + verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n", |
---|
| 2243 | + st->frame[0]->subprogno, reg_mask, stack_mask); |
---|
| 2244 | + WARN_ONCE(1, "verifier backtracking bug"); |
---|
| 2245 | + return -EFAULT; |
---|
| 2246 | + } |
---|
| 2247 | + |
---|
| 2248 | + for (i = last_idx;;) { |
---|
| 2249 | + if (skip_first) { |
---|
| 2250 | + err = 0; |
---|
| 2251 | + skip_first = false; |
---|
| 2252 | + } else { |
---|
| 2253 | + err = backtrack_insn(env, i, ®_mask, &stack_mask); |
---|
| 2254 | + } |
---|
| 2255 | + if (err == -ENOTSUPP) { |
---|
| 2256 | + mark_all_scalars_precise(env, st); |
---|
| 2257 | + return 0; |
---|
| 2258 | + } else if (err) { |
---|
| 2259 | + return err; |
---|
| 2260 | + } |
---|
| 2261 | + if (!reg_mask && !stack_mask) |
---|
| 2262 | + /* Found assignment(s) into tracked register in this state. |
---|
| 2263 | + * Since this state is already marked, just return. |
---|
| 2264 | + * Nothing to be tracked further in the parent state. |
---|
| 2265 | + */ |
---|
| 2266 | + return 0; |
---|
| 2267 | + if (i == first_idx) |
---|
| 2268 | + break; |
---|
| 2269 | + i = get_prev_insn_idx(st, i, &history); |
---|
| 2270 | + if (i >= env->prog->len) { |
---|
| 2271 | + /* This can happen if backtracking reached insn 0 |
---|
| 2272 | + * and there are still reg_mask or stack_mask |
---|
| 2273 | + * to backtrack. |
---|
| 2274 | + * It means the backtracking missed the spot where |
---|
| 2275 | + * particular register was initialized with a constant. |
---|
| 2276 | + */ |
---|
| 2277 | + verbose(env, "BUG backtracking idx %d\n", i); |
---|
| 2278 | + WARN_ONCE(1, "verifier backtracking bug"); |
---|
| 2279 | + return -EFAULT; |
---|
| 2280 | + } |
---|
| 2281 | + } |
---|
| 2282 | + st = st->parent; |
---|
| 2283 | + if (!st) |
---|
| 2284 | + break; |
---|
| 2285 | + |
---|
| 2286 | + new_marks = false; |
---|
| 2287 | + func = st->frame[frame]; |
---|
| 2288 | + bitmap_from_u64(mask, reg_mask); |
---|
| 2289 | + for_each_set_bit(i, mask, 32) { |
---|
| 2290 | + reg = &func->regs[i]; |
---|
| 2291 | + if (reg->type != SCALAR_VALUE) { |
---|
| 2292 | + reg_mask &= ~(1u << i); |
---|
| 2293 | + continue; |
---|
| 2294 | + } |
---|
| 2295 | + if (!reg->precise) |
---|
| 2296 | + new_marks = true; |
---|
| 2297 | + reg->precise = true; |
---|
| 2298 | + } |
---|
| 2299 | + |
---|
| 2300 | + bitmap_from_u64(mask, stack_mask); |
---|
| 2301 | + for_each_set_bit(i, mask, 64) { |
---|
| 2302 | + if (i >= func->allocated_stack / BPF_REG_SIZE) { |
---|
| 2303 | + /* the sequence of instructions: |
---|
| 2304 | + * 2: (bf) r3 = r10 |
---|
| 2305 | + * 3: (7b) *(u64 *)(r3 -8) = r0 |
---|
| 2306 | + * 4: (79) r4 = *(u64 *)(r10 -8) |
---|
| 2307 | + * doesn't contain jmps. It's backtracked |
---|
| 2308 | + * as a single block. |
---|
| 2309 | + * During backtracking insn 3 is not recognized as |
---|
| 2310 | + * stack access, so at the end of backtracking |
---|
| 2311 | + * stack slot fp-8 is still marked in stack_mask. |
---|
| 2312 | + * However the parent state may not have accessed |
---|
| 2313 | + * fp-8 and it's "unallocated" stack space. |
---|
| 2314 | + * In such case fallback to conservative. |
---|
| 2315 | + */ |
---|
| 2316 | + mark_all_scalars_precise(env, st); |
---|
| 2317 | + return 0; |
---|
| 2318 | + } |
---|
| 2319 | + |
---|
| 2320 | + if (!is_spilled_reg(&func->stack[i])) { |
---|
| 2321 | + stack_mask &= ~(1ull << i); |
---|
| 2322 | + continue; |
---|
| 2323 | + } |
---|
| 2324 | + reg = &func->stack[i].spilled_ptr; |
---|
| 2325 | + if (reg->type != SCALAR_VALUE) { |
---|
| 2326 | + stack_mask &= ~(1ull << i); |
---|
| 2327 | + continue; |
---|
| 2328 | + } |
---|
| 2329 | + if (!reg->precise) |
---|
| 2330 | + new_marks = true; |
---|
| 2331 | + reg->precise = true; |
---|
| 2332 | + } |
---|
| 2333 | + if (env->log.level & BPF_LOG_LEVEL) { |
---|
| 2334 | + print_verifier_state(env, func); |
---|
| 2335 | + verbose(env, "parent %s regs=%x stack=%llx marks\n", |
---|
| 2336 | + new_marks ? "didn't have" : "already had", |
---|
| 2337 | + reg_mask, stack_mask); |
---|
| 2338 | + } |
---|
| 2339 | + |
---|
| 2340 | + if (!reg_mask && !stack_mask) |
---|
| 2341 | + break; |
---|
| 2342 | + if (!new_marks) |
---|
| 2343 | + break; |
---|
| 2344 | + |
---|
| 2345 | + last_idx = st->last_insn_idx; |
---|
| 2346 | + first_idx = st->first_insn_idx; |
---|
| 2347 | + } |
---|
| 2348 | + return 0; |
---|
| 2349 | +} |
---|
| 2350 | + |
---|
| 2351 | +static int mark_chain_precision(struct bpf_verifier_env *env, int regno) |
---|
| 2352 | +{ |
---|
| 2353 | + return __mark_chain_precision(env, env->cur_state->curframe, regno, -1); |
---|
| 2354 | +} |
---|
| 2355 | + |
---|
| 2356 | +static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno) |
---|
| 2357 | +{ |
---|
| 2358 | + return __mark_chain_precision(env, frame, regno, -1); |
---|
| 2359 | +} |
---|
| 2360 | + |
---|
| 2361 | +static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi) |
---|
| 2362 | +{ |
---|
| 2363 | + return __mark_chain_precision(env, frame, -1, spi); |
---|
941 | 2364 | } |
---|
942 | 2365 | |
---|
943 | 2366 | static bool is_spillable_regtype(enum bpf_reg_type type) |
---|
.. | .. |
---|
950 | 2373 | case PTR_TO_PACKET: |
---|
951 | 2374 | case PTR_TO_PACKET_META: |
---|
952 | 2375 | case PTR_TO_PACKET_END: |
---|
| 2376 | + case PTR_TO_FLOW_KEYS: |
---|
953 | 2377 | case CONST_PTR_TO_MAP: |
---|
| 2378 | + case PTR_TO_SOCKET: |
---|
| 2379 | + case PTR_TO_SOCKET_OR_NULL: |
---|
| 2380 | + case PTR_TO_SOCK_COMMON: |
---|
| 2381 | + case PTR_TO_SOCK_COMMON_OR_NULL: |
---|
| 2382 | + case PTR_TO_TCP_SOCK: |
---|
| 2383 | + case PTR_TO_TCP_SOCK_OR_NULL: |
---|
| 2384 | + case PTR_TO_XDP_SOCK: |
---|
| 2385 | + case PTR_TO_BTF_ID: |
---|
| 2386 | + case PTR_TO_BTF_ID_OR_NULL: |
---|
| 2387 | + case PTR_TO_RDONLY_BUF: |
---|
| 2388 | + case PTR_TO_RDONLY_BUF_OR_NULL: |
---|
| 2389 | + case PTR_TO_RDWR_BUF: |
---|
| 2390 | + case PTR_TO_RDWR_BUF_OR_NULL: |
---|
| 2391 | + case PTR_TO_PERCPU_BTF_ID: |
---|
| 2392 | + case PTR_TO_MEM: |
---|
| 2393 | + case PTR_TO_MEM_OR_NULL: |
---|
954 | 2394 | return true; |
---|
955 | 2395 | default: |
---|
956 | 2396 | return false; |
---|
.. | .. |
---|
968 | 2408 | return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); |
---|
969 | 2409 | } |
---|
970 | 2410 | |
---|
| 2411 | +static bool __is_scalar_unbounded(struct bpf_reg_state *reg) |
---|
| 2412 | +{ |
---|
| 2413 | + return tnum_is_unknown(reg->var_off) && |
---|
| 2414 | + reg->smin_value == S64_MIN && reg->smax_value == S64_MAX && |
---|
| 2415 | + reg->umin_value == 0 && reg->umax_value == U64_MAX && |
---|
| 2416 | + reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX && |
---|
| 2417 | + reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX; |
---|
| 2418 | +} |
---|
| 2419 | + |
---|
| 2420 | +static bool register_is_bounded(struct bpf_reg_state *reg) |
---|
| 2421 | +{ |
---|
| 2422 | + return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg); |
---|
| 2423 | +} |
---|
| 2424 | + |
---|
| 2425 | +static bool __is_pointer_value(bool allow_ptr_leaks, |
---|
| 2426 | + const struct bpf_reg_state *reg) |
---|
| 2427 | +{ |
---|
| 2428 | + if (allow_ptr_leaks) |
---|
| 2429 | + return false; |
---|
| 2430 | + |
---|
| 2431 | + return reg->type != SCALAR_VALUE; |
---|
| 2432 | +} |
---|
| 2433 | + |
---|
| 2434 | +/* Copy src state preserving dst->parent and dst->live fields */ |
---|
| 2435 | +static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src) |
---|
| 2436 | +{ |
---|
| 2437 | + struct bpf_reg_state *parent = dst->parent; |
---|
| 2438 | + enum bpf_reg_liveness live = dst->live; |
---|
| 2439 | + |
---|
| 2440 | + *dst = *src; |
---|
| 2441 | + dst->parent = parent; |
---|
| 2442 | + dst->live = live; |
---|
| 2443 | +} |
---|
| 2444 | + |
---|
971 | 2445 | static void save_register_state(struct bpf_func_state *state, |
---|
972 | | - int spi, struct bpf_reg_state *reg) |
---|
| 2446 | + int spi, struct bpf_reg_state *reg, |
---|
| 2447 | + int size) |
---|
973 | 2448 | { |
---|
974 | 2449 | int i; |
---|
975 | 2450 | |
---|
976 | | - state->stack[spi].spilled_ptr = *reg; |
---|
977 | | - state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; |
---|
| 2451 | + copy_register_state(&state->stack[spi].spilled_ptr, reg); |
---|
| 2452 | + if (size == BPF_REG_SIZE) |
---|
| 2453 | + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; |
---|
978 | 2454 | |
---|
979 | | - for (i = 0; i < BPF_REG_SIZE; i++) |
---|
980 | | - state->stack[spi].slot_type[i] = STACK_SPILL; |
---|
| 2455 | + for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--) |
---|
| 2456 | + state->stack[spi].slot_type[i - 1] = STACK_SPILL; |
---|
| 2457 | + |
---|
| 2458 | + /* size < 8 bytes spill */ |
---|
| 2459 | + for (; i; i--) |
---|
| 2460 | + scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]); |
---|
981 | 2461 | } |
---|
982 | 2462 | |
---|
983 | | -/* check_stack_read/write functions track spill/fill of registers, |
---|
| 2463 | +static bool is_bpf_st_mem(struct bpf_insn *insn) |
---|
| 2464 | +{ |
---|
| 2465 | + return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM; |
---|
| 2466 | +} |
---|
| 2467 | + |
---|
| 2468 | +/* check_stack_{read,write}_fixed_off functions track spill/fill of registers, |
---|
984 | 2469 | * stack boundary and alignment are checked in check_mem_access() |
---|
985 | 2470 | */ |
---|
986 | | -static int check_stack_write(struct bpf_verifier_env *env, |
---|
987 | | - struct bpf_func_state *state, /* func where register points to */ |
---|
988 | | - int off, int size, int value_regno, int insn_idx) |
---|
| 2471 | +static int check_stack_write_fixed_off(struct bpf_verifier_env *env, |
---|
| 2472 | + /* stack frame we're writing to */ |
---|
| 2473 | + struct bpf_func_state *state, |
---|
| 2474 | + int off, int size, int value_regno, |
---|
| 2475 | + int insn_idx) |
---|
989 | 2476 | { |
---|
990 | 2477 | struct bpf_func_state *cur; /* state of the current function */ |
---|
991 | 2478 | int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; |
---|
| 2479 | + struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; |
---|
992 | 2480 | struct bpf_reg_state *reg = NULL; |
---|
| 2481 | + u32 dst_reg = insn->dst_reg; |
---|
993 | 2482 | |
---|
994 | 2483 | err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), |
---|
995 | | - true); |
---|
| 2484 | + state->acquired_refs, true); |
---|
996 | 2485 | if (err) |
---|
997 | 2486 | return err; |
---|
998 | 2487 | /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, |
---|
.. | .. |
---|
1008 | 2497 | cur = env->cur_state->frame[env->cur_state->curframe]; |
---|
1009 | 2498 | if (value_regno >= 0) |
---|
1010 | 2499 | reg = &cur->regs[value_regno]; |
---|
1011 | | - if (!env->allow_ptr_leaks) { |
---|
| 2500 | + if (!env->bypass_spec_v4) { |
---|
1012 | 2501 | bool sanitize = reg && is_spillable_regtype(reg->type); |
---|
1013 | 2502 | |
---|
1014 | 2503 | for (i = 0; i < size; i++) { |
---|
1015 | | - if (state->stack[spi].slot_type[i] == STACK_INVALID) { |
---|
| 2504 | + u8 type = state->stack[spi].slot_type[i]; |
---|
| 2505 | + |
---|
| 2506 | + if (type != STACK_MISC && type != STACK_ZERO) { |
---|
1016 | 2507 | sanitize = true; |
---|
1017 | 2508 | break; |
---|
1018 | 2509 | } |
---|
.. | .. |
---|
1022 | 2513 | env->insn_aux_data[insn_idx].sanitize_stack_spill = true; |
---|
1023 | 2514 | } |
---|
1024 | 2515 | |
---|
1025 | | - if (reg && size == BPF_REG_SIZE && register_is_const(reg) && |
---|
1026 | | - !register_is_null(reg) && env->allow_ptr_leaks) { |
---|
1027 | | - save_register_state(state, spi, reg); |
---|
| 2516 | + if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && |
---|
| 2517 | + !register_is_null(reg) && env->bpf_capable) { |
---|
| 2518 | + if (dst_reg != BPF_REG_FP) { |
---|
| 2519 | + /* The backtracking logic can only recognize explicit |
---|
| 2520 | + * stack slot address like [fp - 8]. Other spill of |
---|
| 2521 | + * scalar via different register has to be conservative. |
---|
| 2522 | + * Backtrack from here and mark all registers as precise |
---|
| 2523 | + * that contributed into 'reg' being a constant. |
---|
| 2524 | + */ |
---|
| 2525 | + err = mark_chain_precision(env, value_regno); |
---|
| 2526 | + if (err) |
---|
| 2527 | + return err; |
---|
| 2528 | + } |
---|
| 2529 | + save_register_state(state, spi, reg, size); |
---|
| 2530 | + /* Break the relation on a narrowing spill. */ |
---|
| 2531 | + if (fls64(reg->umax_value) > BITS_PER_BYTE * size) |
---|
| 2532 | + state->stack[spi].spilled_ptr.id = 0; |
---|
| 2533 | + } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && |
---|
| 2534 | + insn->imm != 0 && env->bpf_capable) { |
---|
| 2535 | + struct bpf_reg_state fake_reg = {}; |
---|
| 2536 | + |
---|
| 2537 | + __mark_reg_known(&fake_reg, (u32)insn->imm); |
---|
| 2538 | + fake_reg.type = SCALAR_VALUE; |
---|
| 2539 | + save_register_state(state, spi, &fake_reg, size); |
---|
1028 | 2540 | } else if (reg && is_spillable_regtype(reg->type)) { |
---|
1029 | 2541 | /* register containing pointer is being spilled into stack */ |
---|
1030 | 2542 | if (size != BPF_REG_SIZE) { |
---|
| 2543 | + verbose_linfo(env, insn_idx, "; "); |
---|
1031 | 2544 | verbose(env, "invalid size of register spill\n"); |
---|
1032 | 2545 | return -EACCES; |
---|
1033 | 2546 | } |
---|
.. | .. |
---|
1035 | 2548 | verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); |
---|
1036 | 2549 | return -EINVAL; |
---|
1037 | 2550 | } |
---|
1038 | | - save_register_state(state, spi, reg); |
---|
| 2551 | + save_register_state(state, spi, reg, size); |
---|
1039 | 2552 | } else { |
---|
1040 | 2553 | u8 type = STACK_MISC; |
---|
1041 | 2554 | |
---|
1042 | 2555 | /* regular write of data into stack destroys any spilled ptr */ |
---|
1043 | 2556 | state->stack[spi].spilled_ptr.type = NOT_INIT; |
---|
1044 | 2557 | /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ |
---|
1045 | | - if (state->stack[spi].slot_type[0] == STACK_SPILL) |
---|
| 2558 | + if (is_spilled_reg(&state->stack[spi])) |
---|
1046 | 2559 | for (i = 0; i < BPF_REG_SIZE; i++) |
---|
1047 | | - state->stack[spi].slot_type[i] = STACK_MISC; |
---|
| 2560 | + scrub_spilled_slot(&state->stack[spi].slot_type[i]); |
---|
1048 | 2561 | |
---|
1049 | 2562 | /* only mark the slot as written if all 8 bytes were written |
---|
1050 | 2563 | * otherwise read propagation may incorrectly stop too soon |
---|
.. | .. |
---|
1058 | 2571 | state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; |
---|
1059 | 2572 | |
---|
1060 | 2573 | /* when we zero initialize stack slots mark them as such */ |
---|
1061 | | - if (reg && register_is_null(reg)) |
---|
| 2574 | + if ((reg && register_is_null(reg)) || |
---|
| 2575 | + (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) { |
---|
| 2576 | + /* backtracking doesn't work for STACK_ZERO yet. */ |
---|
| 2577 | + err = mark_chain_precision(env, value_regno); |
---|
| 2578 | + if (err) |
---|
| 2579 | + return err; |
---|
1062 | 2580 | type = STACK_ZERO; |
---|
| 2581 | + } |
---|
1063 | 2582 | |
---|
1064 | 2583 | /* Mark slots affected by this stack write. */ |
---|
1065 | 2584 | for (i = 0; i < size; i++) |
---|
.. | .. |
---|
1069 | 2588 | return 0; |
---|
1070 | 2589 | } |
---|
1071 | 2590 | |
---|
1072 | | -static int check_stack_read(struct bpf_verifier_env *env, |
---|
1073 | | - struct bpf_func_state *reg_state /* func where register points to */, |
---|
1074 | | - int off, int size, int value_regno) |
---|
| 2591 | +/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is |
---|
| 2592 | + * known to contain a variable offset. |
---|
| 2593 | + * This function checks whether the write is permitted and conservatively |
---|
| 2594 | + * tracks the effects of the write, considering that each stack slot in the |
---|
| 2595 | + * dynamic range is potentially written to. |
---|
| 2596 | + * |
---|
| 2597 | + * 'off' includes 'ptr_regno->off'. |
---|
| 2598 | + * 'value_regno' can be -1, meaning that an unknown value is being written to |
---|
| 2599 | + * the stack. |
---|
| 2600 | + * |
---|
| 2601 | + * Spilled pointers in range are not marked as written because we don't know |
---|
| 2602 | + * what's going to be actually written. This means that read propagation for |
---|
| 2603 | + * future reads cannot be terminated by this write. |
---|
| 2604 | + * |
---|
| 2605 | + * For privileged programs, uninitialized stack slots are considered |
---|
| 2606 | + * initialized by this write (even though we don't know exactly what offsets |
---|
| 2607 | + * are going to be written to). The idea is that we don't want the verifier to |
---|
| 2608 | + * reject future reads that access slots written to through variable offsets. |
---|
| 2609 | + */ |
---|
| 2610 | +static int check_stack_write_var_off(struct bpf_verifier_env *env, |
---|
| 2611 | + /* func where register points to */ |
---|
| 2612 | + struct bpf_func_state *state, |
---|
| 2613 | + int ptr_regno, int off, int size, |
---|
| 2614 | + int value_regno, int insn_idx) |
---|
| 2615 | +{ |
---|
| 2616 | + struct bpf_func_state *cur; /* state of the current function */ |
---|
| 2617 | + int min_off, max_off; |
---|
| 2618 | + int i, err; |
---|
| 2619 | + struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL; |
---|
| 2620 | + bool writing_zero = false; |
---|
| 2621 | + /* set if the fact that we're writing a zero is used to let any |
---|
| 2622 | + * stack slots remain STACK_ZERO |
---|
| 2623 | + */ |
---|
| 2624 | + bool zero_used = false; |
---|
| 2625 | + |
---|
| 2626 | + cur = env->cur_state->frame[env->cur_state->curframe]; |
---|
| 2627 | + ptr_reg = &cur->regs[ptr_regno]; |
---|
| 2628 | + min_off = ptr_reg->smin_value + off; |
---|
| 2629 | + max_off = ptr_reg->smax_value + off + size; |
---|
| 2630 | + if (value_regno >= 0) |
---|
| 2631 | + value_reg = &cur->regs[value_regno]; |
---|
| 2632 | + if (value_reg && register_is_null(value_reg)) |
---|
| 2633 | + writing_zero = true; |
---|
| 2634 | + |
---|
| 2635 | + err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), |
---|
| 2636 | + state->acquired_refs, true); |
---|
| 2637 | + if (err) |
---|
| 2638 | + return err; |
---|
| 2639 | + |
---|
| 2640 | + |
---|
| 2641 | + /* Variable offset writes destroy any spilled pointers in range. */ |
---|
| 2642 | + for (i = min_off; i < max_off; i++) { |
---|
| 2643 | + u8 new_type, *stype; |
---|
| 2644 | + int slot, spi; |
---|
| 2645 | + |
---|
| 2646 | + slot = -i - 1; |
---|
| 2647 | + spi = slot / BPF_REG_SIZE; |
---|
| 2648 | + stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; |
---|
| 2649 | + |
---|
| 2650 | + if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) { |
---|
| 2651 | + /* Reject the write if range we may write to has not |
---|
| 2652 | + * been initialized beforehand. If we didn't reject |
---|
| 2653 | + * here, the ptr status would be erased below (even |
---|
| 2654 | + * though not all slots are actually overwritten), |
---|
| 2655 | + * possibly opening the door to leaks. |
---|
| 2656 | + * |
---|
| 2657 | + * We do however catch STACK_INVALID case below, and |
---|
| 2658 | + * only allow reading possibly uninitialized memory |
---|
| 2659 | + * later for CAP_PERFMON, as the write may not happen to |
---|
| 2660 | + * that slot. |
---|
| 2661 | + */ |
---|
| 2662 | + verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d", |
---|
| 2663 | + insn_idx, i); |
---|
| 2664 | + return -EINVAL; |
---|
| 2665 | + } |
---|
| 2666 | + |
---|
| 2667 | + /* Erase all spilled pointers. */ |
---|
| 2668 | + state->stack[spi].spilled_ptr.type = NOT_INIT; |
---|
| 2669 | + |
---|
| 2670 | + /* Update the slot type. */ |
---|
| 2671 | + new_type = STACK_MISC; |
---|
| 2672 | + if (writing_zero && *stype == STACK_ZERO) { |
---|
| 2673 | + new_type = STACK_ZERO; |
---|
| 2674 | + zero_used = true; |
---|
| 2675 | + } |
---|
| 2676 | + /* If the slot is STACK_INVALID, we check whether it's OK to |
---|
| 2677 | + * pretend that it will be initialized by this write. The slot |
---|
| 2678 | + * might not actually be written to, and so if we mark it as |
---|
| 2679 | + * initialized future reads might leak uninitialized memory. |
---|
| 2680 | + * For privileged programs, we will accept such reads to slots |
---|
| 2681 | + * that may or may not be written because, if we reject |
---|
| 2682 | + * them, the error would be too confusing. |
---|
| 2683 | + */ |
---|
| 2684 | + if (*stype == STACK_INVALID && !env->allow_uninit_stack) { |
---|
| 2685 | + verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d", |
---|
| 2686 | + insn_idx, i); |
---|
| 2687 | + return -EINVAL; |
---|
| 2688 | + } |
---|
| 2689 | + *stype = new_type; |
---|
| 2690 | + } |
---|
| 2691 | + if (zero_used) { |
---|
| 2692 | + /* backtracking doesn't work for STACK_ZERO yet. */ |
---|
| 2693 | + err = mark_chain_precision(env, value_regno); |
---|
| 2694 | + if (err) |
---|
| 2695 | + return err; |
---|
| 2696 | + } |
---|
| 2697 | + return 0; |
---|
| 2698 | +} |
---|
| 2699 | + |
---|
| 2700 | +/* When register 'dst_regno' is assigned some values from stack[min_off, |
---|
| 2701 | + * max_off), we set the register's type according to the types of the |
---|
| 2702 | + * respective stack slots. If all the stack values are known to be zeros, then |
---|
| 2703 | + * so is the destination reg. Otherwise, the register is considered to be |
---|
| 2704 | + * SCALAR. This function does not deal with register filling; the caller must |
---|
| 2705 | + * ensure that all spilled registers in the stack range have been marked as |
---|
| 2706 | + * read. |
---|
| 2707 | + */ |
---|
| 2708 | +static void mark_reg_stack_read(struct bpf_verifier_env *env, |
---|
| 2709 | + /* func where src register points to */ |
---|
| 2710 | + struct bpf_func_state *ptr_state, |
---|
| 2711 | + int min_off, int max_off, int dst_regno) |
---|
| 2712 | +{ |
---|
| 2713 | + struct bpf_verifier_state *vstate = env->cur_state; |
---|
| 2714 | + struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
---|
| 2715 | + int i, slot, spi; |
---|
| 2716 | + u8 *stype; |
---|
| 2717 | + int zeros = 0; |
---|
| 2718 | + |
---|
| 2719 | + for (i = min_off; i < max_off; i++) { |
---|
| 2720 | + slot = -i - 1; |
---|
| 2721 | + spi = slot / BPF_REG_SIZE; |
---|
| 2722 | + stype = ptr_state->stack[spi].slot_type; |
---|
| 2723 | + if (stype[slot % BPF_REG_SIZE] != STACK_ZERO) |
---|
| 2724 | + break; |
---|
| 2725 | + zeros++; |
---|
| 2726 | + } |
---|
| 2727 | + if (zeros == max_off - min_off) { |
---|
| 2728 | + /* any access_size read into register is zero extended, |
---|
| 2729 | + * so the whole register == const_zero |
---|
| 2730 | + */ |
---|
| 2731 | + __mark_reg_const_zero(&state->regs[dst_regno]); |
---|
| 2732 | + /* backtracking doesn't support STACK_ZERO yet, |
---|
| 2733 | + * so mark it precise here, so that later |
---|
| 2734 | + * backtracking can stop here. |
---|
| 2735 | + * Backtracking may not need this if this register |
---|
| 2736 | + * doesn't participate in pointer adjustment. |
---|
| 2737 | + * Forward propagation of precise flag is not |
---|
| 2738 | + * necessary either. This mark is only to stop |
---|
| 2739 | + * backtracking. Any register that contributed |
---|
| 2740 | + * to const 0 was marked precise before spill. |
---|
| 2741 | + */ |
---|
| 2742 | + state->regs[dst_regno].precise = true; |
---|
| 2743 | + } else { |
---|
| 2744 | + /* have read misc data from the stack */ |
---|
| 2745 | + mark_reg_unknown(env, state->regs, dst_regno); |
---|
| 2746 | + } |
---|
| 2747 | + state->regs[dst_regno].live |= REG_LIVE_WRITTEN; |
---|
| 2748 | +} |
---|
| 2749 | + |
---|
| 2750 | +/* Read the stack at 'off' and put the results into the register indicated by |
---|
| 2751 | + * 'dst_regno'. It handles reg filling if the addressed stack slot is a |
---|
| 2752 | + * spilled reg. |
---|
| 2753 | + * |
---|
| 2754 | + * 'dst_regno' can be -1, meaning that the read value is not going to a |
---|
| 2755 | + * register. |
---|
| 2756 | + * |
---|
| 2757 | + * The access is assumed to be within the current stack bounds. |
---|
| 2758 | + */ |
---|
| 2759 | +static int check_stack_read_fixed_off(struct bpf_verifier_env *env, |
---|
| 2760 | + /* func where src register points to */ |
---|
| 2761 | + struct bpf_func_state *reg_state, |
---|
| 2762 | + int off, int size, int dst_regno) |
---|
1075 | 2763 | { |
---|
1076 | 2764 | struct bpf_verifier_state *vstate = env->cur_state; |
---|
1077 | 2765 | struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
---|
1078 | 2766 | int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; |
---|
1079 | 2767 | struct bpf_reg_state *reg; |
---|
1080 | | - u8 *stype; |
---|
| 2768 | + u8 *stype, type; |
---|
1081 | 2769 | |
---|
1082 | | - if (reg_state->allocated_stack <= slot) { |
---|
1083 | | - verbose(env, "invalid read from stack off %d+0 size %d\n", |
---|
1084 | | - off, size); |
---|
1085 | | - return -EACCES; |
---|
1086 | | - } |
---|
1087 | 2770 | stype = reg_state->stack[spi].slot_type; |
---|
1088 | 2771 | reg = ®_state->stack[spi].spilled_ptr; |
---|
1089 | 2772 | |
---|
1090 | | - if (stype[0] == STACK_SPILL) { |
---|
1091 | | - if (size != BPF_REG_SIZE) { |
---|
| 2773 | + if (is_spilled_reg(®_state->stack[spi])) { |
---|
| 2774 | + u8 spill_size = 1; |
---|
| 2775 | + |
---|
| 2776 | + for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--) |
---|
| 2777 | + spill_size++; |
---|
| 2778 | + |
---|
| 2779 | + if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) { |
---|
1092 | 2780 | if (reg->type != SCALAR_VALUE) { |
---|
| 2781 | + verbose_linfo(env, env->insn_idx, "; "); |
---|
1093 | 2782 | verbose(env, "invalid size of register fill\n"); |
---|
1094 | 2783 | return -EACCES; |
---|
1095 | 2784 | } |
---|
1096 | | - if (value_regno >= 0) { |
---|
1097 | | - mark_reg_unknown(env, state->regs, value_regno); |
---|
1098 | | - state->regs[value_regno].live |= REG_LIVE_WRITTEN; |
---|
| 2785 | + |
---|
| 2786 | + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); |
---|
| 2787 | + if (dst_regno < 0) |
---|
| 2788 | + return 0; |
---|
| 2789 | + |
---|
| 2790 | + if (!(off % BPF_REG_SIZE) && size == spill_size) { |
---|
| 2791 | + /* The earlier check_reg_arg() has decided the |
---|
| 2792 | + * subreg_def for this insn. Save it first. |
---|
| 2793 | + */ |
---|
| 2794 | + s32 subreg_def = state->regs[dst_regno].subreg_def; |
---|
| 2795 | + |
---|
| 2796 | + copy_register_state(&state->regs[dst_regno], reg); |
---|
| 2797 | + state->regs[dst_regno].subreg_def = subreg_def; |
---|
| 2798 | + } else { |
---|
| 2799 | + for (i = 0; i < size; i++) { |
---|
| 2800 | + type = stype[(slot - i) % BPF_REG_SIZE]; |
---|
| 2801 | + if (type == STACK_SPILL) |
---|
| 2802 | + continue; |
---|
| 2803 | + if (type == STACK_MISC) |
---|
| 2804 | + continue; |
---|
| 2805 | + verbose(env, "invalid read from stack off %d+%d size %d\n", |
---|
| 2806 | + off, i, size); |
---|
| 2807 | + return -EACCES; |
---|
| 2808 | + } |
---|
| 2809 | + mark_reg_unknown(env, state->regs, dst_regno); |
---|
1099 | 2810 | } |
---|
1100 | | - mark_reg_read(env, reg, reg->parent); |
---|
| 2811 | + state->regs[dst_regno].live |= REG_LIVE_WRITTEN; |
---|
1101 | 2812 | return 0; |
---|
1102 | 2813 | } |
---|
1103 | | - for (i = 1; i < BPF_REG_SIZE; i++) { |
---|
1104 | | - if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { |
---|
1105 | | - verbose(env, "corrupted spill memory\n"); |
---|
1106 | | - return -EACCES; |
---|
1107 | | - } |
---|
1108 | | - } |
---|
1109 | 2814 | |
---|
1110 | | - if (value_regno >= 0) { |
---|
| 2815 | + if (dst_regno >= 0) { |
---|
1111 | 2816 | /* restore register state from stack */ |
---|
1112 | | - state->regs[value_regno] = *reg; |
---|
| 2817 | + copy_register_state(&state->regs[dst_regno], reg); |
---|
1113 | 2818 | /* mark reg as written since spilled pointer state likely |
---|
1114 | 2819 | * has its liveness marks cleared by is_state_visited() |
---|
1115 | 2820 | * which resets stack/reg liveness for state transitions |
---|
1116 | 2821 | */ |
---|
1117 | | - state->regs[value_regno].live |= REG_LIVE_WRITTEN; |
---|
| 2822 | + state->regs[dst_regno].live |= REG_LIVE_WRITTEN; |
---|
| 2823 | + } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) { |
---|
| 2824 | + /* If dst_regno==-1, the caller is asking us whether |
---|
| 2825 | + * it is acceptable to use this value as a SCALAR_VALUE |
---|
| 2826 | + * (e.g. for XADD). |
---|
| 2827 | + * We must not allow unprivileged callers to do that |
---|
| 2828 | + * with spilled pointers. |
---|
| 2829 | + */ |
---|
| 2830 | + verbose(env, "leaking pointer from stack off %d\n", |
---|
| 2831 | + off); |
---|
| 2832 | + return -EACCES; |
---|
1118 | 2833 | } |
---|
1119 | | - mark_reg_read(env, reg, reg->parent); |
---|
| 2834 | + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); |
---|
1120 | 2835 | } else { |
---|
1121 | | - int zeros = 0; |
---|
1122 | | - |
---|
1123 | 2836 | for (i = 0; i < size; i++) { |
---|
1124 | | - if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC) |
---|
| 2837 | + type = stype[(slot - i) % BPF_REG_SIZE]; |
---|
| 2838 | + if (type == STACK_MISC) |
---|
1125 | 2839 | continue; |
---|
1126 | | - if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) { |
---|
1127 | | - zeros++; |
---|
| 2840 | + if (type == STACK_ZERO) |
---|
1128 | 2841 | continue; |
---|
1129 | | - } |
---|
1130 | 2842 | verbose(env, "invalid read from stack off %d+%d size %d\n", |
---|
1131 | 2843 | off, i, size); |
---|
1132 | 2844 | return -EACCES; |
---|
1133 | 2845 | } |
---|
1134 | | - mark_reg_read(env, reg, reg->parent); |
---|
1135 | | - if (value_regno >= 0) { |
---|
1136 | | - if (zeros == size) { |
---|
1137 | | - /* any size read into register is zero extended, |
---|
1138 | | - * so the whole register == const_zero |
---|
1139 | | - */ |
---|
1140 | | - __mark_reg_const_zero(&state->regs[value_regno]); |
---|
1141 | | - } else { |
---|
1142 | | - /* have read misc data from the stack */ |
---|
1143 | | - mark_reg_unknown(env, state->regs, value_regno); |
---|
1144 | | - } |
---|
1145 | | - state->regs[value_regno].live |= REG_LIVE_WRITTEN; |
---|
1146 | | - } |
---|
| 2846 | + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); |
---|
| 2847 | + if (dst_regno >= 0) |
---|
| 2848 | + mark_reg_stack_read(env, reg_state, off, off + size, dst_regno); |
---|
1147 | 2849 | } |
---|
1148 | 2850 | return 0; |
---|
1149 | 2851 | } |
---|
1150 | 2852 | |
---|
1151 | | -static int check_stack_access(struct bpf_verifier_env *env, |
---|
1152 | | - const struct bpf_reg_state *reg, |
---|
1153 | | - int off, int size) |
---|
| 2853 | +enum stack_access_src { |
---|
| 2854 | + ACCESS_DIRECT = 1, /* the access is performed by an instruction */ |
---|
| 2855 | + ACCESS_HELPER = 2, /* the access is performed by a helper */ |
---|
| 2856 | +}; |
---|
| 2857 | + |
---|
| 2858 | +static int check_stack_range_initialized(struct bpf_verifier_env *env, |
---|
| 2859 | + int regno, int off, int access_size, |
---|
| 2860 | + bool zero_size_allowed, |
---|
| 2861 | + enum stack_access_src type, |
---|
| 2862 | + struct bpf_call_arg_meta *meta); |
---|
| 2863 | + |
---|
| 2864 | +static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) |
---|
1154 | 2865 | { |
---|
1155 | | - /* Stack accesses must be at a fixed offset, so that we |
---|
1156 | | - * can determine what type of data were returned. See |
---|
1157 | | - * check_stack_read(). |
---|
| 2866 | + return cur_regs(env) + regno; |
---|
| 2867 | +} |
---|
| 2868 | + |
---|
| 2869 | +/* Read the stack at 'ptr_regno + off' and put the result into the register |
---|
| 2870 | + * 'dst_regno'. |
---|
| 2871 | + * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'), |
---|
| 2872 | + * but not its variable offset. |
---|
| 2873 | + * 'size' is assumed to be <= reg size and the access is assumed to be aligned. |
---|
| 2874 | + * |
---|
| 2875 | + * As opposed to check_stack_read_fixed_off, this function doesn't deal with |
---|
| 2876 | + * filling registers (i.e. reads of spilled register cannot be detected when |
---|
| 2877 | + * the offset is not fixed). We conservatively mark 'dst_regno' as containing |
---|
| 2878 | + * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable |
---|
| 2879 | + * offset; for a fixed offset check_stack_read_fixed_off should be used |
---|
| 2880 | + * instead. |
---|
| 2881 | + */ |
---|
| 2882 | +static int check_stack_read_var_off(struct bpf_verifier_env *env, |
---|
| 2883 | + int ptr_regno, int off, int size, int dst_regno) |
---|
| 2884 | +{ |
---|
| 2885 | + /* The state of the source register. */ |
---|
| 2886 | + struct bpf_reg_state *reg = reg_state(env, ptr_regno); |
---|
| 2887 | + struct bpf_func_state *ptr_state = func(env, reg); |
---|
| 2888 | + int err; |
---|
| 2889 | + int min_off, max_off; |
---|
| 2890 | + |
---|
| 2891 | + /* Note that we pass a NULL meta, so raw access will not be permitted. |
---|
1158 | 2892 | */ |
---|
1159 | | - if (!tnum_is_const(reg->var_off)) { |
---|
| 2893 | + err = check_stack_range_initialized(env, ptr_regno, off, size, |
---|
| 2894 | + false, ACCESS_DIRECT, NULL); |
---|
| 2895 | + if (err) |
---|
| 2896 | + return err; |
---|
| 2897 | + |
---|
| 2898 | + min_off = reg->smin_value + off; |
---|
| 2899 | + max_off = reg->smax_value + off; |
---|
| 2900 | + mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno); |
---|
| 2901 | + return 0; |
---|
| 2902 | +} |
---|
| 2903 | + |
---|
| 2904 | +/* check_stack_read dispatches to check_stack_read_fixed_off or |
---|
| 2905 | + * check_stack_read_var_off. |
---|
| 2906 | + * |
---|
| 2907 | + * The caller must ensure that the offset falls within the allocated stack |
---|
| 2908 | + * bounds. |
---|
| 2909 | + * |
---|
| 2910 | + * 'dst_regno' is a register which will receive the value from the stack. It |
---|
| 2911 | + * can be -1, meaning that the read value is not going to a register. |
---|
| 2912 | + */ |
---|
| 2913 | +static int check_stack_read(struct bpf_verifier_env *env, |
---|
| 2914 | + int ptr_regno, int off, int size, |
---|
| 2915 | + int dst_regno) |
---|
| 2916 | +{ |
---|
| 2917 | + struct bpf_reg_state *reg = reg_state(env, ptr_regno); |
---|
| 2918 | + struct bpf_func_state *state = func(env, reg); |
---|
| 2919 | + int err; |
---|
| 2920 | + /* Some accesses are only permitted with a static offset. */ |
---|
| 2921 | + bool var_off = !tnum_is_const(reg->var_off); |
---|
| 2922 | + |
---|
| 2923 | + /* The offset is required to be static when reads don't go to a |
---|
| 2924 | + * register, in order to not leak pointers (see |
---|
| 2925 | + * check_stack_read_fixed_off). |
---|
| 2926 | + */ |
---|
| 2927 | + if (dst_regno < 0 && var_off) { |
---|
1160 | 2928 | char tn_buf[48]; |
---|
1161 | 2929 | |
---|
1162 | 2930 | tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
---|
1163 | | - verbose(env, "variable stack access var_off=%s off=%d size=%d\n", |
---|
| 2931 | + verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n", |
---|
1164 | 2932 | tn_buf, off, size); |
---|
1165 | 2933 | return -EACCES; |
---|
1166 | 2934 | } |
---|
1167 | | - |
---|
1168 | | - if (off >= 0 || off < -MAX_BPF_STACK) { |
---|
1169 | | - verbose(env, "invalid stack off=%d size=%d\n", off, size); |
---|
1170 | | - return -EACCES; |
---|
| 2935 | + /* Variable offset is prohibited for unprivileged mode for simplicity |
---|
| 2936 | + * since it requires corresponding support in Spectre masking for stack |
---|
| 2937 | + * ALU. See also retrieve_ptr_limit(). The check in |
---|
| 2938 | + * check_stack_access_for_ptr_arithmetic() called by |
---|
| 2939 | + * adjust_ptr_min_max_vals() prevents users from creating stack pointers |
---|
| 2940 | + * with variable offsets, therefore no check is required here. Further, |
---|
| 2941 | + * just checking it here would be insufficient as speculative stack |
---|
| 2942 | + * writes could still lead to unsafe speculative behaviour. |
---|
| 2943 | + */ |
---|
| 2944 | + if (!var_off) { |
---|
| 2945 | + off += reg->var_off.value; |
---|
| 2946 | + err = check_stack_read_fixed_off(env, state, off, size, |
---|
| 2947 | + dst_regno); |
---|
| 2948 | + } else { |
---|
| 2949 | + /* Variable offset stack reads need more conservative handling |
---|
| 2950 | + * than fixed offset ones. Note that dst_regno >= 0 on this |
---|
| 2951 | + * branch. |
---|
| 2952 | + */ |
---|
| 2953 | + err = check_stack_read_var_off(env, ptr_regno, off, size, |
---|
| 2954 | + dst_regno); |
---|
1171 | 2955 | } |
---|
1172 | | - |
---|
1173 | | - return 0; |
---|
| 2956 | + return err; |
---|
1174 | 2957 | } |
---|
1175 | 2958 | |
---|
1176 | | -/* check read/write into map element returned by bpf_map_lookup_elem() */ |
---|
1177 | | -static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, |
---|
1178 | | - int size, bool zero_size_allowed) |
---|
| 2959 | + |
---|
| 2960 | +/* check_stack_write dispatches to check_stack_write_fixed_off or |
---|
| 2961 | + * check_stack_write_var_off. |
---|
| 2962 | + * |
---|
| 2963 | + * 'ptr_regno' is the register used as a pointer into the stack. |
---|
| 2964 | + * 'off' includes 'ptr_regno->off', but not its variable offset (if any). |
---|
| 2965 | + * 'value_regno' is the register whose value we're writing to the stack. It can |
---|
| 2966 | + * be -1, meaning that we're not writing from a register. |
---|
| 2967 | + * |
---|
| 2968 | + * The caller must ensure that the offset falls within the maximum stack size. |
---|
| 2969 | + */ |
---|
| 2970 | +static int check_stack_write(struct bpf_verifier_env *env, |
---|
| 2971 | + int ptr_regno, int off, int size, |
---|
| 2972 | + int value_regno, int insn_idx) |
---|
| 2973 | +{ |
---|
| 2974 | + struct bpf_reg_state *reg = reg_state(env, ptr_regno); |
---|
| 2975 | + struct bpf_func_state *state = func(env, reg); |
---|
| 2976 | + int err; |
---|
| 2977 | + |
---|
| 2978 | + if (tnum_is_const(reg->var_off)) { |
---|
| 2979 | + off += reg->var_off.value; |
---|
| 2980 | + err = check_stack_write_fixed_off(env, state, off, size, |
---|
| 2981 | + value_regno, insn_idx); |
---|
| 2982 | + } else { |
---|
| 2983 | + /* Variable offset stack writes need more conservative handling |
---|
| 2984 | + * than fixed offset ones. |
---|
| 2985 | + */ |
---|
| 2986 | + err = check_stack_write_var_off(env, state, |
---|
| 2987 | + ptr_regno, off, size, |
---|
| 2988 | + value_regno, insn_idx); |
---|
| 2989 | + } |
---|
| 2990 | + return err; |
---|
| 2991 | +} |
---|
| 2992 | + |
---|
| 2993 | +static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, |
---|
| 2994 | + int off, int size, enum bpf_access_type type) |
---|
1179 | 2995 | { |
---|
1180 | 2996 | struct bpf_reg_state *regs = cur_regs(env); |
---|
1181 | 2997 | struct bpf_map *map = regs[regno].map_ptr; |
---|
| 2998 | + u32 cap = bpf_map_flags_to_cap(map); |
---|
1182 | 2999 | |
---|
1183 | | - if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || |
---|
1184 | | - off + size > map->value_size) { |
---|
1185 | | - verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", |
---|
| 3000 | + if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { |
---|
| 3001 | + verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", |
---|
1186 | 3002 | map->value_size, off, size); |
---|
1187 | 3003 | return -EACCES; |
---|
1188 | 3004 | } |
---|
| 3005 | + |
---|
| 3006 | + if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { |
---|
| 3007 | + verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", |
---|
| 3008 | + map->value_size, off, size); |
---|
| 3009 | + return -EACCES; |
---|
| 3010 | + } |
---|
| 3011 | + |
---|
1189 | 3012 | return 0; |
---|
1190 | 3013 | } |
---|
1191 | 3014 | |
---|
1192 | | -/* check read/write into a map element with possible variable offset */ |
---|
1193 | | -static int check_map_access(struct bpf_verifier_env *env, u32 regno, |
---|
1194 | | - int off, int size, bool zero_size_allowed) |
---|
| 3015 | +/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */ |
---|
| 3016 | +static int __check_mem_access(struct bpf_verifier_env *env, int regno, |
---|
| 3017 | + int off, int size, u32 mem_size, |
---|
| 3018 | + bool zero_size_allowed) |
---|
| 3019 | +{ |
---|
| 3020 | + bool size_ok = size > 0 || (size == 0 && zero_size_allowed); |
---|
| 3021 | + struct bpf_reg_state *reg; |
---|
| 3022 | + |
---|
| 3023 | + if (off >= 0 && size_ok && (u64)off + size <= mem_size) |
---|
| 3024 | + return 0; |
---|
| 3025 | + |
---|
| 3026 | + reg = &cur_regs(env)[regno]; |
---|
| 3027 | + switch (reg->type) { |
---|
| 3028 | + case PTR_TO_MAP_VALUE: |
---|
| 3029 | + verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", |
---|
| 3030 | + mem_size, off, size); |
---|
| 3031 | + break; |
---|
| 3032 | + case PTR_TO_PACKET: |
---|
| 3033 | + case PTR_TO_PACKET_META: |
---|
| 3034 | + case PTR_TO_PACKET_END: |
---|
| 3035 | + verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", |
---|
| 3036 | + off, size, regno, reg->id, off, mem_size); |
---|
| 3037 | + break; |
---|
| 3038 | + case PTR_TO_MEM: |
---|
| 3039 | + default: |
---|
| 3040 | + verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n", |
---|
| 3041 | + mem_size, off, size); |
---|
| 3042 | + } |
---|
| 3043 | + |
---|
| 3044 | + return -EACCES; |
---|
| 3045 | +} |
---|
| 3046 | + |
---|
| 3047 | +/* check read/write into a memory region with possible variable offset */ |
---|
| 3048 | +static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, |
---|
| 3049 | + int off, int size, u32 mem_size, |
---|
| 3050 | + bool zero_size_allowed) |
---|
1195 | 3051 | { |
---|
1196 | 3052 | struct bpf_verifier_state *vstate = env->cur_state; |
---|
1197 | 3053 | struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
---|
1198 | 3054 | struct bpf_reg_state *reg = &state->regs[regno]; |
---|
1199 | 3055 | int err; |
---|
1200 | 3056 | |
---|
1201 | | - /* We may have adjusted the register to this map value, so we |
---|
| 3057 | + /* We may have adjusted the register pointing to memory region, so we |
---|
1202 | 3058 | * need to try adding each of min_value and max_value to off |
---|
1203 | 3059 | * to make sure our theoretical access will be safe. |
---|
1204 | 3060 | */ |
---|
1205 | | - if (env->log.level) |
---|
| 3061 | + if (env->log.level & BPF_LOG_LEVEL) |
---|
1206 | 3062 | print_verifier_state(env, state); |
---|
1207 | 3063 | |
---|
1208 | 3064 | /* The minimum value is only important with signed |
---|
.. | .. |
---|
1219 | 3075 | regno); |
---|
1220 | 3076 | return -EACCES; |
---|
1221 | 3077 | } |
---|
1222 | | - err = __check_map_access(env, regno, reg->smin_value + off, size, |
---|
1223 | | - zero_size_allowed); |
---|
| 3078 | + err = __check_mem_access(env, regno, reg->smin_value + off, size, |
---|
| 3079 | + mem_size, zero_size_allowed); |
---|
1224 | 3080 | if (err) { |
---|
1225 | | - verbose(env, "R%d min value is outside of the array range\n", |
---|
| 3081 | + verbose(env, "R%d min value is outside of the allowed memory range\n", |
---|
1226 | 3082 | regno); |
---|
1227 | 3083 | return err; |
---|
1228 | 3084 | } |
---|
.. | .. |
---|
1232 | 3088 | * If reg->umax_value + off could overflow, treat that as unbounded too. |
---|
1233 | 3089 | */ |
---|
1234 | 3090 | if (reg->umax_value >= BPF_MAX_VAR_OFF) { |
---|
1235 | | - verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n", |
---|
| 3091 | + verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n", |
---|
1236 | 3092 | regno); |
---|
1237 | 3093 | return -EACCES; |
---|
1238 | 3094 | } |
---|
1239 | | - err = __check_map_access(env, regno, reg->umax_value + off, size, |
---|
1240 | | - zero_size_allowed); |
---|
1241 | | - if (err) |
---|
1242 | | - verbose(env, "R%d max value is outside of the array range\n", |
---|
| 3095 | + err = __check_mem_access(env, regno, reg->umax_value + off, size, |
---|
| 3096 | + mem_size, zero_size_allowed); |
---|
| 3097 | + if (err) { |
---|
| 3098 | + verbose(env, "R%d max value is outside of the allowed memory range\n", |
---|
1243 | 3099 | regno); |
---|
| 3100 | + return err; |
---|
| 3101 | + } |
---|
| 3102 | + |
---|
| 3103 | + return 0; |
---|
| 3104 | +} |
---|
| 3105 | + |
---|
| 3106 | +/* check read/write into a map element with possible variable offset */ |
---|
| 3107 | +static int check_map_access(struct bpf_verifier_env *env, u32 regno, |
---|
| 3108 | + int off, int size, bool zero_size_allowed) |
---|
| 3109 | +{ |
---|
| 3110 | + struct bpf_verifier_state *vstate = env->cur_state; |
---|
| 3111 | + struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
---|
| 3112 | + struct bpf_reg_state *reg = &state->regs[regno]; |
---|
| 3113 | + struct bpf_map *map = reg->map_ptr; |
---|
| 3114 | + int err; |
---|
| 3115 | + |
---|
| 3116 | + err = check_mem_region_access(env, regno, off, size, map->value_size, |
---|
| 3117 | + zero_size_allowed); |
---|
| 3118 | + if (err) |
---|
| 3119 | + return err; |
---|
| 3120 | + |
---|
| 3121 | + if (map_value_has_spin_lock(map)) { |
---|
| 3122 | + u32 lock = map->spin_lock_off; |
---|
| 3123 | + |
---|
| 3124 | + /* if any part of struct bpf_spin_lock can be touched by |
---|
| 3125 | + * load/store reject this program. |
---|
| 3126 | + * To check that [x1, x2) overlaps with [y1, y2) |
---|
| 3127 | + * it is sufficient to check x1 < y2 && y1 < x2. |
---|
| 3128 | + */ |
---|
| 3129 | + if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && |
---|
| 3130 | + lock < reg->umax_value + off + size) { |
---|
| 3131 | + verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n"); |
---|
| 3132 | + return -EACCES; |
---|
| 3133 | + } |
---|
| 3134 | + } |
---|
1244 | 3135 | return err; |
---|
1245 | 3136 | } |
---|
1246 | 3137 | |
---|
1247 | 3138 | #define MAX_PACKET_OFF 0xffff |
---|
1248 | 3139 | |
---|
| 3140 | +static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) |
---|
| 3141 | +{ |
---|
| 3142 | + return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; |
---|
| 3143 | +} |
---|
| 3144 | + |
---|
1249 | 3145 | static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, |
---|
1250 | 3146 | const struct bpf_call_arg_meta *meta, |
---|
1251 | 3147 | enum bpf_access_type t) |
---|
1252 | 3148 | { |
---|
1253 | | - switch (env->prog->type) { |
---|
| 3149 | + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); |
---|
| 3150 | + |
---|
| 3151 | + switch (prog_type) { |
---|
| 3152 | + /* Program types only with direct read access go here! */ |
---|
1254 | 3153 | case BPF_PROG_TYPE_LWT_IN: |
---|
1255 | 3154 | case BPF_PROG_TYPE_LWT_OUT: |
---|
1256 | 3155 | case BPF_PROG_TYPE_LWT_SEG6LOCAL: |
---|
1257 | 3156 | case BPF_PROG_TYPE_SK_REUSEPORT: |
---|
1258 | | - /* dst_input() and dst_output() can't write for now */ |
---|
| 3157 | + case BPF_PROG_TYPE_FLOW_DISSECTOR: |
---|
| 3158 | + case BPF_PROG_TYPE_CGROUP_SKB: |
---|
1259 | 3159 | if (t == BPF_WRITE) |
---|
1260 | 3160 | return false; |
---|
1261 | | - /* fallthrough */ |
---|
| 3161 | + fallthrough; |
---|
| 3162 | + |
---|
| 3163 | + /* Program types with direct read + write access go here! */ |
---|
1262 | 3164 | case BPF_PROG_TYPE_SCHED_CLS: |
---|
1263 | 3165 | case BPF_PROG_TYPE_SCHED_ACT: |
---|
1264 | 3166 | case BPF_PROG_TYPE_XDP: |
---|
.. | .. |
---|
1270 | 3172 | |
---|
1271 | 3173 | env->seen_direct_write = true; |
---|
1272 | 3174 | return true; |
---|
| 3175 | + |
---|
| 3176 | + case BPF_PROG_TYPE_CGROUP_SOCKOPT: |
---|
| 3177 | + if (t == BPF_WRITE) |
---|
| 3178 | + env->seen_direct_write = true; |
---|
| 3179 | + |
---|
| 3180 | + return true; |
---|
| 3181 | + |
---|
1273 | 3182 | default: |
---|
1274 | 3183 | return false; |
---|
1275 | 3184 | } |
---|
1276 | | -} |
---|
1277 | | - |
---|
1278 | | -static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, |
---|
1279 | | - int off, int size, bool zero_size_allowed) |
---|
1280 | | -{ |
---|
1281 | | - struct bpf_reg_state *regs = cur_regs(env); |
---|
1282 | | - struct bpf_reg_state *reg = ®s[regno]; |
---|
1283 | | - |
---|
1284 | | - if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || |
---|
1285 | | - (u64)off + size > reg->range) { |
---|
1286 | | - verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", |
---|
1287 | | - off, size, regno, reg->id, reg->off, reg->range); |
---|
1288 | | - return -EACCES; |
---|
1289 | | - } |
---|
1290 | | - return 0; |
---|
1291 | 3185 | } |
---|
1292 | 3186 | |
---|
1293 | 3187 | static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, |
---|
.. | .. |
---|
1310 | 3204 | regno); |
---|
1311 | 3205 | return -EACCES; |
---|
1312 | 3206 | } |
---|
1313 | | - err = __check_packet_access(env, regno, off, size, zero_size_allowed); |
---|
| 3207 | + |
---|
| 3208 | + err = reg->range < 0 ? -EINVAL : |
---|
| 3209 | + __check_mem_access(env, regno, off, size, reg->range, |
---|
| 3210 | + zero_size_allowed); |
---|
1314 | 3211 | if (err) { |
---|
1315 | 3212 | verbose(env, "R%d offset is outside of the packet\n", regno); |
---|
1316 | 3213 | return err; |
---|
1317 | 3214 | } |
---|
| 3215 | + |
---|
| 3216 | + /* __check_mem_access has made sure "off + size - 1" is within u16. |
---|
| 3217 | + * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff, |
---|
| 3218 | + * otherwise find_good_pkt_pointers would have refused to set range info |
---|
| 3219 | + * so __check_mem_access would have rejected this pkt access. |
---|
| 3220 | + * Therefore, "off + reg->umax_value + size - 1" won't overflow u32. |
---|
| 3221 | + */ |
---|
| 3222 | + env->prog->aux->max_pkt_offset = |
---|
| 3223 | + max_t(u32, env->prog->aux->max_pkt_offset, |
---|
| 3224 | + off + reg->umax_value + size - 1); |
---|
| 3225 | + |
---|
1318 | 3226 | return err; |
---|
1319 | 3227 | } |
---|
1320 | 3228 | |
---|
1321 | 3229 | /* check access to 'struct bpf_context' fields. Supports fixed offsets only */ |
---|
1322 | 3230 | static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, |
---|
1323 | | - enum bpf_access_type t, enum bpf_reg_type *reg_type) |
---|
| 3231 | + enum bpf_access_type t, enum bpf_reg_type *reg_type, |
---|
| 3232 | + u32 *btf_id) |
---|
1324 | 3233 | { |
---|
1325 | 3234 | struct bpf_insn_access_aux info = { |
---|
1326 | 3235 | .reg_type = *reg_type, |
---|
| 3236 | + .log = &env->log, |
---|
1327 | 3237 | }; |
---|
1328 | 3238 | |
---|
1329 | 3239 | if (env->ops->is_valid_access && |
---|
.. | .. |
---|
1337 | 3247 | */ |
---|
1338 | 3248 | *reg_type = info.reg_type; |
---|
1339 | 3249 | |
---|
1340 | | - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; |
---|
| 3250 | + if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) |
---|
| 3251 | + *btf_id = info.btf_id; |
---|
| 3252 | + else |
---|
| 3253 | + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; |
---|
1341 | 3254 | /* remember the offset of last byte accessed in ctx */ |
---|
1342 | 3255 | if (env->prog->aux->max_ctx_offset < off + size) |
---|
1343 | 3256 | env->prog->aux->max_ctx_offset = off + size; |
---|
.. | .. |
---|
1348 | 3261 | return -EACCES; |
---|
1349 | 3262 | } |
---|
1350 | 3263 | |
---|
1351 | | -static bool __is_pointer_value(bool allow_ptr_leaks, |
---|
1352 | | - const struct bpf_reg_state *reg) |
---|
| 3264 | +static int check_flow_keys_access(struct bpf_verifier_env *env, int off, |
---|
| 3265 | + int size) |
---|
1353 | 3266 | { |
---|
1354 | | - if (allow_ptr_leaks) |
---|
1355 | | - return false; |
---|
| 3267 | + if (size < 0 || off < 0 || |
---|
| 3268 | + (u64)off + size > sizeof(struct bpf_flow_keys)) { |
---|
| 3269 | + verbose(env, "invalid access to flow keys off=%d size=%d\n", |
---|
| 3270 | + off, size); |
---|
| 3271 | + return -EACCES; |
---|
| 3272 | + } |
---|
| 3273 | + return 0; |
---|
| 3274 | +} |
---|
1356 | 3275 | |
---|
1357 | | - return reg->type != SCALAR_VALUE; |
---|
| 3276 | +static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, |
---|
| 3277 | + u32 regno, int off, int size, |
---|
| 3278 | + enum bpf_access_type t) |
---|
| 3279 | +{ |
---|
| 3280 | + struct bpf_reg_state *regs = cur_regs(env); |
---|
| 3281 | + struct bpf_reg_state *reg = ®s[regno]; |
---|
| 3282 | + struct bpf_insn_access_aux info = {}; |
---|
| 3283 | + bool valid; |
---|
| 3284 | + |
---|
| 3285 | + if (reg->smin_value < 0) { |
---|
| 3286 | + verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", |
---|
| 3287 | + regno); |
---|
| 3288 | + return -EACCES; |
---|
| 3289 | + } |
---|
| 3290 | + |
---|
| 3291 | + switch (reg->type) { |
---|
| 3292 | + case PTR_TO_SOCK_COMMON: |
---|
| 3293 | + valid = bpf_sock_common_is_valid_access(off, size, t, &info); |
---|
| 3294 | + break; |
---|
| 3295 | + case PTR_TO_SOCKET: |
---|
| 3296 | + valid = bpf_sock_is_valid_access(off, size, t, &info); |
---|
| 3297 | + break; |
---|
| 3298 | + case PTR_TO_TCP_SOCK: |
---|
| 3299 | + valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); |
---|
| 3300 | + break; |
---|
| 3301 | + case PTR_TO_XDP_SOCK: |
---|
| 3302 | + valid = bpf_xdp_sock_is_valid_access(off, size, t, &info); |
---|
| 3303 | + break; |
---|
| 3304 | + default: |
---|
| 3305 | + valid = false; |
---|
| 3306 | + } |
---|
| 3307 | + |
---|
| 3308 | + |
---|
| 3309 | + if (valid) { |
---|
| 3310 | + env->insn_aux_data[insn_idx].ctx_field_size = |
---|
| 3311 | + info.ctx_field_size; |
---|
| 3312 | + return 0; |
---|
| 3313 | + } |
---|
| 3314 | + |
---|
| 3315 | + verbose(env, "R%d invalid %s access off=%d size=%d\n", |
---|
| 3316 | + regno, reg_type_str[reg->type], off, size); |
---|
| 3317 | + |
---|
| 3318 | + return -EACCES; |
---|
1358 | 3319 | } |
---|
1359 | 3320 | |
---|
1360 | 3321 | static bool is_pointer_value(struct bpf_verifier_env *env, int regno) |
---|
1361 | 3322 | { |
---|
1362 | | - return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); |
---|
| 3323 | + return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); |
---|
1363 | 3324 | } |
---|
1364 | 3325 | |
---|
1365 | 3326 | static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) |
---|
1366 | 3327 | { |
---|
1367 | | - const struct bpf_reg_state *reg = cur_regs(env) + regno; |
---|
| 3328 | + const struct bpf_reg_state *reg = reg_state(env, regno); |
---|
1368 | 3329 | |
---|
1369 | 3330 | return reg->type == PTR_TO_CTX; |
---|
1370 | 3331 | } |
---|
1371 | 3332 | |
---|
| 3333 | +static bool is_sk_reg(struct bpf_verifier_env *env, int regno) |
---|
| 3334 | +{ |
---|
| 3335 | + const struct bpf_reg_state *reg = reg_state(env, regno); |
---|
| 3336 | + |
---|
| 3337 | + return type_is_sk_pointer(reg->type); |
---|
| 3338 | +} |
---|
| 3339 | + |
---|
1372 | 3340 | static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) |
---|
1373 | 3341 | { |
---|
1374 | | - const struct bpf_reg_state *reg = cur_regs(env) + regno; |
---|
| 3342 | + const struct bpf_reg_state *reg = reg_state(env, regno); |
---|
1375 | 3343 | |
---|
1376 | 3344 | return type_is_pkt_pointer(reg->type); |
---|
| 3345 | +} |
---|
| 3346 | + |
---|
| 3347 | +static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) |
---|
| 3348 | +{ |
---|
| 3349 | + const struct bpf_reg_state *reg = reg_state(env, regno); |
---|
| 3350 | + |
---|
| 3351 | + /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */ |
---|
| 3352 | + return reg->type == PTR_TO_FLOW_KEYS; |
---|
1377 | 3353 | } |
---|
1378 | 3354 | |
---|
1379 | 3355 | static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, |
---|
.. | .. |
---|
1449 | 3425 | * right in front, treat it the very same way. |
---|
1450 | 3426 | */ |
---|
1451 | 3427 | return check_pkt_ptr_alignment(env, reg, off, size, strict); |
---|
| 3428 | + case PTR_TO_FLOW_KEYS: |
---|
| 3429 | + pointer_desc = "flow keys "; |
---|
| 3430 | + break; |
---|
1452 | 3431 | case PTR_TO_MAP_VALUE: |
---|
1453 | 3432 | pointer_desc = "value "; |
---|
1454 | 3433 | break; |
---|
.. | .. |
---|
1457 | 3436 | break; |
---|
1458 | 3437 | case PTR_TO_STACK: |
---|
1459 | 3438 | pointer_desc = "stack "; |
---|
1460 | | - /* The stack spill tracking logic in check_stack_write() |
---|
1461 | | - * and check_stack_read() relies on stack accesses being |
---|
| 3439 | + /* The stack spill tracking logic in check_stack_write_fixed_off() |
---|
| 3440 | + * and check_stack_read_fixed_off() relies on stack accesses being |
---|
1462 | 3441 | * aligned. |
---|
1463 | 3442 | */ |
---|
1464 | 3443 | strict = true; |
---|
| 3444 | + break; |
---|
| 3445 | + case PTR_TO_SOCKET: |
---|
| 3446 | + pointer_desc = "sock "; |
---|
| 3447 | + break; |
---|
| 3448 | + case PTR_TO_SOCK_COMMON: |
---|
| 3449 | + pointer_desc = "sock_common "; |
---|
| 3450 | + break; |
---|
| 3451 | + case PTR_TO_TCP_SOCK: |
---|
| 3452 | + pointer_desc = "tcp_sock "; |
---|
| 3453 | + break; |
---|
| 3454 | + case PTR_TO_XDP_SOCK: |
---|
| 3455 | + pointer_desc = "xdp_sock "; |
---|
1465 | 3456 | break; |
---|
1466 | 3457 | default: |
---|
1467 | 3458 | break; |
---|
.. | .. |
---|
1495 | 3486 | int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; |
---|
1496 | 3487 | struct bpf_subprog_info *subprog = env->subprog_info; |
---|
1497 | 3488 | struct bpf_insn *insn = env->prog->insnsi; |
---|
| 3489 | + bool tail_call_reachable = false; |
---|
1498 | 3490 | int ret_insn[MAX_CALL_FRAMES]; |
---|
1499 | 3491 | int ret_prog[MAX_CALL_FRAMES]; |
---|
| 3492 | + int j; |
---|
1500 | 3493 | |
---|
1501 | 3494 | process_func: |
---|
| 3495 | + /* protect against potential stack overflow that might happen when |
---|
| 3496 | + * bpf2bpf calls get combined with tailcalls. Limit the caller's stack |
---|
| 3497 | + * depth for such case down to 256 so that the worst case scenario |
---|
| 3498 | + * would result in 8k stack size (32 which is tailcall limit * 256 = |
---|
| 3499 | + * 8k). |
---|
| 3500 | + * |
---|
| 3501 | + * To get the idea what might happen, see an example: |
---|
| 3502 | + * func1 -> sub rsp, 128 |
---|
| 3503 | + * subfunc1 -> sub rsp, 256 |
---|
| 3504 | + * tailcall1 -> add rsp, 256 |
---|
| 3505 | + * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320) |
---|
| 3506 | + * subfunc2 -> sub rsp, 64 |
---|
| 3507 | + * subfunc22 -> sub rsp, 128 |
---|
| 3508 | + * tailcall2 -> add rsp, 128 |
---|
| 3509 | + * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416) |
---|
| 3510 | + * |
---|
| 3511 | + * tailcall will unwind the current stack frame but it will not get rid |
---|
| 3512 | + * of caller's stack as shown on the example above. |
---|
| 3513 | + */ |
---|
| 3514 | + if (idx && subprog[idx].has_tail_call && depth >= 256) { |
---|
| 3515 | + verbose(env, |
---|
| 3516 | + "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n", |
---|
| 3517 | + depth); |
---|
| 3518 | + return -EACCES; |
---|
| 3519 | + } |
---|
1502 | 3520 | /* round up to 32-bytes, since this is granularity |
---|
1503 | 3521 | * of interpreter stack size |
---|
1504 | 3522 | */ |
---|
.. | .. |
---|
1527 | 3545 | i); |
---|
1528 | 3546 | return -EFAULT; |
---|
1529 | 3547 | } |
---|
| 3548 | + |
---|
| 3549 | + if (subprog[idx].has_tail_call) |
---|
| 3550 | + tail_call_reachable = true; |
---|
| 3551 | + |
---|
1530 | 3552 | frame++; |
---|
1531 | 3553 | if (frame >= MAX_CALL_FRAMES) { |
---|
1532 | | - WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); |
---|
1533 | | - return -EFAULT; |
---|
| 3554 | + verbose(env, "the call stack of %d frames is too deep !\n", |
---|
| 3555 | + frame); |
---|
| 3556 | + return -E2BIG; |
---|
1534 | 3557 | } |
---|
1535 | 3558 | goto process_func; |
---|
1536 | 3559 | } |
---|
| 3560 | + /* if tail call got detected across bpf2bpf calls then mark each of the |
---|
| 3561 | + * currently present subprog frames as tail call reachable subprogs; |
---|
| 3562 | + * this info will be utilized by JIT so that we will be preserving the |
---|
| 3563 | + * tail call counter throughout bpf2bpf calls combined with tailcalls |
---|
| 3564 | + */ |
---|
| 3565 | + if (tail_call_reachable) |
---|
| 3566 | + for (j = 0; j < frame; j++) |
---|
| 3567 | + subprog[ret_prog[j]].tail_call_reachable = true; |
---|
| 3568 | + if (subprog[0].tail_call_reachable) |
---|
| 3569 | + env->prog->aux->tail_call_reachable = true; |
---|
| 3570 | + |
---|
1537 | 3571 | /* end of for() loop means the last insn of the 'subprog' |
---|
1538 | 3572 | * was reached. Doesn't matter whether it was JA or EXIT |
---|
1539 | 3573 | */ |
---|
.. | .. |
---|
1562 | 3596 | } |
---|
1563 | 3597 | #endif |
---|
1564 | 3598 | |
---|
1565 | | -static int check_ctx_reg(struct bpf_verifier_env *env, |
---|
1566 | | - const struct bpf_reg_state *reg, int regno) |
---|
| 3599 | +int check_ctx_reg(struct bpf_verifier_env *env, |
---|
| 3600 | + const struct bpf_reg_state *reg, int regno) |
---|
1567 | 3601 | { |
---|
1568 | 3602 | /* Access to ctx or passing it to a helper is only allowed in |
---|
1569 | 3603 | * its original, unmodified form. |
---|
.. | .. |
---|
1584 | 3618 | } |
---|
1585 | 3619 | |
---|
1586 | 3620 | return 0; |
---|
| 3621 | +} |
---|
| 3622 | + |
---|
| 3623 | +static int __check_buffer_access(struct bpf_verifier_env *env, |
---|
| 3624 | + const char *buf_info, |
---|
| 3625 | + const struct bpf_reg_state *reg, |
---|
| 3626 | + int regno, int off, int size) |
---|
| 3627 | +{ |
---|
| 3628 | + if (off < 0) { |
---|
| 3629 | + verbose(env, |
---|
| 3630 | + "R%d invalid %s buffer access: off=%d, size=%d\n", |
---|
| 3631 | + regno, buf_info, off, size); |
---|
| 3632 | + return -EACCES; |
---|
| 3633 | + } |
---|
| 3634 | + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { |
---|
| 3635 | + char tn_buf[48]; |
---|
| 3636 | + |
---|
| 3637 | + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
---|
| 3638 | + verbose(env, |
---|
| 3639 | + "R%d invalid variable buffer offset: off=%d, var_off=%s\n", |
---|
| 3640 | + regno, off, tn_buf); |
---|
| 3641 | + return -EACCES; |
---|
| 3642 | + } |
---|
| 3643 | + |
---|
| 3644 | + return 0; |
---|
| 3645 | +} |
---|
| 3646 | + |
---|
| 3647 | +static int check_tp_buffer_access(struct bpf_verifier_env *env, |
---|
| 3648 | + const struct bpf_reg_state *reg, |
---|
| 3649 | + int regno, int off, int size) |
---|
| 3650 | +{ |
---|
| 3651 | + int err; |
---|
| 3652 | + |
---|
| 3653 | + err = __check_buffer_access(env, "tracepoint", reg, regno, off, size); |
---|
| 3654 | + if (err) |
---|
| 3655 | + return err; |
---|
| 3656 | + |
---|
| 3657 | + if (off + size > env->prog->aux->max_tp_access) |
---|
| 3658 | + env->prog->aux->max_tp_access = off + size; |
---|
| 3659 | + |
---|
| 3660 | + return 0; |
---|
| 3661 | +} |
---|
| 3662 | + |
---|
| 3663 | +static int check_buffer_access(struct bpf_verifier_env *env, |
---|
| 3664 | + const struct bpf_reg_state *reg, |
---|
| 3665 | + int regno, int off, int size, |
---|
| 3666 | + bool zero_size_allowed, |
---|
| 3667 | + const char *buf_info, |
---|
| 3668 | + u32 *max_access) |
---|
| 3669 | +{ |
---|
| 3670 | + int err; |
---|
| 3671 | + |
---|
| 3672 | + err = __check_buffer_access(env, buf_info, reg, regno, off, size); |
---|
| 3673 | + if (err) |
---|
| 3674 | + return err; |
---|
| 3675 | + |
---|
| 3676 | + if (off + size > *max_access) |
---|
| 3677 | + *max_access = off + size; |
---|
| 3678 | + |
---|
| 3679 | + return 0; |
---|
| 3680 | +} |
---|
| 3681 | + |
---|
| 3682 | +/* BPF architecture zero extends alu32 ops into 64-bit registers */ |
---|
| 3683 | +static void zext_32_to_64(struct bpf_reg_state *reg) |
---|
| 3684 | +{ |
---|
| 3685 | + reg->var_off = tnum_subreg(reg->var_off); |
---|
| 3686 | + __reg_assign_32_into_64(reg); |
---|
1587 | 3687 | } |
---|
1588 | 3688 | |
---|
1589 | 3689 | /* truncate register to smaller size (in bytes) |
---|
.. | .. |
---|
1607 | 3707 | } |
---|
1608 | 3708 | reg->smin_value = reg->umin_value; |
---|
1609 | 3709 | reg->smax_value = reg->umax_value; |
---|
| 3710 | + |
---|
| 3711 | + /* If size is smaller than 32bit register the 32bit register |
---|
| 3712 | + * values are also truncated so we push 64-bit bounds into |
---|
| 3713 | + * 32-bit bounds. Above were truncated < 32-bits already. |
---|
| 3714 | + */ |
---|
| 3715 | + if (size >= 4) |
---|
| 3716 | + return; |
---|
| 3717 | + __reg_combine_64_into_32(reg); |
---|
| 3718 | +} |
---|
| 3719 | + |
---|
| 3720 | +static bool bpf_map_is_rdonly(const struct bpf_map *map) |
---|
| 3721 | +{ |
---|
| 3722 | + /* A map is considered read-only if the following conditions are true: |
---|
| 3723 | + * |
---|
| 3724 | + * 1) BPF program side cannot change any of the map content. The |
---|
| 3725 | + * BPF_F_RDONLY_PROG flag persists throughout the lifetime of a map |
---|
| 3726 | + * and was set at map creation time. |
---|
| 3727 | + * 2) The map value(s) have been initialized from user space by a |
---|
| 3728 | + * loader and then "frozen", such that no new map update/delete |
---|
| 3729 | + * operations from syscall side are possible for the rest of |
---|
| 3730 | + * the map's lifetime from that point onwards. |
---|
| 3731 | + * 3) Any parallel/pending map update/delete operations from syscall |
---|
| 3732 | + * side have been completed. Only after that point, it's safe to |
---|
| 3733 | + * assume that map value(s) are immutable. |
---|
| 3734 | + */ |
---|
| 3735 | + return (map->map_flags & BPF_F_RDONLY_PROG) && |
---|
| 3736 | + READ_ONCE(map->frozen) && |
---|
| 3737 | + !bpf_map_write_active(map); |
---|
| 3738 | +} |
---|
| 3739 | + |
---|
| 3740 | +static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) |
---|
| 3741 | +{ |
---|
| 3742 | + void *ptr; |
---|
| 3743 | + u64 addr; |
---|
| 3744 | + int err; |
---|
| 3745 | + |
---|
| 3746 | + err = map->ops->map_direct_value_addr(map, &addr, off); |
---|
| 3747 | + if (err) |
---|
| 3748 | + return err; |
---|
| 3749 | + ptr = (void *)(long)addr + off; |
---|
| 3750 | + |
---|
| 3751 | + switch (size) { |
---|
| 3752 | + case sizeof(u8): |
---|
| 3753 | + *val = (u64)*(u8 *)ptr; |
---|
| 3754 | + break; |
---|
| 3755 | + case sizeof(u16): |
---|
| 3756 | + *val = (u64)*(u16 *)ptr; |
---|
| 3757 | + break; |
---|
| 3758 | + case sizeof(u32): |
---|
| 3759 | + *val = (u64)*(u32 *)ptr; |
---|
| 3760 | + break; |
---|
| 3761 | + case sizeof(u64): |
---|
| 3762 | + *val = *(u64 *)ptr; |
---|
| 3763 | + break; |
---|
| 3764 | + default: |
---|
| 3765 | + return -EINVAL; |
---|
| 3766 | + } |
---|
| 3767 | + return 0; |
---|
| 3768 | +} |
---|
| 3769 | + |
---|
| 3770 | +static int check_ptr_to_btf_access(struct bpf_verifier_env *env, |
---|
| 3771 | + struct bpf_reg_state *regs, |
---|
| 3772 | + int regno, int off, int size, |
---|
| 3773 | + enum bpf_access_type atype, |
---|
| 3774 | + int value_regno) |
---|
| 3775 | +{ |
---|
| 3776 | + struct bpf_reg_state *reg = regs + regno; |
---|
| 3777 | + const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id); |
---|
| 3778 | + const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off); |
---|
| 3779 | + u32 btf_id; |
---|
| 3780 | + int ret; |
---|
| 3781 | + |
---|
| 3782 | + if (off < 0) { |
---|
| 3783 | + verbose(env, |
---|
| 3784 | + "R%d is ptr_%s invalid negative access: off=%d\n", |
---|
| 3785 | + regno, tname, off); |
---|
| 3786 | + return -EACCES; |
---|
| 3787 | + } |
---|
| 3788 | + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { |
---|
| 3789 | + char tn_buf[48]; |
---|
| 3790 | + |
---|
| 3791 | + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
---|
| 3792 | + verbose(env, |
---|
| 3793 | + "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", |
---|
| 3794 | + regno, tname, off, tn_buf); |
---|
| 3795 | + return -EACCES; |
---|
| 3796 | + } |
---|
| 3797 | + |
---|
| 3798 | + if (env->ops->btf_struct_access) { |
---|
| 3799 | + ret = env->ops->btf_struct_access(&env->log, t, off, size, |
---|
| 3800 | + atype, &btf_id); |
---|
| 3801 | + } else { |
---|
| 3802 | + if (atype != BPF_READ) { |
---|
| 3803 | + verbose(env, "only read is supported\n"); |
---|
| 3804 | + return -EACCES; |
---|
| 3805 | + } |
---|
| 3806 | + |
---|
| 3807 | + ret = btf_struct_access(&env->log, t, off, size, atype, |
---|
| 3808 | + &btf_id); |
---|
| 3809 | + } |
---|
| 3810 | + |
---|
| 3811 | + if (ret < 0) |
---|
| 3812 | + return ret; |
---|
| 3813 | + |
---|
| 3814 | + if (atype == BPF_READ && value_regno >= 0) |
---|
| 3815 | + mark_btf_ld_reg(env, regs, value_regno, ret, btf_id); |
---|
| 3816 | + |
---|
| 3817 | + return 0; |
---|
| 3818 | +} |
---|
| 3819 | + |
---|
| 3820 | +static int check_ptr_to_map_access(struct bpf_verifier_env *env, |
---|
| 3821 | + struct bpf_reg_state *regs, |
---|
| 3822 | + int regno, int off, int size, |
---|
| 3823 | + enum bpf_access_type atype, |
---|
| 3824 | + int value_regno) |
---|
| 3825 | +{ |
---|
| 3826 | + struct bpf_reg_state *reg = regs + regno; |
---|
| 3827 | + struct bpf_map *map = reg->map_ptr; |
---|
| 3828 | + const struct btf_type *t; |
---|
| 3829 | + const char *tname; |
---|
| 3830 | + u32 btf_id; |
---|
| 3831 | + int ret; |
---|
| 3832 | + |
---|
| 3833 | + if (!btf_vmlinux) { |
---|
| 3834 | + verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n"); |
---|
| 3835 | + return -ENOTSUPP; |
---|
| 3836 | + } |
---|
| 3837 | + |
---|
| 3838 | + if (!map->ops->map_btf_id || !*map->ops->map_btf_id) { |
---|
| 3839 | + verbose(env, "map_ptr access not supported for map type %d\n", |
---|
| 3840 | + map->map_type); |
---|
| 3841 | + return -ENOTSUPP; |
---|
| 3842 | + } |
---|
| 3843 | + |
---|
| 3844 | + t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id); |
---|
| 3845 | + tname = btf_name_by_offset(btf_vmlinux, t->name_off); |
---|
| 3846 | + |
---|
| 3847 | + if (!env->allow_ptr_to_map_access) { |
---|
| 3848 | + verbose(env, |
---|
| 3849 | + "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", |
---|
| 3850 | + tname); |
---|
| 3851 | + return -EPERM; |
---|
| 3852 | + } |
---|
| 3853 | + |
---|
| 3854 | + if (off < 0) { |
---|
| 3855 | + verbose(env, "R%d is %s invalid negative access: off=%d\n", |
---|
| 3856 | + regno, tname, off); |
---|
| 3857 | + return -EACCES; |
---|
| 3858 | + } |
---|
| 3859 | + |
---|
| 3860 | + if (atype != BPF_READ) { |
---|
| 3861 | + verbose(env, "only read from %s is supported\n", tname); |
---|
| 3862 | + return -EACCES; |
---|
| 3863 | + } |
---|
| 3864 | + |
---|
| 3865 | + ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id); |
---|
| 3866 | + if (ret < 0) |
---|
| 3867 | + return ret; |
---|
| 3868 | + |
---|
| 3869 | + if (value_regno >= 0) |
---|
| 3870 | + mark_btf_ld_reg(env, regs, value_regno, ret, btf_id); |
---|
| 3871 | + |
---|
| 3872 | + return 0; |
---|
| 3873 | +} |
---|
| 3874 | + |
---|
| 3875 | +/* Check that the stack access at the given offset is within bounds. The |
---|
| 3876 | + * maximum valid offset is -1. |
---|
| 3877 | + * |
---|
| 3878 | + * The minimum valid offset is -MAX_BPF_STACK for writes, and |
---|
| 3879 | + * -state->allocated_stack for reads. |
---|
| 3880 | + */ |
---|
| 3881 | +static int check_stack_slot_within_bounds(int off, |
---|
| 3882 | + struct bpf_func_state *state, |
---|
| 3883 | + enum bpf_access_type t) |
---|
| 3884 | +{ |
---|
| 3885 | + int min_valid_off; |
---|
| 3886 | + |
---|
| 3887 | + if (t == BPF_WRITE) |
---|
| 3888 | + min_valid_off = -MAX_BPF_STACK; |
---|
| 3889 | + else |
---|
| 3890 | + min_valid_off = -state->allocated_stack; |
---|
| 3891 | + |
---|
| 3892 | + if (off < min_valid_off || off > -1) |
---|
| 3893 | + return -EACCES; |
---|
| 3894 | + return 0; |
---|
| 3895 | +} |
---|
| 3896 | + |
---|
| 3897 | +/* Check that the stack access at 'regno + off' falls within the maximum stack |
---|
| 3898 | + * bounds. |
---|
| 3899 | + * |
---|
| 3900 | + * 'off' includes `regno->offset`, but not its dynamic part (if any). |
---|
| 3901 | + */ |
---|
| 3902 | +static int check_stack_access_within_bounds( |
---|
| 3903 | + struct bpf_verifier_env *env, |
---|
| 3904 | + int regno, int off, int access_size, |
---|
| 3905 | + enum stack_access_src src, enum bpf_access_type type) |
---|
| 3906 | +{ |
---|
| 3907 | + struct bpf_reg_state *regs = cur_regs(env); |
---|
| 3908 | + struct bpf_reg_state *reg = regs + regno; |
---|
| 3909 | + struct bpf_func_state *state = func(env, reg); |
---|
| 3910 | + int min_off, max_off; |
---|
| 3911 | + int err; |
---|
| 3912 | + char *err_extra; |
---|
| 3913 | + |
---|
| 3914 | + if (src == ACCESS_HELPER) |
---|
| 3915 | + /* We don't know if helpers are reading or writing (or both). */ |
---|
| 3916 | + err_extra = " indirect access to"; |
---|
| 3917 | + else if (type == BPF_READ) |
---|
| 3918 | + err_extra = " read from"; |
---|
| 3919 | + else |
---|
| 3920 | + err_extra = " write to"; |
---|
| 3921 | + |
---|
| 3922 | + if (tnum_is_const(reg->var_off)) { |
---|
| 3923 | + min_off = reg->var_off.value + off; |
---|
| 3924 | + if (access_size > 0) |
---|
| 3925 | + max_off = min_off + access_size - 1; |
---|
| 3926 | + else |
---|
| 3927 | + max_off = min_off; |
---|
| 3928 | + } else { |
---|
| 3929 | + if (reg->smax_value >= BPF_MAX_VAR_OFF || |
---|
| 3930 | + reg->smin_value <= -BPF_MAX_VAR_OFF) { |
---|
| 3931 | + verbose(env, "invalid unbounded variable-offset%s stack R%d\n", |
---|
| 3932 | + err_extra, regno); |
---|
| 3933 | + return -EACCES; |
---|
| 3934 | + } |
---|
| 3935 | + min_off = reg->smin_value + off; |
---|
| 3936 | + if (access_size > 0) |
---|
| 3937 | + max_off = reg->smax_value + off + access_size - 1; |
---|
| 3938 | + else |
---|
| 3939 | + max_off = min_off; |
---|
| 3940 | + } |
---|
| 3941 | + |
---|
| 3942 | + err = check_stack_slot_within_bounds(min_off, state, type); |
---|
| 3943 | + if (!err) |
---|
| 3944 | + err = check_stack_slot_within_bounds(max_off, state, type); |
---|
| 3945 | + |
---|
| 3946 | + if (err) { |
---|
| 3947 | + if (tnum_is_const(reg->var_off)) { |
---|
| 3948 | + verbose(env, "invalid%s stack R%d off=%d size=%d\n", |
---|
| 3949 | + err_extra, regno, off, access_size); |
---|
| 3950 | + } else { |
---|
| 3951 | + char tn_buf[48]; |
---|
| 3952 | + |
---|
| 3953 | + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
---|
| 3954 | + verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n", |
---|
| 3955 | + err_extra, regno, tn_buf, access_size); |
---|
| 3956 | + } |
---|
| 3957 | + } |
---|
| 3958 | + return err; |
---|
1610 | 3959 | } |
---|
1611 | 3960 | |
---|
1612 | 3961 | /* check whether memory at (regno + off) is accessible for t = (read | write) |
---|
.. | .. |
---|
1642 | 3991 | verbose(env, "R%d leaks addr into map\n", value_regno); |
---|
1643 | 3992 | return -EACCES; |
---|
1644 | 3993 | } |
---|
1645 | | - |
---|
| 3994 | + err = check_map_access_type(env, regno, off, size, t); |
---|
| 3995 | + if (err) |
---|
| 3996 | + return err; |
---|
1646 | 3997 | err = check_map_access(env, regno, off, size, false); |
---|
| 3998 | + if (!err && t == BPF_READ && value_regno >= 0) { |
---|
| 3999 | + struct bpf_map *map = reg->map_ptr; |
---|
| 4000 | + |
---|
| 4001 | + /* if map is read-only, track its contents as scalars */ |
---|
| 4002 | + if (tnum_is_const(reg->var_off) && |
---|
| 4003 | + bpf_map_is_rdonly(map) && |
---|
| 4004 | + map->ops->map_direct_value_addr) { |
---|
| 4005 | + int map_off = off + reg->var_off.value; |
---|
| 4006 | + u64 val = 0; |
---|
| 4007 | + |
---|
| 4008 | + err = bpf_map_direct_read(map, map_off, size, |
---|
| 4009 | + &val); |
---|
| 4010 | + if (err) |
---|
| 4011 | + return err; |
---|
| 4012 | + |
---|
| 4013 | + regs[value_regno].type = SCALAR_VALUE; |
---|
| 4014 | + __mark_reg_known(®s[value_regno], val); |
---|
| 4015 | + } else { |
---|
| 4016 | + mark_reg_unknown(env, regs, value_regno); |
---|
| 4017 | + } |
---|
| 4018 | + } |
---|
| 4019 | + } else if (reg->type == PTR_TO_MEM) { |
---|
| 4020 | + if (t == BPF_WRITE && value_regno >= 0 && |
---|
| 4021 | + is_pointer_value(env, value_regno)) { |
---|
| 4022 | + verbose(env, "R%d leaks addr into mem\n", value_regno); |
---|
| 4023 | + return -EACCES; |
---|
| 4024 | + } |
---|
| 4025 | + err = check_mem_region_access(env, regno, off, size, |
---|
| 4026 | + reg->mem_size, false); |
---|
1647 | 4027 | if (!err && t == BPF_READ && value_regno >= 0) |
---|
1648 | 4028 | mark_reg_unknown(env, regs, value_regno); |
---|
1649 | | - |
---|
1650 | 4029 | } else if (reg->type == PTR_TO_CTX) { |
---|
1651 | 4030 | enum bpf_reg_type reg_type = SCALAR_VALUE; |
---|
| 4031 | + u32 btf_id = 0; |
---|
1652 | 4032 | |
---|
1653 | 4033 | if (t == BPF_WRITE && value_regno >= 0 && |
---|
1654 | 4034 | is_pointer_value(env, value_regno)) { |
---|
.. | .. |
---|
1660 | 4040 | if (err < 0) |
---|
1661 | 4041 | return err; |
---|
1662 | 4042 | |
---|
1663 | | - err = check_ctx_access(env, insn_idx, off, size, t, ®_type); |
---|
| 4043 | + err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf_id); |
---|
| 4044 | + if (err) |
---|
| 4045 | + verbose_linfo(env, insn_idx, "; "); |
---|
1664 | 4046 | if (!err && t == BPF_READ && value_regno >= 0) { |
---|
1665 | 4047 | /* ctx access returns either a scalar, or a |
---|
1666 | 4048 | * PTR_TO_PACKET[_META,_END]. In the latter |
---|
1667 | 4049 | * case, we know the offset is zero. |
---|
1668 | 4050 | */ |
---|
1669 | | - if (reg_type == SCALAR_VALUE) |
---|
| 4051 | + if (reg_type == SCALAR_VALUE) { |
---|
1670 | 4052 | mark_reg_unknown(env, regs, value_regno); |
---|
1671 | | - else |
---|
| 4053 | + } else { |
---|
1672 | 4054 | mark_reg_known_zero(env, regs, |
---|
1673 | 4055 | value_regno); |
---|
| 4056 | + if (reg_type_may_be_null(reg_type)) |
---|
| 4057 | + regs[value_regno].id = ++env->id_gen; |
---|
| 4058 | + /* A load of ctx field could have different |
---|
| 4059 | + * actual load size with the one encoded in the |
---|
| 4060 | + * insn. When the dst is PTR, it is for sure not |
---|
| 4061 | + * a sub-register. |
---|
| 4062 | + */ |
---|
| 4063 | + regs[value_regno].subreg_def = DEF_NOT_SUBREG; |
---|
| 4064 | + if (reg_type == PTR_TO_BTF_ID || |
---|
| 4065 | + reg_type == PTR_TO_BTF_ID_OR_NULL) |
---|
| 4066 | + regs[value_regno].btf_id = btf_id; |
---|
| 4067 | + } |
---|
1674 | 4068 | regs[value_regno].type = reg_type; |
---|
1675 | 4069 | } |
---|
1676 | 4070 | |
---|
1677 | 4071 | } else if (reg->type == PTR_TO_STACK) { |
---|
1678 | | - off += reg->var_off.value; |
---|
1679 | | - err = check_stack_access(env, reg, off, size); |
---|
| 4072 | + /* Basic bounds checks. */ |
---|
| 4073 | + err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t); |
---|
1680 | 4074 | if (err) |
---|
1681 | 4075 | return err; |
---|
1682 | 4076 | |
---|
.. | .. |
---|
1685 | 4079 | if (err) |
---|
1686 | 4080 | return err; |
---|
1687 | 4081 | |
---|
1688 | | - if (t == BPF_WRITE) |
---|
1689 | | - err = check_stack_write(env, state, off, size, |
---|
1690 | | - value_regno, insn_idx); |
---|
1691 | | - else |
---|
1692 | | - err = check_stack_read(env, state, off, size, |
---|
| 4082 | + if (t == BPF_READ) |
---|
| 4083 | + err = check_stack_read(env, regno, off, size, |
---|
1693 | 4084 | value_regno); |
---|
| 4085 | + else |
---|
| 4086 | + err = check_stack_write(env, regno, off, size, |
---|
| 4087 | + value_regno, insn_idx); |
---|
1694 | 4088 | } else if (reg_is_pkt_pointer(reg)) { |
---|
1695 | 4089 | if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { |
---|
1696 | 4090 | verbose(env, "cannot write into packet\n"); |
---|
.. | .. |
---|
1703 | 4097 | return -EACCES; |
---|
1704 | 4098 | } |
---|
1705 | 4099 | err = check_packet_access(env, regno, off, size, false); |
---|
| 4100 | + if (!err && t == BPF_READ && value_regno >= 0) |
---|
| 4101 | + mark_reg_unknown(env, regs, value_regno); |
---|
| 4102 | + } else if (reg->type == PTR_TO_FLOW_KEYS) { |
---|
| 4103 | + if (t == BPF_WRITE && value_regno >= 0 && |
---|
| 4104 | + is_pointer_value(env, value_regno)) { |
---|
| 4105 | + verbose(env, "R%d leaks addr into flow keys\n", |
---|
| 4106 | + value_regno); |
---|
| 4107 | + return -EACCES; |
---|
| 4108 | + } |
---|
| 4109 | + |
---|
| 4110 | + err = check_flow_keys_access(env, off, size); |
---|
| 4111 | + if (!err && t == BPF_READ && value_regno >= 0) |
---|
| 4112 | + mark_reg_unknown(env, regs, value_regno); |
---|
| 4113 | + } else if (type_is_sk_pointer(reg->type)) { |
---|
| 4114 | + if (t == BPF_WRITE) { |
---|
| 4115 | + verbose(env, "R%d cannot write into %s\n", |
---|
| 4116 | + regno, reg_type_str[reg->type]); |
---|
| 4117 | + return -EACCES; |
---|
| 4118 | + } |
---|
| 4119 | + err = check_sock_access(env, insn_idx, regno, off, size, t); |
---|
| 4120 | + if (!err && value_regno >= 0) |
---|
| 4121 | + mark_reg_unknown(env, regs, value_regno); |
---|
| 4122 | + } else if (reg->type == PTR_TO_TP_BUFFER) { |
---|
| 4123 | + err = check_tp_buffer_access(env, reg, regno, off, size); |
---|
| 4124 | + if (!err && t == BPF_READ && value_regno >= 0) |
---|
| 4125 | + mark_reg_unknown(env, regs, value_regno); |
---|
| 4126 | + } else if (reg->type == PTR_TO_BTF_ID) { |
---|
| 4127 | + err = check_ptr_to_btf_access(env, regs, regno, off, size, t, |
---|
| 4128 | + value_regno); |
---|
| 4129 | + } else if (reg->type == CONST_PTR_TO_MAP) { |
---|
| 4130 | + err = check_ptr_to_map_access(env, regs, regno, off, size, t, |
---|
| 4131 | + value_regno); |
---|
| 4132 | + } else if (reg->type == PTR_TO_RDONLY_BUF) { |
---|
| 4133 | + if (t == BPF_WRITE) { |
---|
| 4134 | + verbose(env, "R%d cannot write into %s\n", |
---|
| 4135 | + regno, reg_type_str[reg->type]); |
---|
| 4136 | + return -EACCES; |
---|
| 4137 | + } |
---|
| 4138 | + err = check_buffer_access(env, reg, regno, off, size, false, |
---|
| 4139 | + "rdonly", |
---|
| 4140 | + &env->prog->aux->max_rdonly_access); |
---|
| 4141 | + if (!err && value_regno >= 0) |
---|
| 4142 | + mark_reg_unknown(env, regs, value_regno); |
---|
| 4143 | + } else if (reg->type == PTR_TO_RDWR_BUF) { |
---|
| 4144 | + err = check_buffer_access(env, reg, regno, off, size, false, |
---|
| 4145 | + "rdwr", |
---|
| 4146 | + &env->prog->aux->max_rdwr_access); |
---|
1706 | 4147 | if (!err && t == BPF_READ && value_regno >= 0) |
---|
1707 | 4148 | mark_reg_unknown(env, regs, value_regno); |
---|
1708 | 4149 | } else { |
---|
.. | .. |
---|
1745 | 4186 | } |
---|
1746 | 4187 | |
---|
1747 | 4188 | if (is_ctx_reg(env, insn->dst_reg) || |
---|
1748 | | - is_pkt_reg(env, insn->dst_reg)) { |
---|
| 4189 | + is_pkt_reg(env, insn->dst_reg) || |
---|
| 4190 | + is_flow_key_reg(env, insn->dst_reg) || |
---|
| 4191 | + is_sk_reg(env, insn->dst_reg)) { |
---|
1749 | 4192 | verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", |
---|
1750 | | - insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ? |
---|
1751 | | - "context" : "packet"); |
---|
| 4193 | + insn->dst_reg, |
---|
| 4194 | + reg_type_str[reg_state(env, insn->dst_reg)->type]); |
---|
1752 | 4195 | return -EACCES; |
---|
1753 | 4196 | } |
---|
1754 | 4197 | |
---|
.. | .. |
---|
1763 | 4206 | BPF_SIZE(insn->code), BPF_WRITE, -1, true); |
---|
1764 | 4207 | } |
---|
1765 | 4208 | |
---|
1766 | | -static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, |
---|
1767 | | - int off, int access_size, |
---|
1768 | | - bool zero_size_allowed) |
---|
1769 | | -{ |
---|
1770 | | - struct bpf_reg_state *reg = cur_regs(env) + regno; |
---|
1771 | | - |
---|
1772 | | - if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || |
---|
1773 | | - access_size < 0 || (access_size == 0 && !zero_size_allowed)) { |
---|
1774 | | - if (tnum_is_const(reg->var_off)) { |
---|
1775 | | - verbose(env, "invalid stack type R%d off=%d access_size=%d\n", |
---|
1776 | | - regno, off, access_size); |
---|
1777 | | - } else { |
---|
1778 | | - char tn_buf[48]; |
---|
1779 | | - |
---|
1780 | | - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
---|
1781 | | - verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n", |
---|
1782 | | - regno, tn_buf, access_size); |
---|
1783 | | - } |
---|
1784 | | - return -EACCES; |
---|
1785 | | - } |
---|
1786 | | - return 0; |
---|
1787 | | -} |
---|
1788 | | - |
---|
1789 | | -/* when register 'regno' is passed into function that will read 'access_size' |
---|
1790 | | - * bytes from that pointer, make sure that it's within stack boundary |
---|
1791 | | - * and all elements of stack are initialized. |
---|
1792 | | - * Unlike most pointer bounds-checking functions, this one doesn't take an |
---|
1793 | | - * 'off' argument, so it has to add in reg->off itself. |
---|
| 4209 | +/* When register 'regno' is used to read the stack (either directly or through |
---|
| 4210 | + * a helper function) make sure that it's within stack boundary and, depending |
---|
| 4211 | + * on the access type, that all elements of the stack are initialized. |
---|
| 4212 | + * |
---|
| 4213 | + * 'off' includes 'regno->off', but not its dynamic part (if any). |
---|
| 4214 | + * |
---|
| 4215 | + * All registers that have been spilled on the stack in the slots within the |
---|
| 4216 | + * read offsets are marked as read. |
---|
1794 | 4217 | */ |
---|
1795 | | -static int check_stack_boundary(struct bpf_verifier_env *env, int regno, |
---|
1796 | | - int access_size, bool zero_size_allowed, |
---|
1797 | | - struct bpf_call_arg_meta *meta) |
---|
| 4218 | +static int check_stack_range_initialized( |
---|
| 4219 | + struct bpf_verifier_env *env, int regno, int off, |
---|
| 4220 | + int access_size, bool zero_size_allowed, |
---|
| 4221 | + enum stack_access_src type, struct bpf_call_arg_meta *meta) |
---|
1798 | 4222 | { |
---|
1799 | | - struct bpf_reg_state *reg = cur_regs(env) + regno; |
---|
| 4223 | + struct bpf_reg_state *reg = reg_state(env, regno); |
---|
1800 | 4224 | struct bpf_func_state *state = func(env, reg); |
---|
1801 | 4225 | int err, min_off, max_off, i, j, slot, spi; |
---|
| 4226 | + char *err_extra = type == ACCESS_HELPER ? " indirect" : ""; |
---|
| 4227 | + enum bpf_access_type bounds_check_type; |
---|
| 4228 | + /* Some accesses can write anything into the stack, others are |
---|
| 4229 | + * read-only. |
---|
| 4230 | + */ |
---|
| 4231 | + bool clobber = false; |
---|
1802 | 4232 | |
---|
1803 | | - if (reg->type != PTR_TO_STACK) { |
---|
1804 | | - /* Allow zero-byte read from NULL, regardless of pointer type */ |
---|
1805 | | - if (zero_size_allowed && access_size == 0 && |
---|
1806 | | - register_is_null(reg)) |
---|
1807 | | - return 0; |
---|
1808 | | - |
---|
1809 | | - verbose(env, "R%d type=%s expected=%s\n", regno, |
---|
1810 | | - reg_type_str[reg->type], |
---|
1811 | | - reg_type_str[PTR_TO_STACK]); |
---|
| 4233 | + if (access_size == 0 && !zero_size_allowed) { |
---|
| 4234 | + verbose(env, "invalid zero-sized read\n"); |
---|
1812 | 4235 | return -EACCES; |
---|
1813 | 4236 | } |
---|
1814 | 4237 | |
---|
| 4238 | + if (type == ACCESS_HELPER) { |
---|
| 4239 | + /* The bounds checks for writes are more permissive than for |
---|
| 4240 | + * reads. However, if raw_mode is not set, we'll do extra |
---|
| 4241 | + * checks below. |
---|
| 4242 | + */ |
---|
| 4243 | + bounds_check_type = BPF_WRITE; |
---|
| 4244 | + clobber = true; |
---|
| 4245 | + } else { |
---|
| 4246 | + bounds_check_type = BPF_READ; |
---|
| 4247 | + } |
---|
| 4248 | + err = check_stack_access_within_bounds(env, regno, off, access_size, |
---|
| 4249 | + type, bounds_check_type); |
---|
| 4250 | + if (err) |
---|
| 4251 | + return err; |
---|
| 4252 | + |
---|
| 4253 | + |
---|
1815 | 4254 | if (tnum_is_const(reg->var_off)) { |
---|
1816 | | - min_off = max_off = reg->var_off.value + reg->off; |
---|
1817 | | - err = __check_stack_boundary(env, regno, min_off, access_size, |
---|
1818 | | - zero_size_allowed); |
---|
1819 | | - if (err) |
---|
1820 | | - return err; |
---|
| 4255 | + min_off = max_off = reg->var_off.value + off; |
---|
1821 | 4256 | } else { |
---|
1822 | 4257 | /* Variable offset is prohibited for unprivileged mode for |
---|
1823 | 4258 | * simplicity since it requires corresponding support in |
---|
1824 | 4259 | * Spectre masking for stack ALU. |
---|
1825 | 4260 | * See also retrieve_ptr_limit(). |
---|
1826 | 4261 | */ |
---|
1827 | | - if (!env->allow_ptr_leaks) { |
---|
| 4262 | + if (!env->bypass_spec_v1) { |
---|
1828 | 4263 | char tn_buf[48]; |
---|
1829 | 4264 | |
---|
1830 | 4265 | tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
---|
1831 | | - verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", |
---|
1832 | | - regno, tn_buf); |
---|
| 4266 | + verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n", |
---|
| 4267 | + regno, err_extra, tn_buf); |
---|
1833 | 4268 | return -EACCES; |
---|
1834 | 4269 | } |
---|
1835 | 4270 | /* Only initialized buffer on stack is allowed to be accessed |
---|
.. | .. |
---|
1841 | 4276 | if (meta && meta->raw_mode) |
---|
1842 | 4277 | meta = NULL; |
---|
1843 | 4278 | |
---|
1844 | | - if (reg->smax_value >= BPF_MAX_VAR_OFF || |
---|
1845 | | - reg->smax_value <= -BPF_MAX_VAR_OFF) { |
---|
1846 | | - verbose(env, "R%d unbounded indirect variable offset stack access\n", |
---|
1847 | | - regno); |
---|
1848 | | - return -EACCES; |
---|
1849 | | - } |
---|
1850 | | - min_off = reg->smin_value + reg->off; |
---|
1851 | | - max_off = reg->smax_value + reg->off; |
---|
1852 | | - err = __check_stack_boundary(env, regno, min_off, access_size, |
---|
1853 | | - zero_size_allowed); |
---|
1854 | | - if (err) { |
---|
1855 | | - verbose(env, "R%d min value is outside of stack bound\n", |
---|
1856 | | - regno); |
---|
1857 | | - return err; |
---|
1858 | | - } |
---|
1859 | | - err = __check_stack_boundary(env, regno, max_off, access_size, |
---|
1860 | | - zero_size_allowed); |
---|
1861 | | - if (err) { |
---|
1862 | | - verbose(env, "R%d max value is outside of stack bound\n", |
---|
1863 | | - regno); |
---|
1864 | | - return err; |
---|
1865 | | - } |
---|
| 4279 | + min_off = reg->smin_value + off; |
---|
| 4280 | + max_off = reg->smax_value + off; |
---|
1866 | 4281 | } |
---|
1867 | 4282 | |
---|
1868 | 4283 | if (meta && meta->raw_mode) { |
---|
.. | .. |
---|
1882 | 4297 | if (*stype == STACK_MISC) |
---|
1883 | 4298 | goto mark; |
---|
1884 | 4299 | if (*stype == STACK_ZERO) { |
---|
1885 | | - /* helper can write anything into the stack */ |
---|
1886 | | - *stype = STACK_MISC; |
---|
| 4300 | + if (clobber) { |
---|
| 4301 | + /* helper can write anything into the stack */ |
---|
| 4302 | + *stype = STACK_MISC; |
---|
| 4303 | + } |
---|
1887 | 4304 | goto mark; |
---|
1888 | 4305 | } |
---|
1889 | | - if (state->stack[spi].slot_type[0] == STACK_SPILL && |
---|
1890 | | - state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { |
---|
1891 | | - __mark_reg_unknown(&state->stack[spi].spilled_ptr); |
---|
1892 | | - for (j = 0; j < BPF_REG_SIZE; j++) |
---|
1893 | | - state->stack[spi].slot_type[j] = STACK_MISC; |
---|
| 4306 | + |
---|
| 4307 | + if (is_spilled_reg(&state->stack[spi]) && |
---|
| 4308 | + state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID) |
---|
| 4309 | + goto mark; |
---|
| 4310 | + |
---|
| 4311 | + if (is_spilled_reg(&state->stack[spi]) && |
---|
| 4312 | + (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || |
---|
| 4313 | + env->allow_ptr_leaks)) { |
---|
| 4314 | + if (clobber) { |
---|
| 4315 | + __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); |
---|
| 4316 | + for (j = 0; j < BPF_REG_SIZE; j++) |
---|
| 4317 | + scrub_spilled_slot(&state->stack[spi].slot_type[j]); |
---|
| 4318 | + } |
---|
1894 | 4319 | goto mark; |
---|
1895 | 4320 | } |
---|
1896 | 4321 | |
---|
1897 | 4322 | err: |
---|
1898 | 4323 | if (tnum_is_const(reg->var_off)) { |
---|
1899 | | - verbose(env, "invalid indirect read from stack off %d+%d size %d\n", |
---|
1900 | | - min_off, i - min_off, access_size); |
---|
| 4324 | + verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", |
---|
| 4325 | + err_extra, regno, min_off, i - min_off, access_size); |
---|
1901 | 4326 | } else { |
---|
1902 | 4327 | char tn_buf[48]; |
---|
1903 | 4328 | |
---|
1904 | 4329 | tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
---|
1905 | | - verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", |
---|
1906 | | - tn_buf, i - min_off, access_size); |
---|
| 4330 | + verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n", |
---|
| 4331 | + err_extra, regno, tn_buf, i - min_off, access_size); |
---|
1907 | 4332 | } |
---|
1908 | 4333 | return -EACCES; |
---|
1909 | 4334 | mark: |
---|
.. | .. |
---|
1911 | 4336 | * the whole slot to be marked as 'read' |
---|
1912 | 4337 | */ |
---|
1913 | 4338 | mark_reg_read(env, &state->stack[spi].spilled_ptr, |
---|
1914 | | - state->stack[spi].spilled_ptr.parent); |
---|
| 4339 | + state->stack[spi].spilled_ptr.parent, |
---|
| 4340 | + REG_LIVE_READ64); |
---|
1915 | 4341 | } |
---|
1916 | 4342 | return update_stack_depth(env, state, min_off); |
---|
1917 | 4343 | } |
---|
.. | .. |
---|
1928 | 4354 | return check_packet_access(env, regno, reg->off, access_size, |
---|
1929 | 4355 | zero_size_allowed); |
---|
1930 | 4356 | case PTR_TO_MAP_VALUE: |
---|
| 4357 | + if (check_map_access_type(env, regno, reg->off, access_size, |
---|
| 4358 | + meta && meta->raw_mode ? BPF_WRITE : |
---|
| 4359 | + BPF_READ)) |
---|
| 4360 | + return -EACCES; |
---|
1931 | 4361 | return check_map_access(env, regno, reg->off, access_size, |
---|
1932 | 4362 | zero_size_allowed); |
---|
1933 | | - default: /* scalar_value|ptr_to_stack or invalid ptr */ |
---|
1934 | | - return check_stack_boundary(env, regno, access_size, |
---|
1935 | | - zero_size_allowed, meta); |
---|
| 4363 | + case PTR_TO_MEM: |
---|
| 4364 | + return check_mem_region_access(env, regno, reg->off, |
---|
| 4365 | + access_size, reg->mem_size, |
---|
| 4366 | + zero_size_allowed); |
---|
| 4367 | + case PTR_TO_RDONLY_BUF: |
---|
| 4368 | + if (meta && meta->raw_mode) |
---|
| 4369 | + return -EACCES; |
---|
| 4370 | + return check_buffer_access(env, reg, regno, reg->off, |
---|
| 4371 | + access_size, zero_size_allowed, |
---|
| 4372 | + "rdonly", |
---|
| 4373 | + &env->prog->aux->max_rdonly_access); |
---|
| 4374 | + case PTR_TO_RDWR_BUF: |
---|
| 4375 | + return check_buffer_access(env, reg, regno, reg->off, |
---|
| 4376 | + access_size, zero_size_allowed, |
---|
| 4377 | + "rdwr", |
---|
| 4378 | + &env->prog->aux->max_rdwr_access); |
---|
| 4379 | + case PTR_TO_STACK: |
---|
| 4380 | + return check_stack_range_initialized( |
---|
| 4381 | + env, |
---|
| 4382 | + regno, reg->off, access_size, |
---|
| 4383 | + zero_size_allowed, ACCESS_HELPER, meta); |
---|
| 4384 | + default: /* scalar_value or invalid ptr */ |
---|
| 4385 | + /* Allow zero-byte read from NULL, regardless of pointer type */ |
---|
| 4386 | + if (zero_size_allowed && access_size == 0 && |
---|
| 4387 | + register_is_null(reg)) |
---|
| 4388 | + return 0; |
---|
| 4389 | + |
---|
| 4390 | + verbose(env, "R%d type=%s expected=%s\n", regno, |
---|
| 4391 | + reg_type_str[reg->type], |
---|
| 4392 | + reg_type_str[PTR_TO_STACK]); |
---|
| 4393 | + return -EACCES; |
---|
1936 | 4394 | } |
---|
| 4395 | +} |
---|
| 4396 | + |
---|
| 4397 | +/* Implementation details: |
---|
| 4398 | + * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL |
---|
| 4399 | + * Two bpf_map_lookups (even with the same key) will have different reg->id. |
---|
| 4400 | + * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after |
---|
| 4401 | + * value_or_null->value transition, since the verifier only cares about |
---|
| 4402 | + * the range of access to valid map value pointer and doesn't care about actual |
---|
| 4403 | + * address of the map element. |
---|
| 4404 | + * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps |
---|
| 4405 | + * reg->id > 0 after value_or_null->value transition. By doing so |
---|
| 4406 | + * two bpf_map_lookups will be considered two different pointers that |
---|
| 4407 | + * point to different bpf_spin_locks. |
---|
| 4408 | + * The verifier allows taking only one bpf_spin_lock at a time to avoid |
---|
| 4409 | + * dead-locks. |
---|
| 4410 | + * Since only one bpf_spin_lock is allowed the checks are simpler than |
---|
| 4411 | + * reg_is_refcounted() logic. The verifier needs to remember only |
---|
| 4412 | + * one spin_lock instead of array of acquired_refs. |
---|
| 4413 | + * cur_state->active_spin_lock remembers which map value element got locked |
---|
| 4414 | + * and clears it after bpf_spin_unlock. |
---|
| 4415 | + */ |
---|
| 4416 | +static int process_spin_lock(struct bpf_verifier_env *env, int regno, |
---|
| 4417 | + bool is_lock) |
---|
| 4418 | +{ |
---|
| 4419 | + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; |
---|
| 4420 | + struct bpf_verifier_state *cur = env->cur_state; |
---|
| 4421 | + bool is_const = tnum_is_const(reg->var_off); |
---|
| 4422 | + struct bpf_map *map = reg->map_ptr; |
---|
| 4423 | + u64 val = reg->var_off.value; |
---|
| 4424 | + |
---|
| 4425 | + if (!is_const) { |
---|
| 4426 | + verbose(env, |
---|
| 4427 | + "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", |
---|
| 4428 | + regno); |
---|
| 4429 | + return -EINVAL; |
---|
| 4430 | + } |
---|
| 4431 | + if (!map->btf) { |
---|
| 4432 | + verbose(env, |
---|
| 4433 | + "map '%s' has to have BTF in order to use bpf_spin_lock\n", |
---|
| 4434 | + map->name); |
---|
| 4435 | + return -EINVAL; |
---|
| 4436 | + } |
---|
| 4437 | + if (!map_value_has_spin_lock(map)) { |
---|
| 4438 | + if (map->spin_lock_off == -E2BIG) |
---|
| 4439 | + verbose(env, |
---|
| 4440 | + "map '%s' has more than one 'struct bpf_spin_lock'\n", |
---|
| 4441 | + map->name); |
---|
| 4442 | + else if (map->spin_lock_off == -ENOENT) |
---|
| 4443 | + verbose(env, |
---|
| 4444 | + "map '%s' doesn't have 'struct bpf_spin_lock'\n", |
---|
| 4445 | + map->name); |
---|
| 4446 | + else |
---|
| 4447 | + verbose(env, |
---|
| 4448 | + "map '%s' is not a struct type or bpf_spin_lock is mangled\n", |
---|
| 4449 | + map->name); |
---|
| 4450 | + return -EINVAL; |
---|
| 4451 | + } |
---|
| 4452 | + if (map->spin_lock_off != val + reg->off) { |
---|
| 4453 | + verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", |
---|
| 4454 | + val + reg->off); |
---|
| 4455 | + return -EINVAL; |
---|
| 4456 | + } |
---|
| 4457 | + if (is_lock) { |
---|
| 4458 | + if (cur->active_spin_lock) { |
---|
| 4459 | + verbose(env, |
---|
| 4460 | + "Locking two bpf_spin_locks are not allowed\n"); |
---|
| 4461 | + return -EINVAL; |
---|
| 4462 | + } |
---|
| 4463 | + cur->active_spin_lock = reg->id; |
---|
| 4464 | + } else { |
---|
| 4465 | + if (!cur->active_spin_lock) { |
---|
| 4466 | + verbose(env, "bpf_spin_unlock without taking a lock\n"); |
---|
| 4467 | + return -EINVAL; |
---|
| 4468 | + } |
---|
| 4469 | + if (cur->active_spin_lock != reg->id) { |
---|
| 4470 | + verbose(env, "bpf_spin_unlock of different lock\n"); |
---|
| 4471 | + return -EINVAL; |
---|
| 4472 | + } |
---|
| 4473 | + cur->active_spin_lock = 0; |
---|
| 4474 | + } |
---|
| 4475 | + return 0; |
---|
1937 | 4476 | } |
---|
1938 | 4477 | |
---|
1939 | 4478 | static bool arg_type_is_mem_ptr(enum bpf_arg_type type) |
---|
.. | .. |
---|
1949 | 4488 | type == ARG_CONST_SIZE_OR_ZERO; |
---|
1950 | 4489 | } |
---|
1951 | 4490 | |
---|
1952 | | -static int check_func_arg(struct bpf_verifier_env *env, u32 regno, |
---|
| 4491 | +static bool arg_type_is_alloc_size(enum bpf_arg_type type) |
---|
| 4492 | +{ |
---|
| 4493 | + return type == ARG_CONST_ALLOC_SIZE_OR_ZERO; |
---|
| 4494 | +} |
---|
| 4495 | + |
---|
| 4496 | +static bool arg_type_is_int_ptr(enum bpf_arg_type type) |
---|
| 4497 | +{ |
---|
| 4498 | + return type == ARG_PTR_TO_INT || |
---|
| 4499 | + type == ARG_PTR_TO_LONG; |
---|
| 4500 | +} |
---|
| 4501 | + |
---|
| 4502 | +static int int_ptr_type_to_size(enum bpf_arg_type type) |
---|
| 4503 | +{ |
---|
| 4504 | + if (type == ARG_PTR_TO_INT) |
---|
| 4505 | + return sizeof(u32); |
---|
| 4506 | + else if (type == ARG_PTR_TO_LONG) |
---|
| 4507 | + return sizeof(u64); |
---|
| 4508 | + |
---|
| 4509 | + return -EINVAL; |
---|
| 4510 | +} |
---|
| 4511 | + |
---|
| 4512 | +static int resolve_map_arg_type(struct bpf_verifier_env *env, |
---|
| 4513 | + const struct bpf_call_arg_meta *meta, |
---|
| 4514 | + enum bpf_arg_type *arg_type) |
---|
| 4515 | +{ |
---|
| 4516 | + if (!meta->map_ptr) { |
---|
| 4517 | + /* kernel subsystem misconfigured verifier */ |
---|
| 4518 | + verbose(env, "invalid map_ptr to access map->type\n"); |
---|
| 4519 | + return -EACCES; |
---|
| 4520 | + } |
---|
| 4521 | + |
---|
| 4522 | + switch (meta->map_ptr->map_type) { |
---|
| 4523 | + case BPF_MAP_TYPE_SOCKMAP: |
---|
| 4524 | + case BPF_MAP_TYPE_SOCKHASH: |
---|
| 4525 | + if (*arg_type == ARG_PTR_TO_MAP_VALUE) { |
---|
| 4526 | + *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON; |
---|
| 4527 | + } else { |
---|
| 4528 | + verbose(env, "invalid arg_type for sockmap/sockhash\n"); |
---|
| 4529 | + return -EINVAL; |
---|
| 4530 | + } |
---|
| 4531 | + break; |
---|
| 4532 | + |
---|
| 4533 | + default: |
---|
| 4534 | + break; |
---|
| 4535 | + } |
---|
| 4536 | + return 0; |
---|
| 4537 | +} |
---|
| 4538 | + |
---|
| 4539 | +struct bpf_reg_types { |
---|
| 4540 | + const enum bpf_reg_type types[10]; |
---|
| 4541 | + u32 *btf_id; |
---|
| 4542 | +}; |
---|
| 4543 | + |
---|
| 4544 | +static const struct bpf_reg_types map_key_value_types = { |
---|
| 4545 | + .types = { |
---|
| 4546 | + PTR_TO_STACK, |
---|
| 4547 | + PTR_TO_PACKET, |
---|
| 4548 | + PTR_TO_PACKET_META, |
---|
| 4549 | + PTR_TO_MAP_VALUE, |
---|
| 4550 | + }, |
---|
| 4551 | +}; |
---|
| 4552 | + |
---|
| 4553 | +static const struct bpf_reg_types sock_types = { |
---|
| 4554 | + .types = { |
---|
| 4555 | + PTR_TO_SOCK_COMMON, |
---|
| 4556 | + PTR_TO_SOCKET, |
---|
| 4557 | + PTR_TO_TCP_SOCK, |
---|
| 4558 | + PTR_TO_XDP_SOCK, |
---|
| 4559 | + }, |
---|
| 4560 | +}; |
---|
| 4561 | + |
---|
| 4562 | +#ifdef CONFIG_NET |
---|
| 4563 | +static const struct bpf_reg_types btf_id_sock_common_types = { |
---|
| 4564 | + .types = { |
---|
| 4565 | + PTR_TO_SOCK_COMMON, |
---|
| 4566 | + PTR_TO_SOCKET, |
---|
| 4567 | + PTR_TO_TCP_SOCK, |
---|
| 4568 | + PTR_TO_XDP_SOCK, |
---|
| 4569 | + PTR_TO_BTF_ID, |
---|
| 4570 | + }, |
---|
| 4571 | + .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], |
---|
| 4572 | +}; |
---|
| 4573 | +#endif |
---|
| 4574 | + |
---|
| 4575 | +static const struct bpf_reg_types mem_types = { |
---|
| 4576 | + .types = { |
---|
| 4577 | + PTR_TO_STACK, |
---|
| 4578 | + PTR_TO_PACKET, |
---|
| 4579 | + PTR_TO_PACKET_META, |
---|
| 4580 | + PTR_TO_MAP_VALUE, |
---|
| 4581 | + PTR_TO_MEM, |
---|
| 4582 | + PTR_TO_RDONLY_BUF, |
---|
| 4583 | + PTR_TO_RDWR_BUF, |
---|
| 4584 | + }, |
---|
| 4585 | +}; |
---|
| 4586 | + |
---|
| 4587 | +static const struct bpf_reg_types int_ptr_types = { |
---|
| 4588 | + .types = { |
---|
| 4589 | + PTR_TO_STACK, |
---|
| 4590 | + PTR_TO_PACKET, |
---|
| 4591 | + PTR_TO_PACKET_META, |
---|
| 4592 | + PTR_TO_MAP_VALUE, |
---|
| 4593 | + }, |
---|
| 4594 | +}; |
---|
| 4595 | + |
---|
| 4596 | +static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; |
---|
| 4597 | +static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; |
---|
| 4598 | +static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; |
---|
| 4599 | +static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } }; |
---|
| 4600 | +static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; |
---|
| 4601 | +static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; |
---|
| 4602 | +static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; |
---|
| 4603 | +static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; |
---|
| 4604 | + |
---|
| 4605 | +static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { |
---|
| 4606 | + [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, |
---|
| 4607 | + [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, |
---|
| 4608 | + [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, |
---|
| 4609 | + [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types, |
---|
| 4610 | + [ARG_CONST_SIZE] = &scalar_types, |
---|
| 4611 | + [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, |
---|
| 4612 | + [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, |
---|
| 4613 | + [ARG_CONST_MAP_PTR] = &const_map_ptr_types, |
---|
| 4614 | + [ARG_PTR_TO_CTX] = &context_types, |
---|
| 4615 | + [ARG_PTR_TO_CTX_OR_NULL] = &context_types, |
---|
| 4616 | + [ARG_PTR_TO_SOCK_COMMON] = &sock_types, |
---|
| 4617 | +#ifdef CONFIG_NET |
---|
| 4618 | + [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, |
---|
| 4619 | +#endif |
---|
| 4620 | + [ARG_PTR_TO_SOCKET] = &fullsock_types, |
---|
| 4621 | + [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types, |
---|
| 4622 | + [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, |
---|
| 4623 | + [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, |
---|
| 4624 | + [ARG_PTR_TO_MEM] = &mem_types, |
---|
| 4625 | + [ARG_PTR_TO_MEM_OR_NULL] = &mem_types, |
---|
| 4626 | + [ARG_PTR_TO_UNINIT_MEM] = &mem_types, |
---|
| 4627 | + [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, |
---|
| 4628 | + [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, |
---|
| 4629 | + [ARG_PTR_TO_INT] = &int_ptr_types, |
---|
| 4630 | + [ARG_PTR_TO_LONG] = &int_ptr_types, |
---|
| 4631 | + [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, |
---|
| 4632 | +}; |
---|
| 4633 | + |
---|
| 4634 | +static int check_reg_type(struct bpf_verifier_env *env, u32 regno, |
---|
1953 | 4635 | enum bpf_arg_type arg_type, |
---|
1954 | | - struct bpf_call_arg_meta *meta) |
---|
| 4636 | + const u32 *arg_btf_id) |
---|
1955 | 4637 | { |
---|
1956 | 4638 | struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; |
---|
1957 | | - enum bpf_reg_type expected_type, type = reg->type; |
---|
| 4639 | + enum bpf_reg_type expected, type = reg->type; |
---|
| 4640 | + const struct bpf_reg_types *compatible; |
---|
| 4641 | + int i, j; |
---|
| 4642 | + |
---|
| 4643 | + compatible = compatible_reg_types[arg_type]; |
---|
| 4644 | + if (!compatible) { |
---|
| 4645 | + verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type); |
---|
| 4646 | + return -EFAULT; |
---|
| 4647 | + } |
---|
| 4648 | + |
---|
| 4649 | + for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { |
---|
| 4650 | + expected = compatible->types[i]; |
---|
| 4651 | + if (expected == NOT_INIT) |
---|
| 4652 | + break; |
---|
| 4653 | + |
---|
| 4654 | + if (type == expected) |
---|
| 4655 | + goto found; |
---|
| 4656 | + } |
---|
| 4657 | + |
---|
| 4658 | + verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]); |
---|
| 4659 | + for (j = 0; j + 1 < i; j++) |
---|
| 4660 | + verbose(env, "%s, ", reg_type_str[compatible->types[j]]); |
---|
| 4661 | + verbose(env, "%s\n", reg_type_str[compatible->types[j]]); |
---|
| 4662 | + return -EACCES; |
---|
| 4663 | + |
---|
| 4664 | +found: |
---|
| 4665 | + if (type == PTR_TO_BTF_ID) { |
---|
| 4666 | + if (!arg_btf_id) { |
---|
| 4667 | + if (!compatible->btf_id) { |
---|
| 4668 | + verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); |
---|
| 4669 | + return -EFAULT; |
---|
| 4670 | + } |
---|
| 4671 | + arg_btf_id = compatible->btf_id; |
---|
| 4672 | + } |
---|
| 4673 | + |
---|
| 4674 | + if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, |
---|
| 4675 | + *arg_btf_id)) { |
---|
| 4676 | + verbose(env, "R%d is of type %s but %s is expected\n", |
---|
| 4677 | + regno, kernel_type_name(reg->btf_id), |
---|
| 4678 | + kernel_type_name(*arg_btf_id)); |
---|
| 4679 | + return -EACCES; |
---|
| 4680 | + } |
---|
| 4681 | + |
---|
| 4682 | + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { |
---|
| 4683 | + verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", |
---|
| 4684 | + regno); |
---|
| 4685 | + return -EACCES; |
---|
| 4686 | + } |
---|
| 4687 | + } |
---|
| 4688 | + |
---|
| 4689 | + return 0; |
---|
| 4690 | +} |
---|
| 4691 | + |
---|
| 4692 | +static int check_func_arg(struct bpf_verifier_env *env, u32 arg, |
---|
| 4693 | + struct bpf_call_arg_meta *meta, |
---|
| 4694 | + const struct bpf_func_proto *fn) |
---|
| 4695 | +{ |
---|
| 4696 | + u32 regno = BPF_REG_1 + arg; |
---|
| 4697 | + struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; |
---|
| 4698 | + enum bpf_arg_type arg_type = fn->arg_type[arg]; |
---|
| 4699 | + enum bpf_reg_type type = reg->type; |
---|
1958 | 4700 | int err = 0; |
---|
1959 | 4701 | |
---|
1960 | 4702 | if (arg_type == ARG_DONTCARE) |
---|
.. | .. |
---|
1979 | 4721 | return -EACCES; |
---|
1980 | 4722 | } |
---|
1981 | 4723 | |
---|
1982 | | - if (arg_type == ARG_PTR_TO_MAP_KEY || |
---|
1983 | | - arg_type == ARG_PTR_TO_MAP_VALUE) { |
---|
1984 | | - expected_type = PTR_TO_STACK; |
---|
1985 | | - if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && |
---|
1986 | | - type != expected_type) |
---|
1987 | | - goto err_type; |
---|
1988 | | - } else if (arg_type == ARG_CONST_SIZE || |
---|
1989 | | - arg_type == ARG_CONST_SIZE_OR_ZERO) { |
---|
1990 | | - expected_type = SCALAR_VALUE; |
---|
1991 | | - if (type != expected_type) |
---|
1992 | | - goto err_type; |
---|
1993 | | - } else if (arg_type == ARG_CONST_MAP_PTR) { |
---|
1994 | | - expected_type = CONST_PTR_TO_MAP; |
---|
1995 | | - if (type != expected_type) |
---|
1996 | | - goto err_type; |
---|
1997 | | - } else if (arg_type == ARG_PTR_TO_CTX) { |
---|
1998 | | - expected_type = PTR_TO_CTX; |
---|
1999 | | - if (type != expected_type) |
---|
2000 | | - goto err_type; |
---|
| 4724 | + if (arg_type == ARG_PTR_TO_MAP_VALUE || |
---|
| 4725 | + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || |
---|
| 4726 | + arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { |
---|
| 4727 | + err = resolve_map_arg_type(env, meta, &arg_type); |
---|
| 4728 | + if (err) |
---|
| 4729 | + return err; |
---|
| 4730 | + } |
---|
| 4731 | + |
---|
| 4732 | + if (register_is_null(reg) && arg_type_may_be_null(arg_type)) |
---|
| 4733 | + /* A NULL register has a SCALAR_VALUE type, so skip |
---|
| 4734 | + * type checking. |
---|
| 4735 | + */ |
---|
| 4736 | + goto skip_type_check; |
---|
| 4737 | + |
---|
| 4738 | + err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]); |
---|
| 4739 | + if (err) |
---|
| 4740 | + return err; |
---|
| 4741 | + |
---|
| 4742 | + if (type == PTR_TO_CTX) { |
---|
2001 | 4743 | err = check_ctx_reg(env, reg, regno); |
---|
2002 | 4744 | if (err < 0) |
---|
2003 | 4745 | return err; |
---|
2004 | | - } else if (arg_type_is_mem_ptr(arg_type)) { |
---|
2005 | | - expected_type = PTR_TO_STACK; |
---|
2006 | | - /* One exception here. In case function allows for NULL to be |
---|
2007 | | - * passed in as argument, it's a SCALAR_VALUE type. Final test |
---|
2008 | | - * happens during stack boundary checking. |
---|
2009 | | - */ |
---|
2010 | | - if (register_is_null(reg) && |
---|
2011 | | - arg_type == ARG_PTR_TO_MEM_OR_NULL) |
---|
2012 | | - /* final test in check_stack_boundary() */; |
---|
2013 | | - else if (!type_is_pkt_pointer(type) && |
---|
2014 | | - type != PTR_TO_MAP_VALUE && |
---|
2015 | | - type != expected_type) |
---|
2016 | | - goto err_type; |
---|
2017 | | - meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; |
---|
2018 | | - } else { |
---|
2019 | | - verbose(env, "unsupported arg_type %d\n", arg_type); |
---|
2020 | | - return -EFAULT; |
---|
| 4746 | + } |
---|
| 4747 | + |
---|
| 4748 | +skip_type_check: |
---|
| 4749 | + if (reg->ref_obj_id) { |
---|
| 4750 | + if (meta->ref_obj_id) { |
---|
| 4751 | + verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", |
---|
| 4752 | + regno, reg->ref_obj_id, |
---|
| 4753 | + meta->ref_obj_id); |
---|
| 4754 | + return -EFAULT; |
---|
| 4755 | + } |
---|
| 4756 | + meta->ref_obj_id = reg->ref_obj_id; |
---|
2021 | 4757 | } |
---|
2022 | 4758 | |
---|
2023 | 4759 | if (arg_type == ARG_CONST_MAP_PTR) { |
---|
.. | .. |
---|
2040 | 4776 | err = check_helper_mem_access(env, regno, |
---|
2041 | 4777 | meta->map_ptr->key_size, false, |
---|
2042 | 4778 | NULL); |
---|
2043 | | - } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { |
---|
| 4779 | + } else if (arg_type == ARG_PTR_TO_MAP_VALUE || |
---|
| 4780 | + (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && |
---|
| 4781 | + !register_is_null(reg)) || |
---|
| 4782 | + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { |
---|
2044 | 4783 | /* bpf_map_xxx(..., map_ptr, ..., value) call: |
---|
2045 | 4784 | * check [value, value + map->value_size) validity |
---|
2046 | 4785 | */ |
---|
.. | .. |
---|
2049 | 4788 | verbose(env, "invalid map_ptr to access map->value\n"); |
---|
2050 | 4789 | return -EACCES; |
---|
2051 | 4790 | } |
---|
| 4791 | + meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); |
---|
2052 | 4792 | err = check_helper_mem_access(env, regno, |
---|
2053 | 4793 | meta->map_ptr->value_size, false, |
---|
2054 | | - NULL); |
---|
| 4794 | + meta); |
---|
| 4795 | + } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) { |
---|
| 4796 | + if (!reg->btf_id) { |
---|
| 4797 | + verbose(env, "Helper has invalid btf_id in R%d\n", regno); |
---|
| 4798 | + return -EACCES; |
---|
| 4799 | + } |
---|
| 4800 | + meta->ret_btf_id = reg->btf_id; |
---|
| 4801 | + } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { |
---|
| 4802 | + if (meta->func_id == BPF_FUNC_spin_lock) { |
---|
| 4803 | + if (process_spin_lock(env, regno, true)) |
---|
| 4804 | + return -EACCES; |
---|
| 4805 | + } else if (meta->func_id == BPF_FUNC_spin_unlock) { |
---|
| 4806 | + if (process_spin_lock(env, regno, false)) |
---|
| 4807 | + return -EACCES; |
---|
| 4808 | + } else { |
---|
| 4809 | + verbose(env, "verifier internal error\n"); |
---|
| 4810 | + return -EFAULT; |
---|
| 4811 | + } |
---|
| 4812 | + } else if (arg_type_is_mem_ptr(arg_type)) { |
---|
| 4813 | + /* The access to this pointer is only checked when we hit the |
---|
| 4814 | + * next is_mem_size argument below. |
---|
| 4815 | + */ |
---|
| 4816 | + meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM); |
---|
2055 | 4817 | } else if (arg_type_is_mem_size(arg_type)) { |
---|
2056 | 4818 | bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); |
---|
2057 | 4819 | |
---|
2058 | | - /* remember the mem_size which may be used later |
---|
2059 | | - * to refine return values. |
---|
| 4820 | + /* This is used to refine r0 return value bounds for helpers |
---|
| 4821 | + * that enforce this value as an upper bound on return values. |
---|
| 4822 | + * See do_refine_retval_range() for helpers that can refine |
---|
| 4823 | + * the return value. C type of helper is u32 so we pull register |
---|
| 4824 | + * bound from umax_value however, if negative verifier errors |
---|
| 4825 | + * out. Only upper bounds can be learned because retval is an |
---|
| 4826 | + * int type and negative retvals are allowed. |
---|
2060 | 4827 | */ |
---|
2061 | 4828 | meta->msize_max_value = reg->umax_value; |
---|
2062 | 4829 | |
---|
.. | .. |
---|
2093 | 4860 | err = check_helper_mem_access(env, regno - 1, |
---|
2094 | 4861 | reg->umax_value, |
---|
2095 | 4862 | zero_size_allowed, meta); |
---|
| 4863 | + if (!err) |
---|
| 4864 | + err = mark_chain_precision(env, regno); |
---|
| 4865 | + } else if (arg_type_is_alloc_size(arg_type)) { |
---|
| 4866 | + if (!tnum_is_const(reg->var_off)) { |
---|
| 4867 | + verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n", |
---|
| 4868 | + regno); |
---|
| 4869 | + return -EACCES; |
---|
| 4870 | + } |
---|
| 4871 | + meta->mem_size = reg->var_off.value; |
---|
| 4872 | + } else if (arg_type_is_int_ptr(arg_type)) { |
---|
| 4873 | + int size = int_ptr_type_to_size(arg_type); |
---|
| 4874 | + |
---|
| 4875 | + err = check_helper_mem_access(env, regno, size, false, meta); |
---|
| 4876 | + if (err) |
---|
| 4877 | + return err; |
---|
| 4878 | + err = check_ptr_alignment(env, reg, 0, size, true); |
---|
2096 | 4879 | } |
---|
2097 | 4880 | |
---|
2098 | 4881 | return err; |
---|
2099 | | -err_type: |
---|
2100 | | - verbose(env, "R%d type=%s expected=%s\n", regno, |
---|
2101 | | - reg_type_str[type], reg_type_str[expected_type]); |
---|
2102 | | - return -EACCES; |
---|
| 4882 | +} |
---|
| 4883 | + |
---|
| 4884 | +static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) |
---|
| 4885 | +{ |
---|
| 4886 | + enum bpf_attach_type eatype = env->prog->expected_attach_type; |
---|
| 4887 | + enum bpf_prog_type type = resolve_prog_type(env->prog); |
---|
| 4888 | + |
---|
| 4889 | + if (func_id != BPF_FUNC_map_update_elem) |
---|
| 4890 | + return false; |
---|
| 4891 | + |
---|
| 4892 | + /* It's not possible to get access to a locked struct sock in these |
---|
| 4893 | + * contexts, so updating is safe. |
---|
| 4894 | + */ |
---|
| 4895 | + switch (type) { |
---|
| 4896 | + case BPF_PROG_TYPE_TRACING: |
---|
| 4897 | + if (eatype == BPF_TRACE_ITER) |
---|
| 4898 | + return true; |
---|
| 4899 | + break; |
---|
| 4900 | + case BPF_PROG_TYPE_SOCKET_FILTER: |
---|
| 4901 | + case BPF_PROG_TYPE_SCHED_CLS: |
---|
| 4902 | + case BPF_PROG_TYPE_SCHED_ACT: |
---|
| 4903 | + case BPF_PROG_TYPE_XDP: |
---|
| 4904 | + case BPF_PROG_TYPE_SK_REUSEPORT: |
---|
| 4905 | + case BPF_PROG_TYPE_FLOW_DISSECTOR: |
---|
| 4906 | + case BPF_PROG_TYPE_SK_LOOKUP: |
---|
| 4907 | + return true; |
---|
| 4908 | + default: |
---|
| 4909 | + break; |
---|
| 4910 | + } |
---|
| 4911 | + |
---|
| 4912 | + verbose(env, "cannot update sockmap in this context\n"); |
---|
| 4913 | + return false; |
---|
| 4914 | +} |
---|
| 4915 | + |
---|
| 4916 | +static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env) |
---|
| 4917 | +{ |
---|
| 4918 | + return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64); |
---|
2103 | 4919 | } |
---|
2104 | 4920 | |
---|
2105 | 4921 | static int check_map_func_compatibility(struct bpf_verifier_env *env, |
---|
.. | .. |
---|
2117 | 4933 | case BPF_MAP_TYPE_PERF_EVENT_ARRAY: |
---|
2118 | 4934 | if (func_id != BPF_FUNC_perf_event_read && |
---|
2119 | 4935 | func_id != BPF_FUNC_perf_event_output && |
---|
2120 | | - func_id != BPF_FUNC_perf_event_read_value) |
---|
| 4936 | + func_id != BPF_FUNC_skb_output && |
---|
| 4937 | + func_id != BPF_FUNC_perf_event_read_value && |
---|
| 4938 | + func_id != BPF_FUNC_xdp_output) |
---|
| 4939 | + goto error; |
---|
| 4940 | + break; |
---|
| 4941 | + case BPF_MAP_TYPE_RINGBUF: |
---|
| 4942 | + if (func_id != BPF_FUNC_ringbuf_output && |
---|
| 4943 | + func_id != BPF_FUNC_ringbuf_reserve && |
---|
| 4944 | + func_id != BPF_FUNC_ringbuf_query) |
---|
2121 | 4945 | goto error; |
---|
2122 | 4946 | break; |
---|
2123 | 4947 | case BPF_MAP_TYPE_STACK_TRACE: |
---|
.. | .. |
---|
2130 | 4954 | goto error; |
---|
2131 | 4955 | break; |
---|
2132 | 4956 | case BPF_MAP_TYPE_CGROUP_STORAGE: |
---|
| 4957 | + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: |
---|
2133 | 4958 | if (func_id != BPF_FUNC_get_local_storage) |
---|
2134 | 4959 | goto error; |
---|
2135 | 4960 | break; |
---|
2136 | | - /* devmap returns a pointer to a live net_device ifindex that we cannot |
---|
2137 | | - * allow to be modified from bpf side. So do not allow lookup elements |
---|
2138 | | - * for now. |
---|
2139 | | - */ |
---|
2140 | 4961 | case BPF_MAP_TYPE_DEVMAP: |
---|
2141 | | - if (func_id != BPF_FUNC_redirect_map) |
---|
| 4962 | + case BPF_MAP_TYPE_DEVMAP_HASH: |
---|
| 4963 | + if (func_id != BPF_FUNC_redirect_map && |
---|
| 4964 | + func_id != BPF_FUNC_map_lookup_elem) |
---|
2142 | 4965 | goto error; |
---|
2143 | 4966 | break; |
---|
2144 | 4967 | /* Restrict bpf side of cpumap and xskmap, open when use-cases |
---|
2145 | 4968 | * appear. |
---|
2146 | 4969 | */ |
---|
2147 | 4970 | case BPF_MAP_TYPE_CPUMAP: |
---|
2148 | | - case BPF_MAP_TYPE_XSKMAP: |
---|
2149 | 4971 | if (func_id != BPF_FUNC_redirect_map) |
---|
| 4972 | + goto error; |
---|
| 4973 | + break; |
---|
| 4974 | + case BPF_MAP_TYPE_XSKMAP: |
---|
| 4975 | + if (func_id != BPF_FUNC_redirect_map && |
---|
| 4976 | + func_id != BPF_FUNC_map_lookup_elem) |
---|
2150 | 4977 | goto error; |
---|
2151 | 4978 | break; |
---|
2152 | 4979 | case BPF_MAP_TYPE_ARRAY_OF_MAPS: |
---|
.. | .. |
---|
2158 | 4985 | if (func_id != BPF_FUNC_sk_redirect_map && |
---|
2159 | 4986 | func_id != BPF_FUNC_sock_map_update && |
---|
2160 | 4987 | func_id != BPF_FUNC_map_delete_elem && |
---|
2161 | | - func_id != BPF_FUNC_msg_redirect_map) |
---|
| 4988 | + func_id != BPF_FUNC_msg_redirect_map && |
---|
| 4989 | + func_id != BPF_FUNC_sk_select_reuseport && |
---|
| 4990 | + func_id != BPF_FUNC_map_lookup_elem && |
---|
| 4991 | + !may_update_sockmap(env, func_id)) |
---|
2162 | 4992 | goto error; |
---|
2163 | 4993 | break; |
---|
2164 | 4994 | case BPF_MAP_TYPE_SOCKHASH: |
---|
2165 | 4995 | if (func_id != BPF_FUNC_sk_redirect_hash && |
---|
2166 | 4996 | func_id != BPF_FUNC_sock_hash_update && |
---|
2167 | 4997 | func_id != BPF_FUNC_map_delete_elem && |
---|
2168 | | - func_id != BPF_FUNC_msg_redirect_hash) |
---|
| 4998 | + func_id != BPF_FUNC_msg_redirect_hash && |
---|
| 4999 | + func_id != BPF_FUNC_sk_select_reuseport && |
---|
| 5000 | + func_id != BPF_FUNC_map_lookup_elem && |
---|
| 5001 | + !may_update_sockmap(env, func_id)) |
---|
2169 | 5002 | goto error; |
---|
2170 | 5003 | break; |
---|
2171 | 5004 | case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: |
---|
2172 | 5005 | if (func_id != BPF_FUNC_sk_select_reuseport) |
---|
| 5006 | + goto error; |
---|
| 5007 | + break; |
---|
| 5008 | + case BPF_MAP_TYPE_QUEUE: |
---|
| 5009 | + case BPF_MAP_TYPE_STACK: |
---|
| 5010 | + if (func_id != BPF_FUNC_map_peek_elem && |
---|
| 5011 | + func_id != BPF_FUNC_map_pop_elem && |
---|
| 5012 | + func_id != BPF_FUNC_map_push_elem) |
---|
| 5013 | + goto error; |
---|
| 5014 | + break; |
---|
| 5015 | + case BPF_MAP_TYPE_SK_STORAGE: |
---|
| 5016 | + if (func_id != BPF_FUNC_sk_storage_get && |
---|
| 5017 | + func_id != BPF_FUNC_sk_storage_delete) |
---|
| 5018 | + goto error; |
---|
| 5019 | + break; |
---|
| 5020 | + case BPF_MAP_TYPE_INODE_STORAGE: |
---|
| 5021 | + if (func_id != BPF_FUNC_inode_storage_get && |
---|
| 5022 | + func_id != BPF_FUNC_inode_storage_delete) |
---|
2173 | 5023 | goto error; |
---|
2174 | 5024 | break; |
---|
2175 | 5025 | default: |
---|
.. | .. |
---|
2181 | 5031 | case BPF_FUNC_tail_call: |
---|
2182 | 5032 | if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) |
---|
2183 | 5033 | goto error; |
---|
2184 | | - if (env->subprog_cnt > 1) { |
---|
2185 | | - verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); |
---|
| 5034 | + if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) { |
---|
| 5035 | + verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n"); |
---|
2186 | 5036 | return -EINVAL; |
---|
2187 | 5037 | } |
---|
2188 | 5038 | break; |
---|
2189 | 5039 | case BPF_FUNC_perf_event_read: |
---|
2190 | 5040 | case BPF_FUNC_perf_event_output: |
---|
2191 | 5041 | case BPF_FUNC_perf_event_read_value: |
---|
| 5042 | + case BPF_FUNC_skb_output: |
---|
| 5043 | + case BPF_FUNC_xdp_output: |
---|
2192 | 5044 | if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) |
---|
| 5045 | + goto error; |
---|
| 5046 | + break; |
---|
| 5047 | + case BPF_FUNC_ringbuf_output: |
---|
| 5048 | + case BPF_FUNC_ringbuf_reserve: |
---|
| 5049 | + case BPF_FUNC_ringbuf_query: |
---|
| 5050 | + if (map->map_type != BPF_MAP_TYPE_RINGBUF) |
---|
2193 | 5051 | goto error; |
---|
2194 | 5052 | break; |
---|
2195 | 5053 | case BPF_FUNC_get_stackid: |
---|
.. | .. |
---|
2203 | 5061 | break; |
---|
2204 | 5062 | case BPF_FUNC_redirect_map: |
---|
2205 | 5063 | if (map->map_type != BPF_MAP_TYPE_DEVMAP && |
---|
| 5064 | + map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && |
---|
2206 | 5065 | map->map_type != BPF_MAP_TYPE_CPUMAP && |
---|
2207 | 5066 | map->map_type != BPF_MAP_TYPE_XSKMAP) |
---|
2208 | 5067 | goto error; |
---|
.. | .. |
---|
2220 | 5079 | goto error; |
---|
2221 | 5080 | break; |
---|
2222 | 5081 | case BPF_FUNC_get_local_storage: |
---|
2223 | | - if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) |
---|
| 5082 | + if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && |
---|
| 5083 | + map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) |
---|
2224 | 5084 | goto error; |
---|
2225 | 5085 | break; |
---|
2226 | 5086 | case BPF_FUNC_sk_select_reuseport: |
---|
2227 | | - if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) |
---|
| 5087 | + if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && |
---|
| 5088 | + map->map_type != BPF_MAP_TYPE_SOCKMAP && |
---|
| 5089 | + map->map_type != BPF_MAP_TYPE_SOCKHASH) |
---|
| 5090 | + goto error; |
---|
| 5091 | + break; |
---|
| 5092 | + case BPF_FUNC_map_peek_elem: |
---|
| 5093 | + case BPF_FUNC_map_pop_elem: |
---|
| 5094 | + case BPF_FUNC_map_push_elem: |
---|
| 5095 | + if (map->map_type != BPF_MAP_TYPE_QUEUE && |
---|
| 5096 | + map->map_type != BPF_MAP_TYPE_STACK) |
---|
| 5097 | + goto error; |
---|
| 5098 | + break; |
---|
| 5099 | + case BPF_FUNC_sk_storage_get: |
---|
| 5100 | + case BPF_FUNC_sk_storage_delete: |
---|
| 5101 | + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) |
---|
| 5102 | + goto error; |
---|
| 5103 | + break; |
---|
| 5104 | + case BPF_FUNC_inode_storage_get: |
---|
| 5105 | + case BPF_FUNC_inode_storage_delete: |
---|
| 5106 | + if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) |
---|
2228 | 5107 | goto error; |
---|
2229 | 5108 | break; |
---|
2230 | 5109 | default: |
---|
.. | .. |
---|
2287 | 5166 | return true; |
---|
2288 | 5167 | } |
---|
2289 | 5168 | |
---|
2290 | | -static int check_func_proto(const struct bpf_func_proto *fn) |
---|
| 5169 | +static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) |
---|
| 5170 | +{ |
---|
| 5171 | + int count = 0; |
---|
| 5172 | + |
---|
| 5173 | + if (arg_type_may_be_refcounted(fn->arg1_type)) |
---|
| 5174 | + count++; |
---|
| 5175 | + if (arg_type_may_be_refcounted(fn->arg2_type)) |
---|
| 5176 | + count++; |
---|
| 5177 | + if (arg_type_may_be_refcounted(fn->arg3_type)) |
---|
| 5178 | + count++; |
---|
| 5179 | + if (arg_type_may_be_refcounted(fn->arg4_type)) |
---|
| 5180 | + count++; |
---|
| 5181 | + if (arg_type_may_be_refcounted(fn->arg5_type)) |
---|
| 5182 | + count++; |
---|
| 5183 | + |
---|
| 5184 | + /* A reference acquiring function cannot acquire |
---|
| 5185 | + * another refcounted ptr. |
---|
| 5186 | + */ |
---|
| 5187 | + if (may_be_acquire_function(func_id) && count) |
---|
| 5188 | + return false; |
---|
| 5189 | + |
---|
| 5190 | + /* We only support one arg being unreferenced at the moment, |
---|
| 5191 | + * which is sufficient for the helper functions we have right now. |
---|
| 5192 | + */ |
---|
| 5193 | + return count <= 1; |
---|
| 5194 | +} |
---|
| 5195 | + |
---|
| 5196 | +static bool check_btf_id_ok(const struct bpf_func_proto *fn) |
---|
| 5197 | +{ |
---|
| 5198 | + int i; |
---|
| 5199 | + |
---|
| 5200 | + for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { |
---|
| 5201 | + if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) |
---|
| 5202 | + return false; |
---|
| 5203 | + |
---|
| 5204 | + if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) |
---|
| 5205 | + return false; |
---|
| 5206 | + } |
---|
| 5207 | + |
---|
| 5208 | + return true; |
---|
| 5209 | +} |
---|
| 5210 | + |
---|
| 5211 | +static int check_func_proto(const struct bpf_func_proto *fn, int func_id) |
---|
2291 | 5212 | { |
---|
2292 | 5213 | return check_raw_mode_ok(fn) && |
---|
2293 | | - check_arg_pair_ok(fn) ? 0 : -EINVAL; |
---|
| 5214 | + check_arg_pair_ok(fn) && |
---|
| 5215 | + check_btf_id_ok(fn) && |
---|
| 5216 | + check_refcount_ok(fn, func_id) ? 0 : -EINVAL; |
---|
2294 | 5217 | } |
---|
2295 | 5218 | |
---|
2296 | 5219 | /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] |
---|
2297 | 5220 | * are now invalid, so turn them into unknown SCALAR_VALUE. |
---|
2298 | 5221 | */ |
---|
2299 | | -static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, |
---|
2300 | | - struct bpf_func_state *state) |
---|
2301 | | -{ |
---|
2302 | | - struct bpf_reg_state *regs = state->regs, *reg; |
---|
2303 | | - int i; |
---|
2304 | | - |
---|
2305 | | - for (i = 0; i < MAX_BPF_REG; i++) |
---|
2306 | | - if (reg_is_pkt_pointer_any(&regs[i])) |
---|
2307 | | - mark_reg_unknown(env, regs, i); |
---|
2308 | | - |
---|
2309 | | - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { |
---|
2310 | | - if (state->stack[i].slot_type[0] != STACK_SPILL) |
---|
2311 | | - continue; |
---|
2312 | | - reg = &state->stack[i].spilled_ptr; |
---|
2313 | | - if (reg_is_pkt_pointer_any(reg)) |
---|
2314 | | - __mark_reg_unknown(reg); |
---|
2315 | | - } |
---|
2316 | | -} |
---|
2317 | | - |
---|
2318 | 5222 | static void clear_all_pkt_pointers(struct bpf_verifier_env *env) |
---|
2319 | 5223 | { |
---|
2320 | | - struct bpf_verifier_state *vstate = env->cur_state; |
---|
| 5224 | + struct bpf_func_state *state; |
---|
| 5225 | + struct bpf_reg_state *reg; |
---|
| 5226 | + |
---|
| 5227 | + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ |
---|
| 5228 | + if (reg_is_pkt_pointer_any(reg)) |
---|
| 5229 | + __mark_reg_unknown(env, reg); |
---|
| 5230 | + })); |
---|
| 5231 | +} |
---|
| 5232 | + |
---|
| 5233 | +enum { |
---|
| 5234 | + AT_PKT_END = -1, |
---|
| 5235 | + BEYOND_PKT_END = -2, |
---|
| 5236 | +}; |
---|
| 5237 | + |
---|
| 5238 | +static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open) |
---|
| 5239 | +{ |
---|
| 5240 | + struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
---|
| 5241 | + struct bpf_reg_state *reg = &state->regs[regn]; |
---|
| 5242 | + |
---|
| 5243 | + if (reg->type != PTR_TO_PACKET) |
---|
| 5244 | + /* PTR_TO_PACKET_META is not supported yet */ |
---|
| 5245 | + return; |
---|
| 5246 | + |
---|
| 5247 | + /* The 'reg' is pkt > pkt_end or pkt >= pkt_end. |
---|
| 5248 | + * How far beyond pkt_end it goes is unknown. |
---|
| 5249 | + * if (!range_open) it's the case of pkt >= pkt_end |
---|
| 5250 | + * if (range_open) it's the case of pkt > pkt_end |
---|
| 5251 | + * hence this pointer is at least 1 byte bigger than pkt_end |
---|
| 5252 | + */ |
---|
| 5253 | + if (range_open) |
---|
| 5254 | + reg->range = BEYOND_PKT_END; |
---|
| 5255 | + else |
---|
| 5256 | + reg->range = AT_PKT_END; |
---|
| 5257 | +} |
---|
| 5258 | + |
---|
| 5259 | +/* The pointer with the specified id has released its reference to kernel |
---|
| 5260 | + * resources. Identify all copies of the same pointer and clear the reference. |
---|
| 5261 | + */ |
---|
| 5262 | +static int release_reference(struct bpf_verifier_env *env, |
---|
| 5263 | + int ref_obj_id) |
---|
| 5264 | +{ |
---|
| 5265 | + struct bpf_func_state *state; |
---|
| 5266 | + struct bpf_reg_state *reg; |
---|
| 5267 | + int err; |
---|
| 5268 | + |
---|
| 5269 | + err = release_reference_state(cur_func(env), ref_obj_id); |
---|
| 5270 | + if (err) |
---|
| 5271 | + return err; |
---|
| 5272 | + |
---|
| 5273 | + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ |
---|
| 5274 | + if (reg->ref_obj_id == ref_obj_id) { |
---|
| 5275 | + if (!env->allow_ptr_leaks) |
---|
| 5276 | + __mark_reg_not_init(env, reg); |
---|
| 5277 | + else |
---|
| 5278 | + __mark_reg_unknown(env, reg); |
---|
| 5279 | + } |
---|
| 5280 | + })); |
---|
| 5281 | + |
---|
| 5282 | + return 0; |
---|
| 5283 | +} |
---|
| 5284 | + |
---|
| 5285 | +static void clear_caller_saved_regs(struct bpf_verifier_env *env, |
---|
| 5286 | + struct bpf_reg_state *regs) |
---|
| 5287 | +{ |
---|
2321 | 5288 | int i; |
---|
2322 | 5289 | |
---|
2323 | | - for (i = 0; i <= vstate->curframe; i++) |
---|
2324 | | - __clear_all_pkt_pointers(env, vstate->frame[i]); |
---|
| 5290 | + /* after the call registers r0 - r5 were scratched */ |
---|
| 5291 | + for (i = 0; i < CALLER_SAVED_REGS; i++) { |
---|
| 5292 | + mark_reg_not_init(env, regs, caller_saved[i]); |
---|
| 5293 | + check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); |
---|
| 5294 | + } |
---|
2325 | 5295 | } |
---|
2326 | 5296 | |
---|
2327 | 5297 | static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, |
---|
2328 | 5298 | int *insn_idx) |
---|
2329 | 5299 | { |
---|
2330 | 5300 | struct bpf_verifier_state *state = env->cur_state; |
---|
| 5301 | + struct bpf_func_info_aux *func_info_aux; |
---|
2331 | 5302 | struct bpf_func_state *caller, *callee; |
---|
2332 | | - int i, subprog, target_insn; |
---|
| 5303 | + int i, err, subprog, target_insn; |
---|
| 5304 | + bool is_global = false; |
---|
2333 | 5305 | |
---|
2334 | 5306 | if (state->curframe + 1 >= MAX_CALL_FRAMES) { |
---|
2335 | 5307 | verbose(env, "the call stack of %d frames is too deep\n", |
---|
.. | .. |
---|
2352 | 5324 | return -EFAULT; |
---|
2353 | 5325 | } |
---|
2354 | 5326 | |
---|
| 5327 | + func_info_aux = env->prog->aux->func_info_aux; |
---|
| 5328 | + if (func_info_aux) |
---|
| 5329 | + is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; |
---|
| 5330 | + err = btf_check_func_arg_match(env, subprog, caller->regs); |
---|
| 5331 | + if (err == -EFAULT) |
---|
| 5332 | + return err; |
---|
| 5333 | + if (is_global) { |
---|
| 5334 | + if (err) { |
---|
| 5335 | + verbose(env, "Caller passes invalid args into func#%d\n", |
---|
| 5336 | + subprog); |
---|
| 5337 | + return err; |
---|
| 5338 | + } else { |
---|
| 5339 | + if (env->log.level & BPF_LOG_LEVEL) |
---|
| 5340 | + verbose(env, |
---|
| 5341 | + "Func#%d is global and valid. Skipping.\n", |
---|
| 5342 | + subprog); |
---|
| 5343 | + clear_caller_saved_regs(env, caller->regs); |
---|
| 5344 | + |
---|
| 5345 | + /* All global functions return a 64-bit SCALAR_VALUE */ |
---|
| 5346 | + mark_reg_unknown(env, caller->regs, BPF_REG_0); |
---|
| 5347 | + caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; |
---|
| 5348 | + |
---|
| 5349 | + /* continue with next insn after call */ |
---|
| 5350 | + return 0; |
---|
| 5351 | + } |
---|
| 5352 | + } |
---|
| 5353 | + |
---|
2355 | 5354 | callee = kzalloc(sizeof(*callee), GFP_KERNEL); |
---|
2356 | 5355 | if (!callee) |
---|
2357 | 5356 | return -ENOMEM; |
---|
.. | .. |
---|
2367 | 5366 | state->curframe + 1 /* frameno within this callchain */, |
---|
2368 | 5367 | subprog /* subprog number within this prog */); |
---|
2369 | 5368 | |
---|
| 5369 | + /* Transfer references to the callee */ |
---|
| 5370 | + err = transfer_reference_state(callee, caller); |
---|
| 5371 | + if (err) |
---|
| 5372 | + return err; |
---|
| 5373 | + |
---|
2370 | 5374 | /* copy r1 - r5 args that callee can access. The copy includes parent |
---|
2371 | 5375 | * pointers, which connects us up to the liveness chain |
---|
2372 | 5376 | */ |
---|
2373 | 5377 | for (i = BPF_REG_1; i <= BPF_REG_5; i++) |
---|
2374 | 5378 | callee->regs[i] = caller->regs[i]; |
---|
2375 | 5379 | |
---|
2376 | | - /* after the call registers r0 - r5 were scratched */ |
---|
2377 | | - for (i = 0; i < CALLER_SAVED_REGS; i++) { |
---|
2378 | | - mark_reg_not_init(env, caller->regs, caller_saved[i]); |
---|
2379 | | - check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); |
---|
2380 | | - } |
---|
| 5380 | + clear_caller_saved_regs(env, caller->regs); |
---|
2381 | 5381 | |
---|
2382 | 5382 | /* only increment it after check_reg_arg() finished */ |
---|
2383 | 5383 | state->curframe++; |
---|
.. | .. |
---|
2385 | 5385 | /* and go analyze first insn of the callee */ |
---|
2386 | 5386 | *insn_idx = target_insn; |
---|
2387 | 5387 | |
---|
2388 | | - if (env->log.level) { |
---|
| 5388 | + if (env->log.level & BPF_LOG_LEVEL) { |
---|
2389 | 5389 | verbose(env, "caller:\n"); |
---|
2390 | 5390 | print_verifier_state(env, caller); |
---|
2391 | 5391 | verbose(env, "callee:\n"); |
---|
.. | .. |
---|
2399 | 5399 | struct bpf_verifier_state *state = env->cur_state; |
---|
2400 | 5400 | struct bpf_func_state *caller, *callee; |
---|
2401 | 5401 | struct bpf_reg_state *r0; |
---|
| 5402 | + int err; |
---|
2402 | 5403 | |
---|
2403 | 5404 | callee = state->frame[state->curframe]; |
---|
2404 | 5405 | r0 = &callee->regs[BPF_REG_0]; |
---|
.. | .. |
---|
2418 | 5419 | /* return to the caller whatever r0 had in the callee */ |
---|
2419 | 5420 | caller->regs[BPF_REG_0] = *r0; |
---|
2420 | 5421 | |
---|
| 5422 | + /* Transfer references to the caller */ |
---|
| 5423 | + err = transfer_reference_state(caller, callee); |
---|
| 5424 | + if (err) |
---|
| 5425 | + return err; |
---|
| 5426 | + |
---|
2421 | 5427 | *insn_idx = callee->callsite + 1; |
---|
2422 | | - if (env->log.level) { |
---|
| 5428 | + if (env->log.level & BPF_LOG_LEVEL) { |
---|
2423 | 5429 | verbose(env, "returning from callee:\n"); |
---|
2424 | 5430 | print_verifier_state(env, callee); |
---|
2425 | 5431 | verbose(env, "to caller at %d:\n", *insn_idx); |
---|
.. | .. |
---|
2431 | 5437 | return 0; |
---|
2432 | 5438 | } |
---|
2433 | 5439 | |
---|
2434 | | -static int do_refine_retval_range(struct bpf_verifier_env *env, |
---|
2435 | | - struct bpf_reg_state *regs, int ret_type, |
---|
2436 | | - int func_id, struct bpf_call_arg_meta *meta) |
---|
| 5440 | +static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, |
---|
| 5441 | + int func_id, |
---|
| 5442 | + struct bpf_call_arg_meta *meta) |
---|
2437 | 5443 | { |
---|
2438 | 5444 | struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; |
---|
2439 | | - struct bpf_reg_state tmp_reg = *ret_reg; |
---|
2440 | | - bool ret; |
---|
2441 | 5445 | |
---|
2442 | 5446 | if (ret_type != RET_INTEGER || |
---|
2443 | 5447 | (func_id != BPF_FUNC_get_stack && |
---|
2444 | | - func_id != BPF_FUNC_probe_read_str)) |
---|
2445 | | - return 0; |
---|
| 5448 | + func_id != BPF_FUNC_probe_read_str && |
---|
| 5449 | + func_id != BPF_FUNC_probe_read_kernel_str && |
---|
| 5450 | + func_id != BPF_FUNC_probe_read_user_str)) |
---|
| 5451 | + return; |
---|
2446 | 5452 | |
---|
2447 | | - /* Error case where ret is in interval [S32MIN, -1]. */ |
---|
2448 | | - ret_reg->smin_value = S32_MIN; |
---|
2449 | | - ret_reg->smax_value = -1; |
---|
2450 | | - |
---|
2451 | | - __reg_deduce_bounds(ret_reg); |
---|
2452 | | - __reg_bound_offset(ret_reg); |
---|
2453 | | - __update_reg_bounds(ret_reg); |
---|
2454 | | - |
---|
2455 | | - ret = push_stack(env, env->insn_idx + 1, env->insn_idx, false); |
---|
2456 | | - if (!ret) |
---|
2457 | | - return -EFAULT; |
---|
2458 | | - |
---|
2459 | | - *ret_reg = tmp_reg; |
---|
2460 | | - |
---|
2461 | | - /* Success case where ret is in range [0, msize_max_value]. */ |
---|
2462 | | - ret_reg->smin_value = 0; |
---|
2463 | 5453 | ret_reg->smax_value = meta->msize_max_value; |
---|
2464 | | - ret_reg->umin_value = ret_reg->smin_value; |
---|
2465 | | - ret_reg->umax_value = ret_reg->smax_value; |
---|
2466 | | - |
---|
2467 | | - __reg_deduce_bounds(ret_reg); |
---|
2468 | | - __reg_bound_offset(ret_reg); |
---|
2469 | | - __update_reg_bounds(ret_reg); |
---|
2470 | | - |
---|
2471 | | - return 0; |
---|
| 5454 | + ret_reg->s32_max_value = meta->msize_max_value; |
---|
| 5455 | + ret_reg->smin_value = -MAX_ERRNO; |
---|
| 5456 | + ret_reg->s32_min_value = -MAX_ERRNO; |
---|
| 5457 | + reg_bounds_sync(ret_reg); |
---|
2472 | 5458 | } |
---|
2473 | 5459 | |
---|
2474 | 5460 | static int |
---|
.. | .. |
---|
2476 | 5462 | int func_id, int insn_idx) |
---|
2477 | 5463 | { |
---|
2478 | 5464 | struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; |
---|
| 5465 | + struct bpf_map *map = meta->map_ptr; |
---|
2479 | 5466 | |
---|
2480 | 5467 | if (func_id != BPF_FUNC_tail_call && |
---|
2481 | 5468 | func_id != BPF_FUNC_map_lookup_elem && |
---|
2482 | 5469 | func_id != BPF_FUNC_map_update_elem && |
---|
2483 | | - func_id != BPF_FUNC_map_delete_elem) |
---|
| 5470 | + func_id != BPF_FUNC_map_delete_elem && |
---|
| 5471 | + func_id != BPF_FUNC_map_push_elem && |
---|
| 5472 | + func_id != BPF_FUNC_map_pop_elem && |
---|
| 5473 | + func_id != BPF_FUNC_map_peek_elem) |
---|
2484 | 5474 | return 0; |
---|
2485 | 5475 | |
---|
2486 | | - if (meta->map_ptr == NULL) { |
---|
| 5476 | + if (map == NULL) { |
---|
2487 | 5477 | verbose(env, "kernel subsystem misconfigured verifier\n"); |
---|
2488 | 5478 | return -EINVAL; |
---|
2489 | 5479 | } |
---|
2490 | 5480 | |
---|
2491 | | - if (!BPF_MAP_PTR(aux->map_state)) |
---|
| 5481 | + /* In case of read-only, some additional restrictions |
---|
| 5482 | + * need to be applied in order to prevent altering the |
---|
| 5483 | + * state of the map from program side. |
---|
| 5484 | + */ |
---|
| 5485 | + if ((map->map_flags & BPF_F_RDONLY_PROG) && |
---|
| 5486 | + (func_id == BPF_FUNC_map_delete_elem || |
---|
| 5487 | + func_id == BPF_FUNC_map_update_elem || |
---|
| 5488 | + func_id == BPF_FUNC_map_push_elem || |
---|
| 5489 | + func_id == BPF_FUNC_map_pop_elem)) { |
---|
| 5490 | + verbose(env, "write into map forbidden\n"); |
---|
| 5491 | + return -EACCES; |
---|
| 5492 | + } |
---|
| 5493 | + |
---|
| 5494 | + if (!BPF_MAP_PTR(aux->map_ptr_state)) |
---|
2492 | 5495 | bpf_map_ptr_store(aux, meta->map_ptr, |
---|
2493 | | - meta->map_ptr->unpriv_array); |
---|
2494 | | - else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr) |
---|
| 5496 | + !meta->map_ptr->bypass_spec_v1); |
---|
| 5497 | + else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr) |
---|
2495 | 5498 | bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, |
---|
2496 | | - meta->map_ptr->unpriv_array); |
---|
| 5499 | + !meta->map_ptr->bypass_spec_v1); |
---|
2497 | 5500 | return 0; |
---|
| 5501 | +} |
---|
| 5502 | + |
---|
| 5503 | +static int |
---|
| 5504 | +record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, |
---|
| 5505 | + int func_id, int insn_idx) |
---|
| 5506 | +{ |
---|
| 5507 | + struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; |
---|
| 5508 | + struct bpf_reg_state *regs = cur_regs(env), *reg; |
---|
| 5509 | + struct bpf_map *map = meta->map_ptr; |
---|
| 5510 | + u64 val, max; |
---|
| 5511 | + int err; |
---|
| 5512 | + |
---|
| 5513 | + if (func_id != BPF_FUNC_tail_call) |
---|
| 5514 | + return 0; |
---|
| 5515 | + if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) { |
---|
| 5516 | + verbose(env, "kernel subsystem misconfigured verifier\n"); |
---|
| 5517 | + return -EINVAL; |
---|
| 5518 | + } |
---|
| 5519 | + |
---|
| 5520 | + reg = ®s[BPF_REG_3]; |
---|
| 5521 | + val = reg->var_off.value; |
---|
| 5522 | + max = map->max_entries; |
---|
| 5523 | + |
---|
| 5524 | + if (!(register_is_const(reg) && val < max)) { |
---|
| 5525 | + bpf_map_key_store(aux, BPF_MAP_KEY_POISON); |
---|
| 5526 | + return 0; |
---|
| 5527 | + } |
---|
| 5528 | + |
---|
| 5529 | + err = mark_chain_precision(env, BPF_REG_3); |
---|
| 5530 | + if (err) |
---|
| 5531 | + return err; |
---|
| 5532 | + if (bpf_map_key_unseen(aux)) |
---|
| 5533 | + bpf_map_key_store(aux, val); |
---|
| 5534 | + else if (!bpf_map_key_poisoned(aux) && |
---|
| 5535 | + bpf_map_key_immediate(aux) != val) |
---|
| 5536 | + bpf_map_key_store(aux, BPF_MAP_KEY_POISON); |
---|
| 5537 | + return 0; |
---|
| 5538 | +} |
---|
| 5539 | + |
---|
| 5540 | +static int check_reference_leak(struct bpf_verifier_env *env) |
---|
| 5541 | +{ |
---|
| 5542 | + struct bpf_func_state *state = cur_func(env); |
---|
| 5543 | + int i; |
---|
| 5544 | + |
---|
| 5545 | + for (i = 0; i < state->acquired_refs; i++) { |
---|
| 5546 | + verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", |
---|
| 5547 | + state->refs[i].id, state->refs[i].insn_idx); |
---|
| 5548 | + } |
---|
| 5549 | + return state->acquired_refs ? -EINVAL : 0; |
---|
2498 | 5550 | } |
---|
2499 | 5551 | |
---|
2500 | 5552 | static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) |
---|
.. | .. |
---|
2526 | 5578 | return -EINVAL; |
---|
2527 | 5579 | } |
---|
2528 | 5580 | |
---|
| 5581 | + if (fn->allowed && !fn->allowed(env->prog)) { |
---|
| 5582 | + verbose(env, "helper call is not allowed in probe\n"); |
---|
| 5583 | + return -EINVAL; |
---|
| 5584 | + } |
---|
| 5585 | + |
---|
2529 | 5586 | /* With LD_ABS/IND some JITs save/restore skb from r1. */ |
---|
2530 | 5587 | changes_data = bpf_helper_changes_pkt_data(fn->func); |
---|
2531 | 5588 | if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { |
---|
.. | .. |
---|
2537 | 5594 | memset(&meta, 0, sizeof(meta)); |
---|
2538 | 5595 | meta.pkt_access = fn->pkt_access; |
---|
2539 | 5596 | |
---|
2540 | | - err = check_func_proto(fn); |
---|
| 5597 | + err = check_func_proto(fn, func_id); |
---|
2541 | 5598 | if (err) { |
---|
2542 | 5599 | verbose(env, "kernel subsystem misconfigured func %s#%d\n", |
---|
2543 | 5600 | func_id_name(func_id), func_id); |
---|
2544 | 5601 | return err; |
---|
2545 | 5602 | } |
---|
2546 | 5603 | |
---|
| 5604 | + meta.func_id = func_id; |
---|
2547 | 5605 | /* check args */ |
---|
2548 | | - err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); |
---|
2549 | | - if (err) |
---|
2550 | | - return err; |
---|
2551 | | - err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); |
---|
2552 | | - if (err) |
---|
2553 | | - return err; |
---|
2554 | | - err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); |
---|
2555 | | - if (err) |
---|
2556 | | - return err; |
---|
2557 | | - err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta); |
---|
2558 | | - if (err) |
---|
2559 | | - return err; |
---|
2560 | | - err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta); |
---|
| 5606 | + for (i = 0; i < 5; i++) { |
---|
| 5607 | + err = check_func_arg(env, i, &meta, fn); |
---|
| 5608 | + if (err) |
---|
| 5609 | + return err; |
---|
| 5610 | + } |
---|
| 5611 | + |
---|
| 5612 | + err = record_func_map(env, &meta, func_id, insn_idx); |
---|
2561 | 5613 | if (err) |
---|
2562 | 5614 | return err; |
---|
2563 | 5615 | |
---|
2564 | | - err = record_func_map(env, &meta, func_id, insn_idx); |
---|
| 5616 | + err = record_func_key(env, &meta, func_id, insn_idx); |
---|
2565 | 5617 | if (err) |
---|
2566 | 5618 | return err; |
---|
2567 | 5619 | |
---|
.. | .. |
---|
2573 | 5625 | BPF_WRITE, -1, false); |
---|
2574 | 5626 | if (err) |
---|
2575 | 5627 | return err; |
---|
| 5628 | + } |
---|
| 5629 | + |
---|
| 5630 | + if (func_id == BPF_FUNC_tail_call) { |
---|
| 5631 | + err = check_reference_leak(env); |
---|
| 5632 | + if (err) { |
---|
| 5633 | + verbose(env, "tail_call would lead to reference leak\n"); |
---|
| 5634 | + return err; |
---|
| 5635 | + } |
---|
| 5636 | + } else if (is_release_function(func_id)) { |
---|
| 5637 | + err = release_reference(env, meta.ref_obj_id); |
---|
| 5638 | + if (err) { |
---|
| 5639 | + verbose(env, "func %s#%d reference has not been acquired before\n", |
---|
| 5640 | + func_id_name(func_id), func_id); |
---|
| 5641 | + return err; |
---|
| 5642 | + } |
---|
2576 | 5643 | } |
---|
2577 | 5644 | |
---|
2578 | 5645 | regs = cur_regs(env); |
---|
.. | .. |
---|
2592 | 5659 | check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); |
---|
2593 | 5660 | } |
---|
2594 | 5661 | |
---|
| 5662 | + /* helper call returns 64-bit value. */ |
---|
| 5663 | + regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; |
---|
| 5664 | + |
---|
2595 | 5665 | /* update return register (already marked as written above) */ |
---|
2596 | 5666 | if (fn->ret_type == RET_INTEGER) { |
---|
2597 | 5667 | /* sets type to SCALAR_VALUE */ |
---|
.. | .. |
---|
2600 | 5670 | regs[BPF_REG_0].type = NOT_INIT; |
---|
2601 | 5671 | } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || |
---|
2602 | 5672 | fn->ret_type == RET_PTR_TO_MAP_VALUE) { |
---|
2603 | | - if (fn->ret_type == RET_PTR_TO_MAP_VALUE) |
---|
2604 | | - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; |
---|
2605 | | - else |
---|
2606 | | - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; |
---|
2607 | 5673 | /* There is no offset yet applied, variable or fixed */ |
---|
2608 | 5674 | mark_reg_known_zero(env, regs, BPF_REG_0); |
---|
2609 | 5675 | /* remember map_ptr, so that check_map_access() |
---|
.. | .. |
---|
2616 | 5682 | return -EINVAL; |
---|
2617 | 5683 | } |
---|
2618 | 5684 | regs[BPF_REG_0].map_ptr = meta.map_ptr; |
---|
2619 | | - regs[BPF_REG_0].id = ++env->id_gen; |
---|
| 5685 | + if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { |
---|
| 5686 | + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; |
---|
| 5687 | + if (map_value_has_spin_lock(meta.map_ptr)) |
---|
| 5688 | + regs[BPF_REG_0].id = ++env->id_gen; |
---|
| 5689 | + } else { |
---|
| 5690 | + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; |
---|
| 5691 | + } |
---|
| 5692 | + } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { |
---|
| 5693 | + mark_reg_known_zero(env, regs, BPF_REG_0); |
---|
| 5694 | + regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; |
---|
| 5695 | + } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { |
---|
| 5696 | + mark_reg_known_zero(env, regs, BPF_REG_0); |
---|
| 5697 | + regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; |
---|
| 5698 | + } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { |
---|
| 5699 | + mark_reg_known_zero(env, regs, BPF_REG_0); |
---|
| 5700 | + regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; |
---|
| 5701 | + } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) { |
---|
| 5702 | + mark_reg_known_zero(env, regs, BPF_REG_0); |
---|
| 5703 | + regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; |
---|
| 5704 | + regs[BPF_REG_0].mem_size = meta.mem_size; |
---|
| 5705 | + } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL || |
---|
| 5706 | + fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) { |
---|
| 5707 | + const struct btf_type *t; |
---|
| 5708 | + |
---|
| 5709 | + mark_reg_known_zero(env, regs, BPF_REG_0); |
---|
| 5710 | + t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL); |
---|
| 5711 | + if (!btf_type_is_struct(t)) { |
---|
| 5712 | + u32 tsize; |
---|
| 5713 | + const struct btf_type *ret; |
---|
| 5714 | + const char *tname; |
---|
| 5715 | + |
---|
| 5716 | + /* resolve the type size of ksym. */ |
---|
| 5717 | + ret = btf_resolve_size(btf_vmlinux, t, &tsize); |
---|
| 5718 | + if (IS_ERR(ret)) { |
---|
| 5719 | + tname = btf_name_by_offset(btf_vmlinux, t->name_off); |
---|
| 5720 | + verbose(env, "unable to resolve the size of type '%s': %ld\n", |
---|
| 5721 | + tname, PTR_ERR(ret)); |
---|
| 5722 | + return -EINVAL; |
---|
| 5723 | + } |
---|
| 5724 | + regs[BPF_REG_0].type = |
---|
| 5725 | + fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? |
---|
| 5726 | + PTR_TO_MEM : PTR_TO_MEM_OR_NULL; |
---|
| 5727 | + regs[BPF_REG_0].mem_size = tsize; |
---|
| 5728 | + } else { |
---|
| 5729 | + regs[BPF_REG_0].type = |
---|
| 5730 | + fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? |
---|
| 5731 | + PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; |
---|
| 5732 | + regs[BPF_REG_0].btf_id = meta.ret_btf_id; |
---|
| 5733 | + } |
---|
| 5734 | + } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { |
---|
| 5735 | + int ret_btf_id; |
---|
| 5736 | + |
---|
| 5737 | + mark_reg_known_zero(env, regs, BPF_REG_0); |
---|
| 5738 | + regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; |
---|
| 5739 | + ret_btf_id = *fn->ret_btf_id; |
---|
| 5740 | + if (ret_btf_id == 0) { |
---|
| 5741 | + verbose(env, "invalid return type %d of func %s#%d\n", |
---|
| 5742 | + fn->ret_type, func_id_name(func_id), func_id); |
---|
| 5743 | + return -EINVAL; |
---|
| 5744 | + } |
---|
| 5745 | + regs[BPF_REG_0].btf_id = ret_btf_id; |
---|
2620 | 5746 | } else { |
---|
2621 | 5747 | verbose(env, "unknown return type %d of func %s#%d\n", |
---|
2622 | 5748 | fn->ret_type, func_id_name(func_id), func_id); |
---|
2623 | 5749 | return -EINVAL; |
---|
2624 | 5750 | } |
---|
2625 | 5751 | |
---|
2626 | | - err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); |
---|
2627 | | - if (err) |
---|
2628 | | - return err; |
---|
| 5752 | + if (reg_type_may_be_null(regs[BPF_REG_0].type)) |
---|
| 5753 | + regs[BPF_REG_0].id = ++env->id_gen; |
---|
| 5754 | + |
---|
| 5755 | + if (is_ptr_cast_function(func_id)) { |
---|
| 5756 | + /* For release_reference() */ |
---|
| 5757 | + regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; |
---|
| 5758 | + } else if (is_acquire_function(func_id, meta.map_ptr)) { |
---|
| 5759 | + int id = acquire_reference_state(env, insn_idx); |
---|
| 5760 | + |
---|
| 5761 | + if (id < 0) |
---|
| 5762 | + return id; |
---|
| 5763 | + /* For mark_ptr_or_null_reg() */ |
---|
| 5764 | + regs[BPF_REG_0].id = id; |
---|
| 5765 | + /* For release_reference() */ |
---|
| 5766 | + regs[BPF_REG_0].ref_obj_id = id; |
---|
| 5767 | + } |
---|
| 5768 | + |
---|
| 5769 | + do_refine_retval_range(regs, fn->ret_type, func_id, &meta); |
---|
2629 | 5770 | |
---|
2630 | 5771 | err = check_map_func_compatibility(env, meta.map_ptr, func_id); |
---|
2631 | 5772 | if (err) |
---|
2632 | 5773 | return err; |
---|
2633 | 5774 | |
---|
2634 | | - if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) { |
---|
| 5775 | + if ((func_id == BPF_FUNC_get_stack || |
---|
| 5776 | + func_id == BPF_FUNC_get_task_stack) && |
---|
| 5777 | + !env->prog->has_callchain_buf) { |
---|
2635 | 5778 | const char *err_str; |
---|
2636 | 5779 | |
---|
2637 | 5780 | #ifdef CONFIG_PERF_EVENTS |
---|
.. | .. |
---|
2649 | 5792 | env->prog->has_callchain_buf = true; |
---|
2650 | 5793 | } |
---|
2651 | 5794 | |
---|
| 5795 | + if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack) |
---|
| 5796 | + env->prog->call_get_stack = true; |
---|
| 5797 | + |
---|
2652 | 5798 | if (changes_data) |
---|
2653 | 5799 | clear_all_pkt_pointers(env); |
---|
2654 | 5800 | return 0; |
---|
.. | .. |
---|
2664 | 5810 | return res < a; |
---|
2665 | 5811 | } |
---|
2666 | 5812 | |
---|
| 5813 | +static bool signed_add32_overflows(s32 a, s32 b) |
---|
| 5814 | +{ |
---|
| 5815 | + /* Do the add in u32, where overflow is well-defined */ |
---|
| 5816 | + s32 res = (s32)((u32)a + (u32)b); |
---|
| 5817 | + |
---|
| 5818 | + if (b < 0) |
---|
| 5819 | + return res > a; |
---|
| 5820 | + return res < a; |
---|
| 5821 | +} |
---|
| 5822 | + |
---|
2667 | 5823 | static bool signed_sub_overflows(s64 a, s64 b) |
---|
2668 | 5824 | { |
---|
2669 | 5825 | /* Do the sub in u64, where overflow is well-defined */ |
---|
2670 | 5826 | s64 res = (s64)((u64)a - (u64)b); |
---|
| 5827 | + |
---|
| 5828 | + if (b < 0) |
---|
| 5829 | + return res < a; |
---|
| 5830 | + return res > a; |
---|
| 5831 | +} |
---|
| 5832 | + |
---|
| 5833 | +static bool signed_sub32_overflows(s32 a, s32 b) |
---|
| 5834 | +{ |
---|
| 5835 | + /* Do the sub in u32, where overflow is well-defined */ |
---|
| 5836 | + s32 res = (s32)((u32)a - (u32)b); |
---|
2671 | 5837 | |
---|
2672 | 5838 | if (b < 0) |
---|
2673 | 5839 | return res < a; |
---|
.. | .. |
---|
2756 | 5922 | static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, |
---|
2757 | 5923 | const struct bpf_insn *insn) |
---|
2758 | 5924 | { |
---|
2759 | | - return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; |
---|
| 5925 | + return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K; |
---|
2760 | 5926 | } |
---|
2761 | 5927 | |
---|
2762 | 5928 | static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, |
---|
.. | .. |
---|
2905 | 6071 | */ |
---|
2906 | 6072 | if (!ptr_is_dst_reg) { |
---|
2907 | 6073 | tmp = *dst_reg; |
---|
2908 | | - *dst_reg = *ptr_reg; |
---|
| 6074 | + copy_register_state(dst_reg, ptr_reg); |
---|
2909 | 6075 | } |
---|
2910 | 6076 | ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, |
---|
2911 | 6077 | env->insn_idx); |
---|
.. | .. |
---|
2924 | 6090 | * rewrite/sanitize them. |
---|
2925 | 6091 | */ |
---|
2926 | 6092 | if (!vstate->speculative) |
---|
2927 | | - env->insn_aux_data[env->insn_idx].seen = true; |
---|
| 6093 | + env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; |
---|
2928 | 6094 | } |
---|
2929 | 6095 | |
---|
2930 | 6096 | static int sanitize_err(struct bpf_verifier_env *env, |
---|
.. | .. |
---|
2966 | 6132 | return -EACCES; |
---|
2967 | 6133 | } |
---|
2968 | 6134 | |
---|
| 6135 | +/* check that stack access falls within stack limits and that 'reg' doesn't |
---|
| 6136 | + * have a variable offset. |
---|
| 6137 | + * |
---|
| 6138 | + * Variable offset is prohibited for unprivileged mode for simplicity since it |
---|
| 6139 | + * requires corresponding support in Spectre masking for stack ALU. See also |
---|
| 6140 | + * retrieve_ptr_limit(). |
---|
| 6141 | + * |
---|
| 6142 | + * |
---|
| 6143 | + * 'off' includes 'reg->off'. |
---|
| 6144 | + */ |
---|
| 6145 | +static int check_stack_access_for_ptr_arithmetic( |
---|
| 6146 | + struct bpf_verifier_env *env, |
---|
| 6147 | + int regno, |
---|
| 6148 | + const struct bpf_reg_state *reg, |
---|
| 6149 | + int off) |
---|
| 6150 | +{ |
---|
| 6151 | + if (!tnum_is_const(reg->var_off)) { |
---|
| 6152 | + char tn_buf[48]; |
---|
| 6153 | + |
---|
| 6154 | + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
---|
| 6155 | + verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n", |
---|
| 6156 | + regno, tn_buf, off); |
---|
| 6157 | + return -EACCES; |
---|
| 6158 | + } |
---|
| 6159 | + |
---|
| 6160 | + if (off >= 0 || off < -MAX_BPF_STACK) { |
---|
| 6161 | + verbose(env, "R%d stack pointer arithmetic goes out of range, " |
---|
| 6162 | + "prohibited for !root; off=%d\n", regno, off); |
---|
| 6163 | + return -EACCES; |
---|
| 6164 | + } |
---|
| 6165 | + |
---|
| 6166 | + return 0; |
---|
| 6167 | +} |
---|
| 6168 | + |
---|
2969 | 6169 | static int sanitize_check_bounds(struct bpf_verifier_env *env, |
---|
2970 | 6170 | const struct bpf_insn *insn, |
---|
2971 | 6171 | const struct bpf_reg_state *dst_reg) |
---|
.. | .. |
---|
2975 | 6175 | /* For unprivileged we require that resulting offset must be in bounds |
---|
2976 | 6176 | * in order to be able to sanitize access later on. |
---|
2977 | 6177 | */ |
---|
2978 | | - if (env->allow_ptr_leaks) |
---|
| 6178 | + if (env->bypass_spec_v1) |
---|
2979 | 6179 | return 0; |
---|
2980 | 6180 | |
---|
2981 | 6181 | switch (dst_reg->type) { |
---|
2982 | 6182 | case PTR_TO_STACK: |
---|
2983 | | - if (check_stack_access(env, dst_reg, dst_reg->off + |
---|
2984 | | - dst_reg->var_off.value, 1)) { |
---|
2985 | | - verbose(env, "R%d stack pointer arithmetic goes out of range, " |
---|
2986 | | - "prohibited for !root\n", dst); |
---|
| 6183 | + if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, |
---|
| 6184 | + dst_reg->off + dst_reg->var_off.value)) |
---|
2987 | 6185 | return -EACCES; |
---|
2988 | | - } |
---|
2989 | 6186 | break; |
---|
2990 | 6187 | case PTR_TO_MAP_VALUE: |
---|
2991 | 6188 | if (check_map_access(env, dst, dst_reg->off, 1, false)) { |
---|
.. | .. |
---|
3031 | 6228 | /* Taint dst register if offset had invalid bounds derived from |
---|
3032 | 6229 | * e.g. dead branches. |
---|
3033 | 6230 | */ |
---|
3034 | | - __mark_reg_unknown(dst_reg); |
---|
| 6231 | + __mark_reg_unknown(env, dst_reg); |
---|
3035 | 6232 | return 0; |
---|
3036 | 6233 | } |
---|
3037 | 6234 | |
---|
3038 | 6235 | if (BPF_CLASS(insn->code) != BPF_ALU64) { |
---|
3039 | 6236 | /* 32-bit ALU ops on pointers produce (meaningless) scalars */ |
---|
| 6237 | + if (opcode == BPF_SUB && env->allow_ptr_leaks) { |
---|
| 6238 | + __mark_reg_unknown(env, dst_reg); |
---|
| 6239 | + return 0; |
---|
| 6240 | + } |
---|
| 6241 | + |
---|
3040 | 6242 | verbose(env, |
---|
3041 | 6243 | "R%d 32-bit pointer arithmetic prohibited\n", |
---|
3042 | 6244 | dst); |
---|
3043 | 6245 | return -EACCES; |
---|
3044 | 6246 | } |
---|
3045 | 6247 | |
---|
3046 | | - if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { |
---|
3047 | | - verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", |
---|
3048 | | - dst); |
---|
| 6248 | + switch (ptr_reg->type) { |
---|
| 6249 | + case PTR_TO_MAP_VALUE_OR_NULL: |
---|
| 6250 | + verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", |
---|
| 6251 | + dst, reg_type_str[ptr_reg->type]); |
---|
3049 | 6252 | return -EACCES; |
---|
3050 | | - } |
---|
3051 | | - if (ptr_reg->type == CONST_PTR_TO_MAP) { |
---|
3052 | | - verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", |
---|
3053 | | - dst); |
---|
| 6253 | + case CONST_PTR_TO_MAP: |
---|
| 6254 | + /* smin_val represents the known value */ |
---|
| 6255 | + if (known && smin_val == 0 && opcode == BPF_ADD) |
---|
| 6256 | + break; |
---|
| 6257 | + fallthrough; |
---|
| 6258 | + case PTR_TO_PACKET_END: |
---|
| 6259 | + case PTR_TO_SOCKET: |
---|
| 6260 | + case PTR_TO_SOCK_COMMON: |
---|
| 6261 | + case PTR_TO_TCP_SOCK: |
---|
| 6262 | + case PTR_TO_XDP_SOCK: |
---|
| 6263 | +reject: |
---|
| 6264 | + verbose(env, "R%d pointer arithmetic on %s prohibited\n", |
---|
| 6265 | + dst, reg_type_str[ptr_reg->type]); |
---|
3054 | 6266 | return -EACCES; |
---|
3055 | | - } |
---|
3056 | | - if (ptr_reg->type == PTR_TO_PACKET_END) { |
---|
3057 | | - verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", |
---|
3058 | | - dst); |
---|
3059 | | - return -EACCES; |
---|
| 6267 | + default: |
---|
| 6268 | + if (reg_type_may_be_null(ptr_reg->type)) |
---|
| 6269 | + goto reject; |
---|
| 6270 | + break; |
---|
3060 | 6271 | } |
---|
3061 | 6272 | |
---|
3062 | 6273 | /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. |
---|
.. | .. |
---|
3068 | 6279 | if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || |
---|
3069 | 6280 | !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) |
---|
3070 | 6281 | return -EINVAL; |
---|
| 6282 | + |
---|
| 6283 | + /* pointer types do not carry 32-bit bounds at the moment. */ |
---|
| 6284 | + __mark_reg32_unbounded(dst_reg); |
---|
3071 | 6285 | |
---|
3072 | 6286 | if (sanitize_needed(opcode)) { |
---|
3073 | 6287 | ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, |
---|
.. | .. |
---|
3203 | 6417 | |
---|
3204 | 6418 | if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) |
---|
3205 | 6419 | return -EINVAL; |
---|
3206 | | - |
---|
3207 | | - __update_reg_bounds(dst_reg); |
---|
3208 | | - __reg_deduce_bounds(dst_reg); |
---|
3209 | | - __reg_bound_offset(dst_reg); |
---|
3210 | | - |
---|
| 6420 | + reg_bounds_sync(dst_reg); |
---|
3211 | 6421 | if (sanitize_check_bounds(env, insn, dst_reg) < 0) |
---|
3212 | 6422 | return -EACCES; |
---|
3213 | 6423 | if (sanitize_needed(opcode)) { |
---|
.. | .. |
---|
3218 | 6428 | } |
---|
3219 | 6429 | |
---|
3220 | 6430 | return 0; |
---|
| 6431 | +} |
---|
| 6432 | + |
---|
| 6433 | +static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, |
---|
| 6434 | + struct bpf_reg_state *src_reg) |
---|
| 6435 | +{ |
---|
| 6436 | + s32 smin_val = src_reg->s32_min_value; |
---|
| 6437 | + s32 smax_val = src_reg->s32_max_value; |
---|
| 6438 | + u32 umin_val = src_reg->u32_min_value; |
---|
| 6439 | + u32 umax_val = src_reg->u32_max_value; |
---|
| 6440 | + |
---|
| 6441 | + if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) || |
---|
| 6442 | + signed_add32_overflows(dst_reg->s32_max_value, smax_val)) { |
---|
| 6443 | + dst_reg->s32_min_value = S32_MIN; |
---|
| 6444 | + dst_reg->s32_max_value = S32_MAX; |
---|
| 6445 | + } else { |
---|
| 6446 | + dst_reg->s32_min_value += smin_val; |
---|
| 6447 | + dst_reg->s32_max_value += smax_val; |
---|
| 6448 | + } |
---|
| 6449 | + if (dst_reg->u32_min_value + umin_val < umin_val || |
---|
| 6450 | + dst_reg->u32_max_value + umax_val < umax_val) { |
---|
| 6451 | + dst_reg->u32_min_value = 0; |
---|
| 6452 | + dst_reg->u32_max_value = U32_MAX; |
---|
| 6453 | + } else { |
---|
| 6454 | + dst_reg->u32_min_value += umin_val; |
---|
| 6455 | + dst_reg->u32_max_value += umax_val; |
---|
| 6456 | + } |
---|
| 6457 | +} |
---|
| 6458 | + |
---|
| 6459 | +static void scalar_min_max_add(struct bpf_reg_state *dst_reg, |
---|
| 6460 | + struct bpf_reg_state *src_reg) |
---|
| 6461 | +{ |
---|
| 6462 | + s64 smin_val = src_reg->smin_value; |
---|
| 6463 | + s64 smax_val = src_reg->smax_value; |
---|
| 6464 | + u64 umin_val = src_reg->umin_value; |
---|
| 6465 | + u64 umax_val = src_reg->umax_value; |
---|
| 6466 | + |
---|
| 6467 | + if (signed_add_overflows(dst_reg->smin_value, smin_val) || |
---|
| 6468 | + signed_add_overflows(dst_reg->smax_value, smax_val)) { |
---|
| 6469 | + dst_reg->smin_value = S64_MIN; |
---|
| 6470 | + dst_reg->smax_value = S64_MAX; |
---|
| 6471 | + } else { |
---|
| 6472 | + dst_reg->smin_value += smin_val; |
---|
| 6473 | + dst_reg->smax_value += smax_val; |
---|
| 6474 | + } |
---|
| 6475 | + if (dst_reg->umin_value + umin_val < umin_val || |
---|
| 6476 | + dst_reg->umax_value + umax_val < umax_val) { |
---|
| 6477 | + dst_reg->umin_value = 0; |
---|
| 6478 | + dst_reg->umax_value = U64_MAX; |
---|
| 6479 | + } else { |
---|
| 6480 | + dst_reg->umin_value += umin_val; |
---|
| 6481 | + dst_reg->umax_value += umax_val; |
---|
| 6482 | + } |
---|
| 6483 | +} |
---|
| 6484 | + |
---|
| 6485 | +static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, |
---|
| 6486 | + struct bpf_reg_state *src_reg) |
---|
| 6487 | +{ |
---|
| 6488 | + s32 smin_val = src_reg->s32_min_value; |
---|
| 6489 | + s32 smax_val = src_reg->s32_max_value; |
---|
| 6490 | + u32 umin_val = src_reg->u32_min_value; |
---|
| 6491 | + u32 umax_val = src_reg->u32_max_value; |
---|
| 6492 | + |
---|
| 6493 | + if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) || |
---|
| 6494 | + signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) { |
---|
| 6495 | + /* Overflow possible, we know nothing */ |
---|
| 6496 | + dst_reg->s32_min_value = S32_MIN; |
---|
| 6497 | + dst_reg->s32_max_value = S32_MAX; |
---|
| 6498 | + } else { |
---|
| 6499 | + dst_reg->s32_min_value -= smax_val; |
---|
| 6500 | + dst_reg->s32_max_value -= smin_val; |
---|
| 6501 | + } |
---|
| 6502 | + if (dst_reg->u32_min_value < umax_val) { |
---|
| 6503 | + /* Overflow possible, we know nothing */ |
---|
| 6504 | + dst_reg->u32_min_value = 0; |
---|
| 6505 | + dst_reg->u32_max_value = U32_MAX; |
---|
| 6506 | + } else { |
---|
| 6507 | + /* Cannot overflow (as long as bounds are consistent) */ |
---|
| 6508 | + dst_reg->u32_min_value -= umax_val; |
---|
| 6509 | + dst_reg->u32_max_value -= umin_val; |
---|
| 6510 | + } |
---|
| 6511 | +} |
---|
| 6512 | + |
---|
| 6513 | +static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, |
---|
| 6514 | + struct bpf_reg_state *src_reg) |
---|
| 6515 | +{ |
---|
| 6516 | + s64 smin_val = src_reg->smin_value; |
---|
| 6517 | + s64 smax_val = src_reg->smax_value; |
---|
| 6518 | + u64 umin_val = src_reg->umin_value; |
---|
| 6519 | + u64 umax_val = src_reg->umax_value; |
---|
| 6520 | + |
---|
| 6521 | + if (signed_sub_overflows(dst_reg->smin_value, smax_val) || |
---|
| 6522 | + signed_sub_overflows(dst_reg->smax_value, smin_val)) { |
---|
| 6523 | + /* Overflow possible, we know nothing */ |
---|
| 6524 | + dst_reg->smin_value = S64_MIN; |
---|
| 6525 | + dst_reg->smax_value = S64_MAX; |
---|
| 6526 | + } else { |
---|
| 6527 | + dst_reg->smin_value -= smax_val; |
---|
| 6528 | + dst_reg->smax_value -= smin_val; |
---|
| 6529 | + } |
---|
| 6530 | + if (dst_reg->umin_value < umax_val) { |
---|
| 6531 | + /* Overflow possible, we know nothing */ |
---|
| 6532 | + dst_reg->umin_value = 0; |
---|
| 6533 | + dst_reg->umax_value = U64_MAX; |
---|
| 6534 | + } else { |
---|
| 6535 | + /* Cannot overflow (as long as bounds are consistent) */ |
---|
| 6536 | + dst_reg->umin_value -= umax_val; |
---|
| 6537 | + dst_reg->umax_value -= umin_val; |
---|
| 6538 | + } |
---|
| 6539 | +} |
---|
| 6540 | + |
---|
| 6541 | +static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, |
---|
| 6542 | + struct bpf_reg_state *src_reg) |
---|
| 6543 | +{ |
---|
| 6544 | + s32 smin_val = src_reg->s32_min_value; |
---|
| 6545 | + u32 umin_val = src_reg->u32_min_value; |
---|
| 6546 | + u32 umax_val = src_reg->u32_max_value; |
---|
| 6547 | + |
---|
| 6548 | + if (smin_val < 0 || dst_reg->s32_min_value < 0) { |
---|
| 6549 | + /* Ain't nobody got time to multiply that sign */ |
---|
| 6550 | + __mark_reg32_unbounded(dst_reg); |
---|
| 6551 | + return; |
---|
| 6552 | + } |
---|
| 6553 | + /* Both values are positive, so we can work with unsigned and |
---|
| 6554 | + * copy the result to signed (unless it exceeds S32_MAX). |
---|
| 6555 | + */ |
---|
| 6556 | + if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) { |
---|
| 6557 | + /* Potential overflow, we know nothing */ |
---|
| 6558 | + __mark_reg32_unbounded(dst_reg); |
---|
| 6559 | + return; |
---|
| 6560 | + } |
---|
| 6561 | + dst_reg->u32_min_value *= umin_val; |
---|
| 6562 | + dst_reg->u32_max_value *= umax_val; |
---|
| 6563 | + if (dst_reg->u32_max_value > S32_MAX) { |
---|
| 6564 | + /* Overflow possible, we know nothing */ |
---|
| 6565 | + dst_reg->s32_min_value = S32_MIN; |
---|
| 6566 | + dst_reg->s32_max_value = S32_MAX; |
---|
| 6567 | + } else { |
---|
| 6568 | + dst_reg->s32_min_value = dst_reg->u32_min_value; |
---|
| 6569 | + dst_reg->s32_max_value = dst_reg->u32_max_value; |
---|
| 6570 | + } |
---|
| 6571 | +} |
---|
| 6572 | + |
---|
| 6573 | +static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, |
---|
| 6574 | + struct bpf_reg_state *src_reg) |
---|
| 6575 | +{ |
---|
| 6576 | + s64 smin_val = src_reg->smin_value; |
---|
| 6577 | + u64 umin_val = src_reg->umin_value; |
---|
| 6578 | + u64 umax_val = src_reg->umax_value; |
---|
| 6579 | + |
---|
| 6580 | + if (smin_val < 0 || dst_reg->smin_value < 0) { |
---|
| 6581 | + /* Ain't nobody got time to multiply that sign */ |
---|
| 6582 | + __mark_reg64_unbounded(dst_reg); |
---|
| 6583 | + return; |
---|
| 6584 | + } |
---|
| 6585 | + /* Both values are positive, so we can work with unsigned and |
---|
| 6586 | + * copy the result to signed (unless it exceeds S64_MAX). |
---|
| 6587 | + */ |
---|
| 6588 | + if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) { |
---|
| 6589 | + /* Potential overflow, we know nothing */ |
---|
| 6590 | + __mark_reg64_unbounded(dst_reg); |
---|
| 6591 | + return; |
---|
| 6592 | + } |
---|
| 6593 | + dst_reg->umin_value *= umin_val; |
---|
| 6594 | + dst_reg->umax_value *= umax_val; |
---|
| 6595 | + if (dst_reg->umax_value > S64_MAX) { |
---|
| 6596 | + /* Overflow possible, we know nothing */ |
---|
| 6597 | + dst_reg->smin_value = S64_MIN; |
---|
| 6598 | + dst_reg->smax_value = S64_MAX; |
---|
| 6599 | + } else { |
---|
| 6600 | + dst_reg->smin_value = dst_reg->umin_value; |
---|
| 6601 | + dst_reg->smax_value = dst_reg->umax_value; |
---|
| 6602 | + } |
---|
| 6603 | +} |
---|
| 6604 | + |
---|
| 6605 | +static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, |
---|
| 6606 | + struct bpf_reg_state *src_reg) |
---|
| 6607 | +{ |
---|
| 6608 | + bool src_known = tnum_subreg_is_const(src_reg->var_off); |
---|
| 6609 | + bool dst_known = tnum_subreg_is_const(dst_reg->var_off); |
---|
| 6610 | + struct tnum var32_off = tnum_subreg(dst_reg->var_off); |
---|
| 6611 | + s32 smin_val = src_reg->s32_min_value; |
---|
| 6612 | + u32 umax_val = src_reg->u32_max_value; |
---|
| 6613 | + |
---|
| 6614 | + if (src_known && dst_known) { |
---|
| 6615 | + __mark_reg32_known(dst_reg, var32_off.value); |
---|
| 6616 | + return; |
---|
| 6617 | + } |
---|
| 6618 | + |
---|
| 6619 | + /* We get our minimum from the var_off, since that's inherently |
---|
| 6620 | + * bitwise. Our maximum is the minimum of the operands' maxima. |
---|
| 6621 | + */ |
---|
| 6622 | + dst_reg->u32_min_value = var32_off.value; |
---|
| 6623 | + dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val); |
---|
| 6624 | + if (dst_reg->s32_min_value < 0 || smin_val < 0) { |
---|
| 6625 | + /* Lose signed bounds when ANDing negative numbers, |
---|
| 6626 | + * ain't nobody got time for that. |
---|
| 6627 | + */ |
---|
| 6628 | + dst_reg->s32_min_value = S32_MIN; |
---|
| 6629 | + dst_reg->s32_max_value = S32_MAX; |
---|
| 6630 | + } else { |
---|
| 6631 | + /* ANDing two positives gives a positive, so safe to |
---|
 | 6632 | + * cast result into s32.
---|
| 6633 | + */ |
---|
| 6634 | + dst_reg->s32_min_value = dst_reg->u32_min_value; |
---|
| 6635 | + dst_reg->s32_max_value = dst_reg->u32_max_value; |
---|
| 6636 | + } |
---|
| 6637 | +} |
---|
| 6638 | + |
---|
| 6639 | +static void scalar_min_max_and(struct bpf_reg_state *dst_reg, |
---|
| 6640 | + struct bpf_reg_state *src_reg) |
---|
| 6641 | +{ |
---|
| 6642 | + bool src_known = tnum_is_const(src_reg->var_off); |
---|
| 6643 | + bool dst_known = tnum_is_const(dst_reg->var_off); |
---|
| 6644 | + s64 smin_val = src_reg->smin_value; |
---|
| 6645 | + u64 umax_val = src_reg->umax_value; |
---|
| 6646 | + |
---|
| 6647 | + if (src_known && dst_known) { |
---|
| 6648 | + __mark_reg_known(dst_reg, dst_reg->var_off.value); |
---|
| 6649 | + return; |
---|
| 6650 | + } |
---|
| 6651 | + |
---|
| 6652 | + /* We get our minimum from the var_off, since that's inherently |
---|
| 6653 | + * bitwise. Our maximum is the minimum of the operands' maxima. |
---|
| 6654 | + */ |
---|
| 6655 | + dst_reg->umin_value = dst_reg->var_off.value; |
---|
| 6656 | + dst_reg->umax_value = min(dst_reg->umax_value, umax_val); |
---|
| 6657 | + if (dst_reg->smin_value < 0 || smin_val < 0) { |
---|
| 6658 | + /* Lose signed bounds when ANDing negative numbers, |
---|
| 6659 | + * ain't nobody got time for that. |
---|
| 6660 | + */ |
---|
| 6661 | + dst_reg->smin_value = S64_MIN; |
---|
| 6662 | + dst_reg->smax_value = S64_MAX; |
---|
| 6663 | + } else { |
---|
| 6664 | + /* ANDing two positives gives a positive, so safe to |
---|
| 6665 | + * cast result into s64. |
---|
| 6666 | + */ |
---|
| 6667 | + dst_reg->smin_value = dst_reg->umin_value; |
---|
| 6668 | + dst_reg->smax_value = dst_reg->umax_value; |
---|
| 6669 | + } |
---|
| 6670 | + /* We may learn something more from the var_off */ |
---|
| 6671 | + __update_reg_bounds(dst_reg); |
---|
| 6672 | +} |
---|
| 6673 | + |
---|
| 6674 | +static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, |
---|
| 6675 | + struct bpf_reg_state *src_reg) |
---|
| 6676 | +{ |
---|
| 6677 | + bool src_known = tnum_subreg_is_const(src_reg->var_off); |
---|
| 6678 | + bool dst_known = tnum_subreg_is_const(dst_reg->var_off); |
---|
| 6679 | + struct tnum var32_off = tnum_subreg(dst_reg->var_off); |
---|
| 6680 | + s32 smin_val = src_reg->s32_min_value; |
---|
| 6681 | + u32 umin_val = src_reg->u32_min_value; |
---|
| 6682 | + |
---|
| 6683 | + if (src_known && dst_known) { |
---|
| 6684 | + __mark_reg32_known(dst_reg, var32_off.value); |
---|
| 6685 | + return; |
---|
| 6686 | + } |
---|
| 6687 | + |
---|
| 6688 | + /* We get our maximum from the var_off, and our minimum is the |
---|
| 6689 | + * maximum of the operands' minima |
---|
| 6690 | + */ |
---|
| 6691 | + dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val); |
---|
| 6692 | + dst_reg->u32_max_value = var32_off.value | var32_off.mask; |
---|
| 6693 | + if (dst_reg->s32_min_value < 0 || smin_val < 0) { |
---|
| 6694 | + /* Lose signed bounds when ORing negative numbers, |
---|
| 6695 | + * ain't nobody got time for that. |
---|
| 6696 | + */ |
---|
| 6697 | + dst_reg->s32_min_value = S32_MIN; |
---|
| 6698 | + dst_reg->s32_max_value = S32_MAX; |
---|
| 6699 | + } else { |
---|
| 6700 | + /* ORing two positives gives a positive, so safe to |
---|
 | 6701 | + * cast result into s32.
---|
| 6702 | + */ |
---|
| 6703 | + dst_reg->s32_min_value = dst_reg->u32_min_value; |
---|
| 6704 | + dst_reg->s32_max_value = dst_reg->u32_max_value; |
---|
| 6705 | + } |
---|
| 6706 | +} |
---|
| 6707 | + |
---|
| 6708 | +static void scalar_min_max_or(struct bpf_reg_state *dst_reg, |
---|
| 6709 | + struct bpf_reg_state *src_reg) |
---|
| 6710 | +{ |
---|
| 6711 | + bool src_known = tnum_is_const(src_reg->var_off); |
---|
| 6712 | + bool dst_known = tnum_is_const(dst_reg->var_off); |
---|
| 6713 | + s64 smin_val = src_reg->smin_value; |
---|
| 6714 | + u64 umin_val = src_reg->umin_value; |
---|
| 6715 | + |
---|
| 6716 | + if (src_known && dst_known) { |
---|
| 6717 | + __mark_reg_known(dst_reg, dst_reg->var_off.value); |
---|
| 6718 | + return; |
---|
| 6719 | + } |
---|
| 6720 | + |
---|
| 6721 | + /* We get our maximum from the var_off, and our minimum is the |
---|
| 6722 | + * maximum of the operands' minima |
---|
| 6723 | + */ |
---|
| 6724 | + dst_reg->umin_value = max(dst_reg->umin_value, umin_val); |
---|
| 6725 | + dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask; |
---|
| 6726 | + if (dst_reg->smin_value < 0 || smin_val < 0) { |
---|
| 6727 | + /* Lose signed bounds when ORing negative numbers, |
---|
| 6728 | + * ain't nobody got time for that. |
---|
| 6729 | + */ |
---|
| 6730 | + dst_reg->smin_value = S64_MIN; |
---|
| 6731 | + dst_reg->smax_value = S64_MAX; |
---|
| 6732 | + } else { |
---|
| 6733 | + /* ORing two positives gives a positive, so safe to |
---|
| 6734 | + * cast result into s64. |
---|
| 6735 | + */ |
---|
| 6736 | + dst_reg->smin_value = dst_reg->umin_value; |
---|
| 6737 | + dst_reg->smax_value = dst_reg->umax_value; |
---|
| 6738 | + } |
---|
| 6739 | + /* We may learn something more from the var_off */ |
---|
| 6740 | + __update_reg_bounds(dst_reg); |
---|
| 6741 | +} |
---|
| 6742 | + |
---|
| 6743 | +static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, |
---|
| 6744 | + struct bpf_reg_state *src_reg) |
---|
| 6745 | +{ |
---|
| 6746 | + bool src_known = tnum_subreg_is_const(src_reg->var_off); |
---|
| 6747 | + bool dst_known = tnum_subreg_is_const(dst_reg->var_off); |
---|
| 6748 | + struct tnum var32_off = tnum_subreg(dst_reg->var_off); |
---|
| 6749 | + s32 smin_val = src_reg->s32_min_value; |
---|
| 6750 | + |
---|
| 6751 | + if (src_known && dst_known) { |
---|
| 6752 | + __mark_reg32_known(dst_reg, var32_off.value); |
---|
| 6753 | + return; |
---|
| 6754 | + } |
---|
| 6755 | + |
---|
| 6756 | + /* We get both minimum and maximum from the var32_off. */ |
---|
| 6757 | + dst_reg->u32_min_value = var32_off.value; |
---|
| 6758 | + dst_reg->u32_max_value = var32_off.value | var32_off.mask; |
---|
| 6759 | + |
---|
| 6760 | + if (dst_reg->s32_min_value >= 0 && smin_val >= 0) { |
---|
| 6761 | + /* XORing two positive sign numbers gives a positive, |
---|
| 6762 | + * so safe to cast u32 result into s32. |
---|
| 6763 | + */ |
---|
| 6764 | + dst_reg->s32_min_value = dst_reg->u32_min_value; |
---|
| 6765 | + dst_reg->s32_max_value = dst_reg->u32_max_value; |
---|
| 6766 | + } else { |
---|
| 6767 | + dst_reg->s32_min_value = S32_MIN; |
---|
| 6768 | + dst_reg->s32_max_value = S32_MAX; |
---|
| 6769 | + } |
---|
| 6770 | +} |
---|
| 6771 | + |
---|
| 6772 | +static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, |
---|
| 6773 | + struct bpf_reg_state *src_reg) |
---|
| 6774 | +{ |
---|
| 6775 | + bool src_known = tnum_is_const(src_reg->var_off); |
---|
| 6776 | + bool dst_known = tnum_is_const(dst_reg->var_off); |
---|
| 6777 | + s64 smin_val = src_reg->smin_value; |
---|
| 6778 | + |
---|
| 6779 | + if (src_known && dst_known) { |
---|
| 6780 | + /* dst_reg->var_off.value has been updated earlier */ |
---|
| 6781 | + __mark_reg_known(dst_reg, dst_reg->var_off.value); |
---|
| 6782 | + return; |
---|
| 6783 | + } |
---|
| 6784 | + |
---|
| 6785 | + /* We get both minimum and maximum from the var_off. */ |
---|
| 6786 | + dst_reg->umin_value = dst_reg->var_off.value; |
---|
| 6787 | + dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask; |
---|
| 6788 | + |
---|
| 6789 | + if (dst_reg->smin_value >= 0 && smin_val >= 0) { |
---|
| 6790 | + /* XORing two positive sign numbers gives a positive, |
---|
| 6791 | + * so safe to cast u64 result into s64. |
---|
| 6792 | + */ |
---|
| 6793 | + dst_reg->smin_value = dst_reg->umin_value; |
---|
| 6794 | + dst_reg->smax_value = dst_reg->umax_value; |
---|
| 6795 | + } else { |
---|
| 6796 | + dst_reg->smin_value = S64_MIN; |
---|
| 6797 | + dst_reg->smax_value = S64_MAX; |
---|
| 6798 | + } |
---|
| 6799 | + |
---|
| 6800 | + __update_reg_bounds(dst_reg); |
---|
| 6801 | +} |
---|
| 6802 | + |
---|
| 6803 | +static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, |
---|
| 6804 | + u64 umin_val, u64 umax_val) |
---|
| 6805 | +{ |
---|
| 6806 | + /* We lose all sign bit information (except what we can pick |
---|
| 6807 | + * up from var_off) |
---|
| 6808 | + */ |
---|
| 6809 | + dst_reg->s32_min_value = S32_MIN; |
---|
| 6810 | + dst_reg->s32_max_value = S32_MAX; |
---|
| 6811 | + /* If we might shift our top bit out, then we know nothing */ |
---|
| 6812 | + if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) { |
---|
| 6813 | + dst_reg->u32_min_value = 0; |
---|
| 6814 | + dst_reg->u32_max_value = U32_MAX; |
---|
| 6815 | + } else { |
---|
| 6816 | + dst_reg->u32_min_value <<= umin_val; |
---|
| 6817 | + dst_reg->u32_max_value <<= umax_val; |
---|
| 6818 | + } |
---|
| 6819 | +} |
---|
| 6820 | + |
---|
| 6821 | +static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, |
---|
| 6822 | + struct bpf_reg_state *src_reg) |
---|
| 6823 | +{ |
---|
| 6824 | + u32 umax_val = src_reg->u32_max_value; |
---|
| 6825 | + u32 umin_val = src_reg->u32_min_value; |
---|
| 6826 | + /* u32 alu operation will zext upper bits */ |
---|
| 6827 | + struct tnum subreg = tnum_subreg(dst_reg->var_off); |
---|
| 6828 | + |
---|
| 6829 | + __scalar32_min_max_lsh(dst_reg, umin_val, umax_val); |
---|
| 6830 | + dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val)); |
---|
| 6831 | + /* Not required but being careful mark reg64 bounds as unknown so |
---|
| 6832 | + * that we are forced to pick them up from tnum and zext later and |
---|
| 6833 | + * if some path skips this step we are still safe. |
---|
| 6834 | + */ |
---|
| 6835 | + __mark_reg64_unbounded(dst_reg); |
---|
| 6836 | + __update_reg32_bounds(dst_reg); |
---|
| 6837 | +} |
---|
| 6838 | + |
---|
| 6839 | +static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg, |
---|
| 6840 | + u64 umin_val, u64 umax_val) |
---|
| 6841 | +{ |
---|
| 6842 | + /* Special case <<32 because it is a common compiler pattern to sign |
---|
| 6843 | + * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are |
---|
| 6844 | + * positive we know this shift will also be positive so we can track |
---|
| 6845 | + * bounds correctly. Otherwise we lose all sign bit information except |
---|
| 6846 | + * what we can pick up from var_off. Perhaps we can generalize this |
---|
| 6847 | + * later to shifts of any length. |
---|
| 6848 | + */ |
---|
| 6849 | + if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0) |
---|
| 6850 | + dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32; |
---|
| 6851 | + else |
---|
| 6852 | + dst_reg->smax_value = S64_MAX; |
---|
| 6853 | + |
---|
| 6854 | + if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0) |
---|
| 6855 | + dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32; |
---|
| 6856 | + else |
---|
| 6857 | + dst_reg->smin_value = S64_MIN; |
---|
| 6858 | + |
---|
| 6859 | + /* If we might shift our top bit out, then we know nothing */ |
---|
| 6860 | + if (dst_reg->umax_value > 1ULL << (63 - umax_val)) { |
---|
| 6861 | + dst_reg->umin_value = 0; |
---|
| 6862 | + dst_reg->umax_value = U64_MAX; |
---|
| 6863 | + } else { |
---|
| 6864 | + dst_reg->umin_value <<= umin_val; |
---|
| 6865 | + dst_reg->umax_value <<= umax_val; |
---|
| 6866 | + } |
---|
| 6867 | +} |
---|
| 6868 | + |
---|
| 6869 | +static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg, |
---|
| 6870 | + struct bpf_reg_state *src_reg) |
---|
| 6871 | +{ |
---|
| 6872 | + u64 umax_val = src_reg->umax_value; |
---|
| 6873 | + u64 umin_val = src_reg->umin_value; |
---|
| 6874 | + |
---|
| 6875 | + /* scalar64 calc uses 32bit unshifted bounds so must be called first */ |
---|
| 6876 | + __scalar64_min_max_lsh(dst_reg, umin_val, umax_val); |
---|
| 6877 | + __scalar32_min_max_lsh(dst_reg, umin_val, umax_val); |
---|
| 6878 | + |
---|
| 6879 | + dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); |
---|
| 6880 | + /* We may learn something more from the var_off */ |
---|
| 6881 | + __update_reg_bounds(dst_reg); |
---|
| 6882 | +} |
---|
| 6883 | + |
---|
| 6884 | +static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg, |
---|
| 6885 | + struct bpf_reg_state *src_reg) |
---|
| 6886 | +{ |
---|
| 6887 | + struct tnum subreg = tnum_subreg(dst_reg->var_off); |
---|
| 6888 | + u32 umax_val = src_reg->u32_max_value; |
---|
| 6889 | + u32 umin_val = src_reg->u32_min_value; |
---|
| 6890 | + |
---|
| 6891 | + /* BPF_RSH is an unsigned shift. If the value in dst_reg might |
---|
| 6892 | + * be negative, then either: |
---|
| 6893 | + * 1) src_reg might be zero, so the sign bit of the result is |
---|
| 6894 | + * unknown, so we lose our signed bounds |
---|
| 6895 | + * 2) it's known negative, thus the unsigned bounds capture the |
---|
| 6896 | + * signed bounds |
---|
| 6897 | + * 3) the signed bounds cross zero, so they tell us nothing |
---|
| 6898 | + * about the result |
---|
| 6899 | + * If the value in dst_reg is known nonnegative, then again the |
---|
 | 6900 | + * unsigned bounds capture the signed bounds.
---|
| 6901 | + * Thus, in all cases it suffices to blow away our signed bounds |
---|
| 6902 | + * and rely on inferring new ones from the unsigned bounds and |
---|
| 6903 | + * var_off of the result. |
---|
| 6904 | + */ |
---|
| 6905 | + dst_reg->s32_min_value = S32_MIN; |
---|
| 6906 | + dst_reg->s32_max_value = S32_MAX; |
---|
| 6907 | + |
---|
| 6908 | + dst_reg->var_off = tnum_rshift(subreg, umin_val); |
---|
| 6909 | + dst_reg->u32_min_value >>= umax_val; |
---|
| 6910 | + dst_reg->u32_max_value >>= umin_val; |
---|
| 6911 | + |
---|
| 6912 | + __mark_reg64_unbounded(dst_reg); |
---|
| 6913 | + __update_reg32_bounds(dst_reg); |
---|
| 6914 | +} |
---|
| 6915 | + |
---|
| 6916 | +static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg, |
---|
| 6917 | + struct bpf_reg_state *src_reg) |
---|
| 6918 | +{ |
---|
| 6919 | + u64 umax_val = src_reg->umax_value; |
---|
| 6920 | + u64 umin_val = src_reg->umin_value; |
---|
| 6921 | + |
---|
| 6922 | + /* BPF_RSH is an unsigned shift. If the value in dst_reg might |
---|
| 6923 | + * be negative, then either: |
---|
| 6924 | + * 1) src_reg might be zero, so the sign bit of the result is |
---|
| 6925 | + * unknown, so we lose our signed bounds |
---|
| 6926 | + * 2) it's known negative, thus the unsigned bounds capture the |
---|
| 6927 | + * signed bounds |
---|
| 6928 | + * 3) the signed bounds cross zero, so they tell us nothing |
---|
| 6929 | + * about the result |
---|
| 6930 | + * If the value in dst_reg is known nonnegative, then again the |
---|
 | 6931 | + * unsigned bounds capture the signed bounds.
---|
| 6932 | + * Thus, in all cases it suffices to blow away our signed bounds |
---|
| 6933 | + * and rely on inferring new ones from the unsigned bounds and |
---|
| 6934 | + * var_off of the result. |
---|
| 6935 | + */ |
---|
| 6936 | + dst_reg->smin_value = S64_MIN; |
---|
| 6937 | + dst_reg->smax_value = S64_MAX; |
---|
| 6938 | + dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); |
---|
| 6939 | + dst_reg->umin_value >>= umax_val; |
---|
| 6940 | + dst_reg->umax_value >>= umin_val; |
---|
| 6941 | + |
---|
 | 6942 | + /* It's not easy to operate on alu32 bounds here because it depends
---|
| 6943 | + * on bits being shifted in. Take easy way out and mark unbounded |
---|
| 6944 | + * so we can recalculate later from tnum. |
---|
| 6945 | + */ |
---|
| 6946 | + __mark_reg32_unbounded(dst_reg); |
---|
| 6947 | + __update_reg_bounds(dst_reg); |
---|
| 6948 | +} |
---|
| 6949 | + |
---|
| 6950 | +static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg, |
---|
| 6951 | + struct bpf_reg_state *src_reg) |
---|
| 6952 | +{ |
---|
| 6953 | + u64 umin_val = src_reg->u32_min_value; |
---|
| 6954 | + |
---|
| 6955 | + /* Upon reaching here, src_known is true and |
---|
| 6956 | + * umax_val is equal to umin_val. |
---|
| 6957 | + */ |
---|
| 6958 | + dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val); |
---|
| 6959 | + dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val); |
---|
| 6960 | + |
---|
| 6961 | + dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32); |
---|
| 6962 | + |
---|
| 6963 | + /* blow away the dst_reg umin_value/umax_value and rely on |
---|
| 6964 | + * dst_reg var_off to refine the result. |
---|
| 6965 | + */ |
---|
| 6966 | + dst_reg->u32_min_value = 0; |
---|
| 6967 | + dst_reg->u32_max_value = U32_MAX; |
---|
| 6968 | + |
---|
| 6969 | + __mark_reg64_unbounded(dst_reg); |
---|
| 6970 | + __update_reg32_bounds(dst_reg); |
---|
| 6971 | +} |
---|
| 6972 | + |
---|
| 6973 | +static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg, |
---|
| 6974 | + struct bpf_reg_state *src_reg) |
---|
| 6975 | +{ |
---|
| 6976 | + u64 umin_val = src_reg->umin_value; |
---|
| 6977 | + |
---|
| 6978 | + /* Upon reaching here, src_known is true and umax_val is equal |
---|
| 6979 | + * to umin_val. |
---|
| 6980 | + */ |
---|
| 6981 | + dst_reg->smin_value >>= umin_val; |
---|
| 6982 | + dst_reg->smax_value >>= umin_val; |
---|
| 6983 | + |
---|
| 6984 | + dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64); |
---|
| 6985 | + |
---|
| 6986 | + /* blow away the dst_reg umin_value/umax_value and rely on |
---|
| 6987 | + * dst_reg var_off to refine the result. |
---|
| 6988 | + */ |
---|
| 6989 | + dst_reg->umin_value = 0; |
---|
| 6990 | + dst_reg->umax_value = U64_MAX; |
---|
| 6991 | + |
---|
 | 6992 | + /* It's not easy to operate on alu32 bounds here because it depends
---|
| 6993 | + * on bits being shifted in from upper 32-bits. Take easy way out |
---|
| 6994 | + * and mark unbounded so we can recalculate later from tnum. |
---|
| 6995 | + */ |
---|
| 6996 | + __mark_reg32_unbounded(dst_reg); |
---|
| 6997 | + __update_reg_bounds(dst_reg); |
---|
3221 | 6998 | } |
---|
3222 | 6999 | |
---|
3223 | 7000 | /* WARNING: This function does calculations on 64-bit values, but the actual |
---|
.. | .. |
---|
3231 | 7008 | { |
---|
3232 | 7009 | struct bpf_reg_state *regs = cur_regs(env); |
---|
3233 | 7010 | u8 opcode = BPF_OP(insn->code); |
---|
3234 | | - bool src_known, dst_known; |
---|
| 7011 | + bool src_known; |
---|
3235 | 7012 | s64 smin_val, smax_val; |
---|
3236 | 7013 | u64 umin_val, umax_val; |
---|
| 7014 | + s32 s32_min_val, s32_max_val; |
---|
| 7015 | + u32 u32_min_val, u32_max_val; |
---|
3237 | 7016 | u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; |
---|
| 7017 | + bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); |
---|
3238 | 7018 | int ret; |
---|
3239 | | - |
---|
3240 | | - if (insn_bitness == 32) { |
---|
3241 | | - /* Relevant for 32-bit RSH: Information can propagate towards |
---|
3242 | | - * LSB, so it isn't sufficient to only truncate the output to |
---|
3243 | | - * 32 bits. |
---|
3244 | | - */ |
---|
3245 | | - coerce_reg_to_size(dst_reg, 4); |
---|
3246 | | - coerce_reg_to_size(&src_reg, 4); |
---|
3247 | | - } |
---|
3248 | 7019 | |
---|
3249 | 7020 | smin_val = src_reg.smin_value; |
---|
3250 | 7021 | smax_val = src_reg.smax_value; |
---|
3251 | 7022 | umin_val = src_reg.umin_value; |
---|
3252 | 7023 | umax_val = src_reg.umax_value; |
---|
3253 | | - src_known = tnum_is_const(src_reg.var_off); |
---|
3254 | | - dst_known = tnum_is_const(dst_reg->var_off); |
---|
3255 | 7024 | |
---|
3256 | | - if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || |
---|
3257 | | - smin_val > smax_val || umin_val > umax_val) { |
---|
3258 | | - /* Taint dst register if offset had invalid bounds derived from |
---|
3259 | | - * e.g. dead branches. |
---|
3260 | | - */ |
---|
3261 | | - __mark_reg_unknown(dst_reg); |
---|
3262 | | - return 0; |
---|
| 7025 | + s32_min_val = src_reg.s32_min_value; |
---|
| 7026 | + s32_max_val = src_reg.s32_max_value; |
---|
| 7027 | + u32_min_val = src_reg.u32_min_value; |
---|
| 7028 | + u32_max_val = src_reg.u32_max_value; |
---|
| 7029 | + |
---|
| 7030 | + if (alu32) { |
---|
| 7031 | + src_known = tnum_subreg_is_const(src_reg.var_off); |
---|
| 7032 | + if ((src_known && |
---|
| 7033 | + (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) || |
---|
| 7034 | + s32_min_val > s32_max_val || u32_min_val > u32_max_val) { |
---|
| 7035 | + /* Taint dst register if offset had invalid bounds |
---|
| 7036 | + * derived from e.g. dead branches. |
---|
| 7037 | + */ |
---|
| 7038 | + __mark_reg_unknown(env, dst_reg); |
---|
| 7039 | + return 0; |
---|
| 7040 | + } |
---|
| 7041 | + } else { |
---|
| 7042 | + src_known = tnum_is_const(src_reg.var_off); |
---|
| 7043 | + if ((src_known && |
---|
| 7044 | + (smin_val != smax_val || umin_val != umax_val)) || |
---|
| 7045 | + smin_val > smax_val || umin_val > umax_val) { |
---|
| 7046 | + /* Taint dst register if offset had invalid bounds |
---|
| 7047 | + * derived from e.g. dead branches. |
---|
| 7048 | + */ |
---|
| 7049 | + __mark_reg_unknown(env, dst_reg); |
---|
| 7050 | + return 0; |
---|
| 7051 | + } |
---|
3263 | 7052 | } |
---|
3264 | 7053 | |
---|
3265 | 7054 | if (!src_known && |
---|
3266 | 7055 | opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { |
---|
3267 | | - __mark_reg_unknown(dst_reg); |
---|
| 7056 | + __mark_reg_unknown(env, dst_reg); |
---|
3268 | 7057 | return 0; |
---|
3269 | 7058 | } |
---|
3270 | 7059 | |
---|
.. | .. |
---|
3274 | 7063 | return sanitize_err(env, insn, ret, NULL, NULL); |
---|
3275 | 7064 | } |
---|
3276 | 7065 | |
---|
| 7066 | + /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops. |
---|
| 7067 | + * There are two classes of instructions: The first class we track both |
---|
| 7068 | + * alu32 and alu64 sign/unsigned bounds independently this provides the |
---|
| 7069 | + * greatest amount of precision when alu operations are mixed with jmp32 |
---|
 | 7070 | + * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
---|
| 7071 | + * and BPF_OR. This is possible because these ops have fairly easy to |
---|
| 7072 | + * understand and calculate behavior in both 32-bit and 64-bit alu ops. |
---|
| 7073 | + * See alu32 verifier tests for examples. The second class of |
---|
| 7074 | + * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy |
---|
| 7075 | + * with regards to tracking sign/unsigned bounds because the bits may |
---|
| 7076 | + * cross subreg boundaries in the alu64 case. When this happens we mark |
---|
| 7077 | + * the reg unbounded in the subreg bound space and use the resulting |
---|
| 7078 | + * tnum to calculate an approximation of the sign/unsigned bounds. |
---|
| 7079 | + */ |
---|
3277 | 7080 | switch (opcode) { |
---|
3278 | 7081 | case BPF_ADD: |
---|
3279 | | - if (signed_add_overflows(dst_reg->smin_value, smin_val) || |
---|
3280 | | - signed_add_overflows(dst_reg->smax_value, smax_val)) { |
---|
3281 | | - dst_reg->smin_value = S64_MIN; |
---|
3282 | | - dst_reg->smax_value = S64_MAX; |
---|
3283 | | - } else { |
---|
3284 | | - dst_reg->smin_value += smin_val; |
---|
3285 | | - dst_reg->smax_value += smax_val; |
---|
3286 | | - } |
---|
3287 | | - if (dst_reg->umin_value + umin_val < umin_val || |
---|
3288 | | - dst_reg->umax_value + umax_val < umax_val) { |
---|
3289 | | - dst_reg->umin_value = 0; |
---|
3290 | | - dst_reg->umax_value = U64_MAX; |
---|
3291 | | - } else { |
---|
3292 | | - dst_reg->umin_value += umin_val; |
---|
3293 | | - dst_reg->umax_value += umax_val; |
---|
3294 | | - } |
---|
| 7082 | + scalar32_min_max_add(dst_reg, &src_reg); |
---|
| 7083 | + scalar_min_max_add(dst_reg, &src_reg); |
---|
3295 | 7084 | dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); |
---|
3296 | 7085 | break; |
---|
3297 | 7086 | case BPF_SUB: |
---|
3298 | | - if (signed_sub_overflows(dst_reg->smin_value, smax_val) || |
---|
3299 | | - signed_sub_overflows(dst_reg->smax_value, smin_val)) { |
---|
3300 | | - /* Overflow possible, we know nothing */ |
---|
3301 | | - dst_reg->smin_value = S64_MIN; |
---|
3302 | | - dst_reg->smax_value = S64_MAX; |
---|
3303 | | - } else { |
---|
3304 | | - dst_reg->smin_value -= smax_val; |
---|
3305 | | - dst_reg->smax_value -= smin_val; |
---|
3306 | | - } |
---|
3307 | | - if (dst_reg->umin_value < umax_val) { |
---|
3308 | | - /* Overflow possible, we know nothing */ |
---|
3309 | | - dst_reg->umin_value = 0; |
---|
3310 | | - dst_reg->umax_value = U64_MAX; |
---|
3311 | | - } else { |
---|
3312 | | - /* Cannot overflow (as long as bounds are consistent) */ |
---|
3313 | | - dst_reg->umin_value -= umax_val; |
---|
3314 | | - dst_reg->umax_value -= umin_val; |
---|
3315 | | - } |
---|
| 7087 | + scalar32_min_max_sub(dst_reg, &src_reg); |
---|
| 7088 | + scalar_min_max_sub(dst_reg, &src_reg); |
---|
3316 | 7089 | dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); |
---|
3317 | 7090 | break; |
---|
3318 | 7091 | case BPF_MUL: |
---|
3319 | 7092 | dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); |
---|
3320 | | - if (smin_val < 0 || dst_reg->smin_value < 0) { |
---|
3321 | | - /* Ain't nobody got time to multiply that sign */ |
---|
3322 | | - __mark_reg_unbounded(dst_reg); |
---|
3323 | | - __update_reg_bounds(dst_reg); |
---|
3324 | | - break; |
---|
3325 | | - } |
---|
3326 | | - /* Both values are positive, so we can work with unsigned and |
---|
3327 | | - * copy the result to signed (unless it exceeds S64_MAX). |
---|
3328 | | - */ |
---|
3329 | | - if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) { |
---|
3330 | | - /* Potential overflow, we know nothing */ |
---|
3331 | | - __mark_reg_unbounded(dst_reg); |
---|
3332 | | - /* (except what we can learn from the var_off) */ |
---|
3333 | | - __update_reg_bounds(dst_reg); |
---|
3334 | | - break; |
---|
3335 | | - } |
---|
3336 | | - dst_reg->umin_value *= umin_val; |
---|
3337 | | - dst_reg->umax_value *= umax_val; |
---|
3338 | | - if (dst_reg->umax_value > S64_MAX) { |
---|
3339 | | - /* Overflow possible, we know nothing */ |
---|
3340 | | - dst_reg->smin_value = S64_MIN; |
---|
3341 | | - dst_reg->smax_value = S64_MAX; |
---|
3342 | | - } else { |
---|
3343 | | - dst_reg->smin_value = dst_reg->umin_value; |
---|
3344 | | - dst_reg->smax_value = dst_reg->umax_value; |
---|
3345 | | - } |
---|
| 7093 | + scalar32_min_max_mul(dst_reg, &src_reg); |
---|
| 7094 | + scalar_min_max_mul(dst_reg, &src_reg); |
---|
3346 | 7095 | break; |
---|
3347 | 7096 | case BPF_AND: |
---|
3348 | | - if (src_known && dst_known) { |
---|
3349 | | - __mark_reg_known(dst_reg, dst_reg->var_off.value & |
---|
3350 | | - src_reg.var_off.value); |
---|
3351 | | - break; |
---|
3352 | | - } |
---|
3353 | | - /* We get our minimum from the var_off, since that's inherently |
---|
3354 | | - * bitwise. Our maximum is the minimum of the operands' maxima. |
---|
3355 | | - */ |
---|
3356 | 7097 | dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off); |
---|
3357 | | - dst_reg->umin_value = dst_reg->var_off.value; |
---|
3358 | | - dst_reg->umax_value = min(dst_reg->umax_value, umax_val); |
---|
3359 | | - if (dst_reg->smin_value < 0 || smin_val < 0) { |
---|
3360 | | - /* Lose signed bounds when ANDing negative numbers, |
---|
3361 | | - * ain't nobody got time for that. |
---|
3362 | | - */ |
---|
3363 | | - dst_reg->smin_value = S64_MIN; |
---|
3364 | | - dst_reg->smax_value = S64_MAX; |
---|
3365 | | - } else { |
---|
3366 | | - /* ANDing two positives gives a positive, so safe to |
---|
3367 | | - * cast result into s64. |
---|
3368 | | - */ |
---|
3369 | | - dst_reg->smin_value = dst_reg->umin_value; |
---|
3370 | | - dst_reg->smax_value = dst_reg->umax_value; |
---|
3371 | | - } |
---|
3372 | | - /* We may learn something more from the var_off */ |
---|
3373 | | - __update_reg_bounds(dst_reg); |
---|
| 7098 | + scalar32_min_max_and(dst_reg, &src_reg); |
---|
| 7099 | + scalar_min_max_and(dst_reg, &src_reg); |
---|
3374 | 7100 | break; |
---|
3375 | 7101 | case BPF_OR: |
---|
3376 | | - if (src_known && dst_known) { |
---|
3377 | | - __mark_reg_known(dst_reg, dst_reg->var_off.value | |
---|
3378 | | - src_reg.var_off.value); |
---|
3379 | | - break; |
---|
3380 | | - } |
---|
3381 | | - /* We get our maximum from the var_off, and our minimum is the |
---|
3382 | | - * maximum of the operands' minima |
---|
3383 | | - */ |
---|
3384 | 7102 | dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off); |
---|
3385 | | - dst_reg->umin_value = max(dst_reg->umin_value, umin_val); |
---|
3386 | | - dst_reg->umax_value = dst_reg->var_off.value | |
---|
3387 | | - dst_reg->var_off.mask; |
---|
3388 | | - if (dst_reg->smin_value < 0 || smin_val < 0) { |
---|
3389 | | - /* Lose signed bounds when ORing negative numbers, |
---|
3390 | | - * ain't nobody got time for that. |
---|
3391 | | - */ |
---|
3392 | | - dst_reg->smin_value = S64_MIN; |
---|
3393 | | - dst_reg->smax_value = S64_MAX; |
---|
3394 | | - } else { |
---|
3395 | | - /* ORing two positives gives a positive, so safe to |
---|
3396 | | - * cast result into s64. |
---|
3397 | | - */ |
---|
3398 | | - dst_reg->smin_value = dst_reg->umin_value; |
---|
3399 | | - dst_reg->smax_value = dst_reg->umax_value; |
---|
3400 | | - } |
---|
3401 | | - /* We may learn something more from the var_off */ |
---|
3402 | | - __update_reg_bounds(dst_reg); |
---|
| 7103 | + scalar32_min_max_or(dst_reg, &src_reg); |
---|
| 7104 | + scalar_min_max_or(dst_reg, &src_reg); |
---|
| 7105 | + break; |
---|
| 7106 | + case BPF_XOR: |
---|
| 7107 | + dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off); |
---|
| 7108 | + scalar32_min_max_xor(dst_reg, &src_reg); |
---|
| 7109 | + scalar_min_max_xor(dst_reg, &src_reg); |
---|
3403 | 7110 | break; |
---|
3404 | 7111 | case BPF_LSH: |
---|
3405 | 7112 | if (umax_val >= insn_bitness) { |
---|
.. | .. |
---|
3409 | 7116 | mark_reg_unknown(env, regs, insn->dst_reg); |
---|
3410 | 7117 | break; |
---|
3411 | 7118 | } |
---|
3412 | | - /* We lose all sign bit information (except what we can pick |
---|
3413 | | - * up from var_off) |
---|
3414 | | - */ |
---|
3415 | | - dst_reg->smin_value = S64_MIN; |
---|
3416 | | - dst_reg->smax_value = S64_MAX; |
---|
3417 | | - /* If we might shift our top bit out, then we know nothing */ |
---|
3418 | | - if (dst_reg->umax_value > 1ULL << (63 - umax_val)) { |
---|
3419 | | - dst_reg->umin_value = 0; |
---|
3420 | | - dst_reg->umax_value = U64_MAX; |
---|
3421 | | - } else { |
---|
3422 | | - dst_reg->umin_value <<= umin_val; |
---|
3423 | | - dst_reg->umax_value <<= umax_val; |
---|
3424 | | - } |
---|
3425 | | - dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); |
---|
3426 | | - /* We may learn something more from the var_off */ |
---|
3427 | | - __update_reg_bounds(dst_reg); |
---|
| 7119 | + if (alu32) |
---|
| 7120 | + scalar32_min_max_lsh(dst_reg, &src_reg); |
---|
| 7121 | + else |
---|
| 7122 | + scalar_min_max_lsh(dst_reg, &src_reg); |
---|
3428 | 7123 | break; |
---|
3429 | 7124 | case BPF_RSH: |
---|
3430 | 7125 | if (umax_val >= insn_bitness) { |
---|
.. | .. |
---|
3434 | 7129 | mark_reg_unknown(env, regs, insn->dst_reg); |
---|
3435 | 7130 | break; |
---|
3436 | 7131 | } |
---|
3437 | | - /* BPF_RSH is an unsigned shift. If the value in dst_reg might |
---|
3438 | | - * be negative, then either: |
---|
3439 | | - * 1) src_reg might be zero, so the sign bit of the result is |
---|
3440 | | - * unknown, so we lose our signed bounds |
---|
3441 | | - * 2) it's known negative, thus the unsigned bounds capture the |
---|
3442 | | - * signed bounds |
---|
3443 | | - * 3) the signed bounds cross zero, so they tell us nothing |
---|
3444 | | - * about the result |
---|
3445 | | - * If the value in dst_reg is known nonnegative, then again the |
---|
3446 | | - * unsigned bounts capture the signed bounds. |
---|
3447 | | - * Thus, in all cases it suffices to blow away our signed bounds |
---|
3448 | | - * and rely on inferring new ones from the unsigned bounds and |
---|
3449 | | - * var_off of the result. |
---|
3450 | | - */ |
---|
3451 | | - dst_reg->smin_value = S64_MIN; |
---|
3452 | | - dst_reg->smax_value = S64_MAX; |
---|
3453 | | - dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); |
---|
3454 | | - dst_reg->umin_value >>= umax_val; |
---|
3455 | | - dst_reg->umax_value >>= umin_val; |
---|
3456 | | - /* We may learn something more from the var_off */ |
---|
3457 | | - __update_reg_bounds(dst_reg); |
---|
| 7132 | + if (alu32) |
---|
| 7133 | + scalar32_min_max_rsh(dst_reg, &src_reg); |
---|
| 7134 | + else |
---|
| 7135 | + scalar_min_max_rsh(dst_reg, &src_reg); |
---|
3458 | 7136 | break; |
---|
3459 | 7137 | case BPF_ARSH: |
---|
3460 | 7138 | if (umax_val >= insn_bitness) { |
---|
.. | .. |
---|
3464 | 7142 | mark_reg_unknown(env, regs, insn->dst_reg); |
---|
3465 | 7143 | break; |
---|
3466 | 7144 | } |
---|
3467 | | - |
---|
3468 | | - /* Upon reaching here, src_known is true and |
---|
3469 | | - * umax_val is equal to umin_val. |
---|
3470 | | - */ |
---|
3471 | | - if (insn_bitness == 32) { |
---|
3472 | | - dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val); |
---|
3473 | | - dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val); |
---|
3474 | | - } else { |
---|
3475 | | - dst_reg->smin_value >>= umin_val; |
---|
3476 | | - dst_reg->smax_value >>= umin_val; |
---|
3477 | | - } |
---|
3478 | | - |
---|
3479 | | - dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, |
---|
3480 | | - insn_bitness); |
---|
3481 | | - |
---|
3482 | | - /* blow away the dst_reg umin_value/umax_value and rely on |
---|
3483 | | - * dst_reg var_off to refine the result. |
---|
3484 | | - */ |
---|
3485 | | - dst_reg->umin_value = 0; |
---|
3486 | | - dst_reg->umax_value = U64_MAX; |
---|
3487 | | - __update_reg_bounds(dst_reg); |
---|
| 7145 | + if (alu32) |
---|
| 7146 | + scalar32_min_max_arsh(dst_reg, &src_reg); |
---|
| 7147 | + else |
---|
| 7148 | + scalar_min_max_arsh(dst_reg, &src_reg); |
---|
3488 | 7149 | break; |
---|
3489 | 7150 | default: |
---|
3490 | 7151 | mark_reg_unknown(env, regs, insn->dst_reg); |
---|
3491 | 7152 | break; |
---|
3492 | 7153 | } |
---|
3493 | 7154 | |
---|
3494 | | - if (BPF_CLASS(insn->code) != BPF_ALU64) { |
---|
3495 | | - /* 32-bit ALU ops are (32,32)->32 */ |
---|
3496 | | - coerce_reg_to_size(dst_reg, 4); |
---|
3497 | | - } |
---|
3498 | | - |
---|
3499 | | - __reg_deduce_bounds(dst_reg); |
---|
3500 | | - __reg_bound_offset(dst_reg); |
---|
| 7155 | + /* ALU32 ops are zero extended into 64bit register */ |
---|
| 7156 | + if (alu32) |
---|
| 7157 | + zext_32_to_64(dst_reg); |
---|
| 7158 | + reg_bounds_sync(dst_reg); |
---|
3501 | 7159 | return 0; |
---|
3502 | 7160 | } |
---|
3503 | 7161 | |
---|
.. | .. |
---|
3512 | 7170 | struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; |
---|
3513 | 7171 | struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; |
---|
3514 | 7172 | u8 opcode = BPF_OP(insn->code); |
---|
| 7173 | + int err; |
---|
3515 | 7174 | |
---|
3516 | 7175 | dst_reg = ®s[insn->dst_reg]; |
---|
3517 | 7176 | src_reg = NULL; |
---|
3518 | 7177 | if (dst_reg->type != SCALAR_VALUE) |
---|
3519 | 7178 | ptr_reg = dst_reg; |
---|
| 7179 | + else |
---|
| 7180 | + /* Make sure ID is cleared otherwise dst_reg min/max could be |
---|
| 7181 | + * incorrectly propagated into other registers by find_equal_scalars() |
---|
| 7182 | + */ |
---|
| 7183 | + dst_reg->id = 0; |
---|
3520 | 7184 | if (BPF_SRC(insn->code) == BPF_X) { |
---|
3521 | 7185 | src_reg = ®s[insn->src_reg]; |
---|
3522 | 7186 | if (src_reg->type != SCALAR_VALUE) { |
---|
.. | .. |
---|
3538 | 7202 | * This is legal, but we have to reverse our |
---|
3539 | 7203 | * src/dest handling in computing the range |
---|
3540 | 7204 | */ |
---|
| 7205 | + err = mark_chain_precision(env, insn->dst_reg); |
---|
| 7206 | + if (err) |
---|
| 7207 | + return err; |
---|
3541 | 7208 | return adjust_ptr_min_max_vals(env, insn, |
---|
3542 | 7209 | src_reg, dst_reg); |
---|
3543 | 7210 | } |
---|
3544 | 7211 | } else if (ptr_reg) { |
---|
3545 | 7212 | /* pointer += scalar */ |
---|
| 7213 | + err = mark_chain_precision(env, insn->src_reg); |
---|
| 7214 | + if (err) |
---|
| 7215 | + return err; |
---|
3546 | 7216 | return adjust_ptr_min_max_vals(env, insn, |
---|
3547 | 7217 | dst_reg, src_reg); |
---|
| 7218 | + } else if (dst_reg->precise) { |
---|
| 7219 | + /* if dst_reg is precise, src_reg should be precise as well */ |
---|
| 7220 | + err = mark_chain_precision(env, insn->src_reg); |
---|
| 7221 | + if (err) |
---|
| 7222 | + return err; |
---|
3548 | 7223 | } |
---|
3549 | 7224 | } else { |
---|
3550 | 7225 | /* Pretend the src is a reg with a known value, since we only |
---|
.. | .. |
---|
3644 | 7319 | /* case: R1 = R2 |
---|
3645 | 7320 | * copy register state to dest reg |
---|
3646 | 7321 | */ |
---|
3647 | | - *dst_reg = *src_reg; |
---|
| 7322 | + if (src_reg->type == SCALAR_VALUE && !src_reg->id) |
---|
| 7323 | + /* Assign src and dst registers the same ID |
---|
| 7324 | + * that will be used by find_equal_scalars() |
---|
| 7325 | + * to propagate min/max range. |
---|
| 7326 | + */ |
---|
| 7327 | + src_reg->id = ++env->id_gen; |
---|
| 7328 | + copy_register_state(dst_reg, src_reg); |
---|
3648 | 7329 | dst_reg->live |= REG_LIVE_WRITTEN; |
---|
| 7330 | + dst_reg->subreg_def = DEF_NOT_SUBREG; |
---|
3649 | 7331 | } else { |
---|
3650 | 7332 | /* R1 = (u32) R2 */ |
---|
3651 | 7333 | if (is_pointer_value(env, insn->src_reg)) { |
---|
.. | .. |
---|
3654 | 7336 | insn->src_reg); |
---|
3655 | 7337 | return -EACCES; |
---|
3656 | 7338 | } else if (src_reg->type == SCALAR_VALUE) { |
---|
3657 | | - *dst_reg = *src_reg; |
---|
| 7339 | + copy_register_state(dst_reg, src_reg); |
---|
| 7340 | + /* Make sure ID is cleared otherwise |
---|
| 7341 | + * dst_reg min/max could be incorrectly |
---|
| 7342 | + * propagated into src_reg by find_equal_scalars() |
---|
| 7343 | + */ |
---|
| 7344 | + dst_reg->id = 0; |
---|
3658 | 7345 | dst_reg->live |= REG_LIVE_WRITTEN; |
---|
| 7346 | + dst_reg->subreg_def = env->insn_idx + 1; |
---|
3659 | 7347 | } else { |
---|
3660 | 7348 | mark_reg_unknown(env, regs, |
---|
3661 | 7349 | insn->dst_reg); |
---|
3662 | 7350 | } |
---|
3663 | | - coerce_reg_to_size(dst_reg, 4); |
---|
| 7351 | + zext_32_to_64(dst_reg); |
---|
| 7352 | + reg_bounds_sync(dst_reg); |
---|
3664 | 7353 | } |
---|
3665 | 7354 | } else { |
---|
3666 | 7355 | /* case: R = imm |
---|
.. | .. |
---|
3711 | 7400 | return -EINVAL; |
---|
3712 | 7401 | } |
---|
3713 | 7402 | |
---|
3714 | | - if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) { |
---|
3715 | | - verbose(env, "BPF_ARSH not supported for 32 bit ALU\n"); |
---|
3716 | | - return -EINVAL; |
---|
3717 | | - } |
---|
3718 | | - |
---|
3719 | 7403 | if ((opcode == BPF_LSH || opcode == BPF_RSH || |
---|
3720 | 7404 | opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { |
---|
3721 | 7405 | int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; |
---|
.. | .. |
---|
3742 | 7426 | enum bpf_reg_type type, |
---|
3743 | 7427 | bool range_right_open) |
---|
3744 | 7428 | { |
---|
3745 | | - struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
---|
3746 | | - struct bpf_reg_state *regs = state->regs, *reg; |
---|
3747 | | - u16 new_range; |
---|
3748 | | - int i, j; |
---|
| 7429 | + struct bpf_func_state *state; |
---|
| 7430 | + struct bpf_reg_state *reg; |
---|
| 7431 | + int new_range; |
---|
3749 | 7432 | |
---|
3750 | 7433 | if (dst_reg->off < 0 || |
---|
3751 | 7434 | (dst_reg->off == 0 && range_right_open)) |
---|
.. | .. |
---|
3810 | 7493 | * the range won't allow anything. |
---|
3811 | 7494 | * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. |
---|
3812 | 7495 | */ |
---|
3813 | | - for (i = 0; i < MAX_BPF_REG; i++) |
---|
3814 | | - if (regs[i].type == type && regs[i].id == dst_reg->id) |
---|
| 7496 | + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ |
---|
| 7497 | + if (reg->type == type && reg->id == dst_reg->id) |
---|
3815 | 7498 | /* keep the maximum range already checked */ |
---|
3816 | | - regs[i].range = max(regs[i].range, new_range); |
---|
3817 | | - |
---|
3818 | | - for (j = 0; j <= vstate->curframe; j++) { |
---|
3819 | | - state = vstate->frame[j]; |
---|
3820 | | - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { |
---|
3821 | | - if (state->stack[i].slot_type[0] != STACK_SPILL) |
---|
3822 | | - continue; |
---|
3823 | | - reg = &state->stack[i].spilled_ptr; |
---|
3824 | | - if (reg->type == type && reg->id == dst_reg->id) |
---|
3825 | | - reg->range = max(reg->range, new_range); |
---|
3826 | | - } |
---|
3827 | | - } |
---|
| 7499 | + reg->range = max(reg->range, new_range); |
---|
| 7500 | + })); |
---|
3828 | 7501 | } |
---|
3829 | 7502 | |
---|
3830 | | -/* compute branch direction of the expression "if (reg opcode val) goto target;" |
---|
3831 | | - * and return: |
---|
3832 | | - * 1 - branch will be taken and "goto target" will be executed |
---|
3833 | | - * 0 - branch will not be taken and fall-through to next insn |
---|
3834 | | - * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10] |
---|
3835 | | - */ |
---|
3836 | | -static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) |
---|
| 7503 | +static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) |
---|
3837 | 7504 | { |
---|
3838 | | - if (__is_pointer_value(false, reg)) |
---|
3839 | | - return -1; |
---|
| 7505 | + struct tnum subreg = tnum_subreg(reg->var_off); |
---|
| 7506 | + s32 sval = (s32)val; |
---|
| 7507 | + |
---|
| 7508 | + switch (opcode) { |
---|
| 7509 | + case BPF_JEQ: |
---|
| 7510 | + if (tnum_is_const(subreg)) |
---|
| 7511 | + return !!tnum_equals_const(subreg, val); |
---|
| 7512 | + break; |
---|
| 7513 | + case BPF_JNE: |
---|
| 7514 | + if (tnum_is_const(subreg)) |
---|
| 7515 | + return !tnum_equals_const(subreg, val); |
---|
| 7516 | + break; |
---|
| 7517 | + case BPF_JSET: |
---|
| 7518 | + if ((~subreg.mask & subreg.value) & val) |
---|
| 7519 | + return 1; |
---|
| 7520 | + if (!((subreg.mask | subreg.value) & val)) |
---|
| 7521 | + return 0; |
---|
| 7522 | + break; |
---|
| 7523 | + case BPF_JGT: |
---|
| 7524 | + if (reg->u32_min_value > val) |
---|
| 7525 | + return 1; |
---|
| 7526 | + else if (reg->u32_max_value <= val) |
---|
| 7527 | + return 0; |
---|
| 7528 | + break; |
---|
| 7529 | + case BPF_JSGT: |
---|
| 7530 | + if (reg->s32_min_value > sval) |
---|
| 7531 | + return 1; |
---|
| 7532 | + else if (reg->s32_max_value <= sval) |
---|
| 7533 | + return 0; |
---|
| 7534 | + break; |
---|
| 7535 | + case BPF_JLT: |
---|
| 7536 | + if (reg->u32_max_value < val) |
---|
| 7537 | + return 1; |
---|
| 7538 | + else if (reg->u32_min_value >= val) |
---|
| 7539 | + return 0; |
---|
| 7540 | + break; |
---|
| 7541 | + case BPF_JSLT: |
---|
| 7542 | + if (reg->s32_max_value < sval) |
---|
| 7543 | + return 1; |
---|
| 7544 | + else if (reg->s32_min_value >= sval) |
---|
| 7545 | + return 0; |
---|
| 7546 | + break; |
---|
| 7547 | + case BPF_JGE: |
---|
| 7548 | + if (reg->u32_min_value >= val) |
---|
| 7549 | + return 1; |
---|
| 7550 | + else if (reg->u32_max_value < val) |
---|
| 7551 | + return 0; |
---|
| 7552 | + break; |
---|
| 7553 | + case BPF_JSGE: |
---|
| 7554 | + if (reg->s32_min_value >= sval) |
---|
| 7555 | + return 1; |
---|
| 7556 | + else if (reg->s32_max_value < sval) |
---|
| 7557 | + return 0; |
---|
| 7558 | + break; |
---|
| 7559 | + case BPF_JLE: |
---|
| 7560 | + if (reg->u32_max_value <= val) |
---|
| 7561 | + return 1; |
---|
| 7562 | + else if (reg->u32_min_value > val) |
---|
| 7563 | + return 0; |
---|
| 7564 | + break; |
---|
| 7565 | + case BPF_JSLE: |
---|
| 7566 | + if (reg->s32_max_value <= sval) |
---|
| 7567 | + return 1; |
---|
| 7568 | + else if (reg->s32_min_value > sval) |
---|
| 7569 | + return 0; |
---|
| 7570 | + break; |
---|
| 7571 | + } |
---|
| 7572 | + |
---|
| 7573 | + return -1; |
---|
| 7574 | +} |
---|
| 7575 | + |
---|
| 7576 | + |
---|
| 7577 | +static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) |
---|
| 7578 | +{ |
---|
| 7579 | + s64 sval = (s64)val; |
---|
3840 | 7580 | |
---|
3841 | 7581 | switch (opcode) { |
---|
3842 | 7582 | case BPF_JEQ: |
---|
.. | .. |
---|
3847 | 7587 | if (tnum_is_const(reg->var_off)) |
---|
3848 | 7588 | return !tnum_equals_const(reg->var_off, val); |
---|
3849 | 7589 | break; |
---|
| 7590 | + case BPF_JSET: |
---|
| 7591 | + if ((~reg->var_off.mask & reg->var_off.value) & val) |
---|
| 7592 | + return 1; |
---|
| 7593 | + if (!((reg->var_off.mask | reg->var_off.value) & val)) |
---|
| 7594 | + return 0; |
---|
| 7595 | + break; |
---|
3850 | 7596 | case BPF_JGT: |
---|
3851 | 7597 | if (reg->umin_value > val) |
---|
3852 | 7598 | return 1; |
---|
.. | .. |
---|
3854 | 7600 | return 0; |
---|
3855 | 7601 | break; |
---|
3856 | 7602 | case BPF_JSGT: |
---|
3857 | | - if (reg->smin_value > (s64)val) |
---|
| 7603 | + if (reg->smin_value > sval) |
---|
3858 | 7604 | return 1; |
---|
3859 | | - else if (reg->smax_value < (s64)val) |
---|
| 7605 | + else if (reg->smax_value <= sval) |
---|
3860 | 7606 | return 0; |
---|
3861 | 7607 | break; |
---|
3862 | 7608 | case BPF_JLT: |
---|
.. | .. |
---|
3866 | 7612 | return 0; |
---|
3867 | 7613 | break; |
---|
3868 | 7614 | case BPF_JSLT: |
---|
3869 | | - if (reg->smax_value < (s64)val) |
---|
| 7615 | + if (reg->smax_value < sval) |
---|
3870 | 7616 | return 1; |
---|
3871 | | - else if (reg->smin_value >= (s64)val) |
---|
| 7617 | + else if (reg->smin_value >= sval) |
---|
3872 | 7618 | return 0; |
---|
3873 | 7619 | break; |
---|
3874 | 7620 | case BPF_JGE: |
---|
.. | .. |
---|
3878 | 7624 | return 0; |
---|
3879 | 7625 | break; |
---|
3880 | 7626 | case BPF_JSGE: |
---|
3881 | | - if (reg->smin_value >= (s64)val) |
---|
| 7627 | + if (reg->smin_value >= sval) |
---|
3882 | 7628 | return 1; |
---|
3883 | | - else if (reg->smax_value < (s64)val) |
---|
| 7629 | + else if (reg->smax_value < sval) |
---|
3884 | 7630 | return 0; |
---|
3885 | 7631 | break; |
---|
3886 | 7632 | case BPF_JLE: |
---|
.. | .. |
---|
3890 | 7636 | return 0; |
---|
3891 | 7637 | break; |
---|
3892 | 7638 | case BPF_JSLE: |
---|
3893 | | - if (reg->smax_value <= (s64)val) |
---|
| 7639 | + if (reg->smax_value <= sval) |
---|
3894 | 7640 | return 1; |
---|
3895 | | - else if (reg->smin_value > (s64)val) |
---|
| 7641 | + else if (reg->smin_value > sval) |
---|
3896 | 7642 | return 0; |
---|
3897 | 7643 | break; |
---|
3898 | 7644 | } |
---|
3899 | 7645 | |
---|
| 7646 | + return -1; |
---|
| 7647 | +} |
---|
| 7648 | + |
---|
| 7649 | +/* compute branch direction of the expression "if (reg opcode val) goto target;" |
---|
| 7650 | + * and return: |
---|
| 7651 | + * 1 - branch will be taken and "goto target" will be executed |
---|
| 7652 | + * 0 - branch will not be taken and fall-through to next insn |
---|
| 7653 | + * -1 - unknown. Example: "if (reg < 5)" is unknown when register value |
---|
| 7654 | + * range [0,10] |
---|
| 7655 | + */ |
---|
| 7656 | +static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, |
---|
| 7657 | + bool is_jmp32) |
---|
| 7658 | +{ |
---|
| 7659 | + if (__is_pointer_value(false, reg)) { |
---|
| 7660 | + if (!reg_type_not_null(reg->type)) |
---|
| 7661 | + return -1; |
---|
| 7662 | + |
---|
| 7663 | + /* If the pointer is known to be non-NULL, a test against zero will |
---|
| 7664 | + * fail, so we can use this to determine the branch direction. |
---|
| 7665 | + */ |
---|
| 7666 | + if (val != 0) |
---|
| 7667 | + return -1; |
---|
| 7668 | + |
---|
| 7669 | + switch (opcode) { |
---|
| 7670 | + case BPF_JEQ: |
---|
| 7671 | + return 0; |
---|
| 7672 | + case BPF_JNE: |
---|
| 7673 | + return 1; |
---|
| 7674 | + default: |
---|
| 7675 | + return -1; |
---|
| 7676 | + } |
---|
| 7677 | + } |
---|
| 7678 | + |
---|
| 7679 | + if (is_jmp32) |
---|
| 7680 | + return is_branch32_taken(reg, val, opcode); |
---|
| 7681 | + return is_branch64_taken(reg, val, opcode); |
---|
| 7682 | +} |
---|
| 7683 | + |
---|
| 7684 | +static int flip_opcode(u32 opcode) |
---|
| 7685 | +{ |
---|
| 7686 | + /* How can we transform "a <op> b" into "b <op> a"? */ |
---|
| 7687 | + static const u8 opcode_flip[16] = { |
---|
| 7688 | + /* these stay the same */ |
---|
| 7689 | + [BPF_JEQ >> 4] = BPF_JEQ, |
---|
| 7690 | + [BPF_JNE >> 4] = BPF_JNE, |
---|
| 7691 | + [BPF_JSET >> 4] = BPF_JSET, |
---|
| 7692 | + /* these swap "lesser" and "greater" (L and G in the opcodes) */ |
---|
| 7693 | + [BPF_JGE >> 4] = BPF_JLE, |
---|
| 7694 | + [BPF_JGT >> 4] = BPF_JLT, |
---|
| 7695 | + [BPF_JLE >> 4] = BPF_JGE, |
---|
| 7696 | + [BPF_JLT >> 4] = BPF_JGT, |
---|
| 7697 | + [BPF_JSGE >> 4] = BPF_JSLE, |
---|
| 7698 | + [BPF_JSGT >> 4] = BPF_JSLT, |
---|
| 7699 | + [BPF_JSLE >> 4] = BPF_JSGE, |
---|
| 7700 | + [BPF_JSLT >> 4] = BPF_JSGT |
---|
| 7701 | + }; |
---|
| 7702 | + return opcode_flip[opcode >> 4]; |
---|
| 7703 | +} |
---|
| 7704 | + |
---|
| 7705 | +static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, |
---|
| 7706 | + struct bpf_reg_state *src_reg, |
---|
| 7707 | + u8 opcode) |
---|
| 7708 | +{ |
---|
| 7709 | + struct bpf_reg_state *pkt; |
---|
| 7710 | + |
---|
| 7711 | + if (src_reg->type == PTR_TO_PACKET_END) { |
---|
| 7712 | + pkt = dst_reg; |
---|
| 7713 | + } else if (dst_reg->type == PTR_TO_PACKET_END) { |
---|
| 7714 | + pkt = src_reg; |
---|
| 7715 | + opcode = flip_opcode(opcode); |
---|
| 7716 | + } else { |
---|
| 7717 | + return -1; |
---|
| 7718 | + } |
---|
| 7719 | + |
---|
| 7720 | + if (pkt->range >= 0) |
---|
| 7721 | + return -1; |
---|
| 7722 | + |
---|
| 7723 | + switch (opcode) { |
---|
| 7724 | + case BPF_JLE: |
---|
| 7725 | + /* pkt <= pkt_end */ |
---|
| 7726 | + fallthrough; |
---|
| 7727 | + case BPF_JGT: |
---|
| 7728 | + /* pkt > pkt_end */ |
---|
| 7729 | + if (pkt->range == BEYOND_PKT_END) |
---|
| 7730 | + /* pkt has at least one extra byte beyond pkt_end */ |
---|
| 7731 | + return opcode == BPF_JGT; |
---|
| 7732 | + break; |
---|
| 7733 | + case BPF_JLT: |
---|
| 7734 | + /* pkt < pkt_end */ |
---|
| 7735 | + fallthrough; |
---|
| 7736 | + case BPF_JGE: |
---|
| 7737 | + /* pkt >= pkt_end */ |
---|
| 7738 | + if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END) |
---|
| 7739 | + return opcode == BPF_JGE; |
---|
| 7740 | + break; |
---|
| 7741 | + } |
---|
3900 | 7742 | return -1; |
---|
3901 | 7743 | } |
---|
3902 | 7744 | |
---|
.. | .. |
---|
3906 | 7748 | * In JEQ/JNE cases we also adjust the var_off values. |
---|
3907 | 7749 | */ |
---|
3908 | 7750 | static void reg_set_min_max(struct bpf_reg_state *true_reg, |
---|
3909 | | - struct bpf_reg_state *false_reg, u64 val, |
---|
3910 | | - u8 opcode) |
---|
| 7751 | + struct bpf_reg_state *false_reg, |
---|
| 7752 | + u64 val, u32 val32, |
---|
| 7753 | + u8 opcode, bool is_jmp32) |
---|
3911 | 7754 | { |
---|
| 7755 | + struct tnum false_32off = tnum_subreg(false_reg->var_off); |
---|
| 7756 | + struct tnum false_64off = false_reg->var_off; |
---|
| 7757 | + struct tnum true_32off = tnum_subreg(true_reg->var_off); |
---|
| 7758 | + struct tnum true_64off = true_reg->var_off; |
---|
| 7759 | + s64 sval = (s64)val; |
---|
| 7760 | + s32 sval32 = (s32)val32; |
---|
| 7761 | + |
---|
3912 | 7762 | /* If the dst_reg is a pointer, we can't learn anything about its |
---|
3913 | 7763 | * variable offset from the compare (unless src_reg were a pointer into |
---|
3914 | 7764 | * the same object, but we don't bother with that. |
---|
.. | .. |
---|
3919 | 7769 | return; |
---|
3920 | 7770 | |
---|
3921 | 7771 | switch (opcode) { |
---|
| 7772 | + /* JEQ/JNE comparison doesn't change the register equivalence. |
---|
| 7773 | + * |
---|
| 7774 | + * r1 = r2; |
---|
| 7775 | + * if (r1 == 42) goto label; |
---|
| 7776 | + * ... |
---|
| 7777 | + * label: // here both r1 and r2 are known to be 42. |
---|
| 7778 | + * |
---|
| 7779 | + * Hence, when marking a register as known, preserve its ID. |
---|
| 7780 | + */ |
---|
3922 | 7781 | case BPF_JEQ: |
---|
3923 | | - /* If this is false then we know nothing Jon Snow, but if it is |
---|
3924 | | - * true then we know for sure. |
---|
3925 | | - */ |
---|
3926 | | - __mark_reg_known(true_reg, val); |
---|
| 7782 | + if (is_jmp32) { |
---|
| 7783 | + __mark_reg32_known(true_reg, val32); |
---|
| 7784 | + true_32off = tnum_subreg(true_reg->var_off); |
---|
| 7785 | + } else { |
---|
| 7786 | + ___mark_reg_known(true_reg, val); |
---|
| 7787 | + true_64off = true_reg->var_off; |
---|
| 7788 | + } |
---|
3927 | 7789 | break; |
---|
3928 | 7790 | case BPF_JNE: |
---|
3929 | | - /* If this is true we know nothing Jon Snow, but if it is false |
---|
3930 | | - * we know the value for sure; |
---|
3931 | | - */ |
---|
3932 | | - __mark_reg_known(false_reg, val); |
---|
| 7791 | + if (is_jmp32) { |
---|
| 7792 | + __mark_reg32_known(false_reg, val32); |
---|
| 7793 | + false_32off = tnum_subreg(false_reg->var_off); |
---|
| 7794 | + } else { |
---|
| 7795 | + ___mark_reg_known(false_reg, val); |
---|
| 7796 | + false_64off = false_reg->var_off; |
---|
| 7797 | + } |
---|
3933 | 7798 | break; |
---|
3934 | | - case BPF_JGT: |
---|
3935 | | - false_reg->umax_value = min(false_reg->umax_value, val); |
---|
3936 | | - true_reg->umin_value = max(true_reg->umin_value, val + 1); |
---|
3937 | | - break; |
---|
3938 | | - case BPF_JSGT: |
---|
3939 | | - false_reg->smax_value = min_t(s64, false_reg->smax_value, val); |
---|
3940 | | - true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1); |
---|
3941 | | - break; |
---|
3942 | | - case BPF_JLT: |
---|
3943 | | - false_reg->umin_value = max(false_reg->umin_value, val); |
---|
3944 | | - true_reg->umax_value = min(true_reg->umax_value, val - 1); |
---|
3945 | | - break; |
---|
3946 | | - case BPF_JSLT: |
---|
3947 | | - false_reg->smin_value = max_t(s64, false_reg->smin_value, val); |
---|
3948 | | - true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1); |
---|
| 7799 | + case BPF_JSET: |
---|
| 7800 | + if (is_jmp32) { |
---|
| 7801 | + false_32off = tnum_and(false_32off, tnum_const(~val32)); |
---|
| 7802 | + if (is_power_of_2(val32)) |
---|
| 7803 | + true_32off = tnum_or(true_32off, |
---|
| 7804 | + tnum_const(val32)); |
---|
| 7805 | + } else { |
---|
| 7806 | + false_64off = tnum_and(false_64off, tnum_const(~val)); |
---|
| 7807 | + if (is_power_of_2(val)) |
---|
| 7808 | + true_64off = tnum_or(true_64off, |
---|
| 7809 | + tnum_const(val)); |
---|
| 7810 | + } |
---|
3949 | 7811 | break; |
---|
3950 | 7812 | case BPF_JGE: |
---|
3951 | | - false_reg->umax_value = min(false_reg->umax_value, val - 1); |
---|
3952 | | - true_reg->umin_value = max(true_reg->umin_value, val); |
---|
3953 | | - break; |
---|
3954 | | - case BPF_JSGE: |
---|
3955 | | - false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1); |
---|
3956 | | - true_reg->smin_value = max_t(s64, true_reg->smin_value, val); |
---|
3957 | | - break; |
---|
3958 | | - case BPF_JLE: |
---|
3959 | | - false_reg->umin_value = max(false_reg->umin_value, val + 1); |
---|
3960 | | - true_reg->umax_value = min(true_reg->umax_value, val); |
---|
3961 | | - break; |
---|
3962 | | - case BPF_JSLE: |
---|
3963 | | - false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1); |
---|
3964 | | - true_reg->smax_value = min_t(s64, true_reg->smax_value, val); |
---|
3965 | | - break; |
---|
3966 | | - default: |
---|
| 7813 | + case BPF_JGT: |
---|
| 7814 | + { |
---|
| 7815 | + if (is_jmp32) { |
---|
| 7816 | + u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1; |
---|
| 7817 | + u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32; |
---|
| 7818 | + |
---|
| 7819 | + false_reg->u32_max_value = min(false_reg->u32_max_value, |
---|
| 7820 | + false_umax); |
---|
| 7821 | + true_reg->u32_min_value = max(true_reg->u32_min_value, |
---|
| 7822 | + true_umin); |
---|
| 7823 | + } else { |
---|
| 7824 | + u64 false_umax = opcode == BPF_JGT ? val : val - 1; |
---|
| 7825 | + u64 true_umin = opcode == BPF_JGT ? val + 1 : val; |
---|
| 7826 | + |
---|
| 7827 | + false_reg->umax_value = min(false_reg->umax_value, false_umax); |
---|
| 7828 | + true_reg->umin_value = max(true_reg->umin_value, true_umin); |
---|
| 7829 | + } |
---|
3967 | 7830 | break; |
---|
3968 | 7831 | } |
---|
| 7832 | + case BPF_JSGE: |
---|
| 7833 | + case BPF_JSGT: |
---|
| 7834 | + { |
---|
| 7835 | + if (is_jmp32) { |
---|
| 7836 | + s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1; |
---|
| 7837 | + s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32; |
---|
3969 | 7838 | |
---|
3970 | | - __reg_deduce_bounds(false_reg); |
---|
3971 | | - __reg_deduce_bounds(true_reg); |
---|
3972 | | - /* We might have learned some bits from the bounds. */ |
---|
3973 | | - __reg_bound_offset(false_reg); |
---|
3974 | | - __reg_bound_offset(true_reg); |
---|
3975 | | - /* Intersecting with the old var_off might have improved our bounds |
---|
3976 | | - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), |
---|
3977 | | - * then new var_off is (0; 0x7f...fc) which improves our umax. |
---|
3978 | | - */ |
---|
3979 | | - __update_reg_bounds(false_reg); |
---|
3980 | | - __update_reg_bounds(true_reg); |
---|
| 7839 | + false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax); |
---|
| 7840 | + true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin); |
---|
| 7841 | + } else { |
---|
| 7842 | + s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; |
---|
| 7843 | + s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; |
---|
| 7844 | + |
---|
| 7845 | + false_reg->smax_value = min(false_reg->smax_value, false_smax); |
---|
| 7846 | + true_reg->smin_value = max(true_reg->smin_value, true_smin); |
---|
| 7847 | + } |
---|
| 7848 | + break; |
---|
| 7849 | + } |
---|
| 7850 | + case BPF_JLE: |
---|
| 7851 | + case BPF_JLT: |
---|
| 7852 | + { |
---|
| 7853 | + if (is_jmp32) { |
---|
| 7854 | + u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1; |
---|
| 7855 | + u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32; |
---|
| 7856 | + |
---|
| 7857 | + false_reg->u32_min_value = max(false_reg->u32_min_value, |
---|
| 7858 | + false_umin); |
---|
| 7859 | + true_reg->u32_max_value = min(true_reg->u32_max_value, |
---|
| 7860 | + true_umax); |
---|
| 7861 | + } else { |
---|
| 7862 | + u64 false_umin = opcode == BPF_JLT ? val : val + 1; |
---|
| 7863 | + u64 true_umax = opcode == BPF_JLT ? val - 1 : val; |
---|
| 7864 | + |
---|
| 7865 | + false_reg->umin_value = max(false_reg->umin_value, false_umin); |
---|
| 7866 | + true_reg->umax_value = min(true_reg->umax_value, true_umax); |
---|
| 7867 | + } |
---|
| 7868 | + break; |
---|
| 7869 | + } |
---|
| 7870 | + case BPF_JSLE: |
---|
| 7871 | + case BPF_JSLT: |
---|
| 7872 | + { |
---|
| 7873 | + if (is_jmp32) { |
---|
| 7874 | + s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1; |
---|
| 7875 | + s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32; |
---|
| 7876 | + |
---|
| 7877 | + false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin); |
---|
| 7878 | + true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax); |
---|
| 7879 | + } else { |
---|
| 7880 | + s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; |
---|
| 7881 | + s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; |
---|
| 7882 | + |
---|
| 7883 | + false_reg->smin_value = max(false_reg->smin_value, false_smin); |
---|
| 7884 | + true_reg->smax_value = min(true_reg->smax_value, true_smax); |
---|
| 7885 | + } |
---|
| 7886 | + break; |
---|
| 7887 | + } |
---|
| 7888 | + default: |
---|
| 7889 | + return; |
---|
| 7890 | + } |
---|
| 7891 | + |
---|
| 7892 | + if (is_jmp32) { |
---|
| 7893 | + false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off), |
---|
| 7894 | + tnum_subreg(false_32off)); |
---|
| 7895 | + true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off), |
---|
| 7896 | + tnum_subreg(true_32off)); |
---|
| 7897 | + __reg_combine_32_into_64(false_reg); |
---|
| 7898 | + __reg_combine_32_into_64(true_reg); |
---|
| 7899 | + } else { |
---|
| 7900 | + false_reg->var_off = false_64off; |
---|
| 7901 | + true_reg->var_off = true_64off; |
---|
| 7902 | + __reg_combine_64_into_32(false_reg); |
---|
| 7903 | + __reg_combine_64_into_32(true_reg); |
---|
| 7904 | + } |
---|
3981 | 7905 | } |
---|
3982 | 7906 | |
---|
3983 | 7907 | /* Same as above, but for the case that dst_reg holds a constant and src_reg is |
---|
3984 | 7908 | * the variable reg. |
---|
3985 | 7909 | */ |
---|
3986 | 7910 | static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, |
---|
3987 | | - struct bpf_reg_state *false_reg, u64 val, |
---|
3988 | | - u8 opcode) |
---|
| 7911 | + struct bpf_reg_state *false_reg, |
---|
| 7912 | + u64 val, u32 val32, |
---|
| 7913 | + u8 opcode, bool is_jmp32) |
---|
3989 | 7914 | { |
---|
3990 | | - if (__is_pointer_value(false, false_reg)) |
---|
3991 | | - return; |
---|
3992 | | - |
---|
3993 | | - switch (opcode) { |
---|
3994 | | - case BPF_JEQ: |
---|
3995 | | - /* If this is false then we know nothing Jon Snow, but if it is |
---|
3996 | | - * true then we know for sure. |
---|
3997 | | - */ |
---|
3998 | | - __mark_reg_known(true_reg, val); |
---|
3999 | | - break; |
---|
4000 | | - case BPF_JNE: |
---|
4001 | | - /* If this is true we know nothing Jon Snow, but if it is false |
---|
4002 | | - * we know the value for sure; |
---|
4003 | | - */ |
---|
4004 | | - __mark_reg_known(false_reg, val); |
---|
4005 | | - break; |
---|
4006 | | - case BPF_JGT: |
---|
4007 | | - true_reg->umax_value = min(true_reg->umax_value, val - 1); |
---|
4008 | | - false_reg->umin_value = max(false_reg->umin_value, val); |
---|
4009 | | - break; |
---|
4010 | | - case BPF_JSGT: |
---|
4011 | | - true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1); |
---|
4012 | | - false_reg->smin_value = max_t(s64, false_reg->smin_value, val); |
---|
4013 | | - break; |
---|
4014 | | - case BPF_JLT: |
---|
4015 | | - true_reg->umin_value = max(true_reg->umin_value, val + 1); |
---|
4016 | | - false_reg->umax_value = min(false_reg->umax_value, val); |
---|
4017 | | - break; |
---|
4018 | | - case BPF_JSLT: |
---|
4019 | | - true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1); |
---|
4020 | | - false_reg->smax_value = min_t(s64, false_reg->smax_value, val); |
---|
4021 | | - break; |
---|
4022 | | - case BPF_JGE: |
---|
4023 | | - true_reg->umax_value = min(true_reg->umax_value, val); |
---|
4024 | | - false_reg->umin_value = max(false_reg->umin_value, val + 1); |
---|
4025 | | - break; |
---|
4026 | | - case BPF_JSGE: |
---|
4027 | | - true_reg->smax_value = min_t(s64, true_reg->smax_value, val); |
---|
4028 | | - false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1); |
---|
4029 | | - break; |
---|
4030 | | - case BPF_JLE: |
---|
4031 | | - true_reg->umin_value = max(true_reg->umin_value, val); |
---|
4032 | | - false_reg->umax_value = min(false_reg->umax_value, val - 1); |
---|
4033 | | - break; |
---|
4034 | | - case BPF_JSLE: |
---|
4035 | | - true_reg->smin_value = max_t(s64, true_reg->smin_value, val); |
---|
4036 | | - false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1); |
---|
4037 | | - break; |
---|
4038 | | - default: |
---|
4039 | | - break; |
---|
4040 | | - } |
---|
4041 | | - |
---|
4042 | | - __reg_deduce_bounds(false_reg); |
---|
4043 | | - __reg_deduce_bounds(true_reg); |
---|
4044 | | - /* We might have learned some bits from the bounds. */ |
---|
4045 | | - __reg_bound_offset(false_reg); |
---|
4046 | | - __reg_bound_offset(true_reg); |
---|
4047 | | - /* Intersecting with the old var_off might have improved our bounds |
---|
4048 | | - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), |
---|
4049 | | - * then new var_off is (0; 0x7f...fc) which improves our umax. |
---|
| 7915 | + opcode = flip_opcode(opcode); |
---|
| 7916 | + /* This uses zero as "not present in table"; luckily the zero opcode, |
---|
| 7917 | + * BPF_JA, can't get here. |
---|
4050 | 7918 | */ |
---|
4051 | | - __update_reg_bounds(false_reg); |
---|
4052 | | - __update_reg_bounds(true_reg); |
---|
| 7919 | + if (opcode) |
---|
| 7920 | + reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32); |
---|
4053 | 7921 | } |
---|
4054 | 7922 | |
---|
4055 | 7923 | /* Regs are known to be equal, so intersect their min/max/var_off */ |
---|
.. | .. |
---|
4066 | 7934 | dst_reg->smax_value); |
---|
4067 | 7935 | src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, |
---|
4068 | 7936 | dst_reg->var_off); |
---|
4069 | | - /* We might have learned new bounds from the var_off. */ |
---|
4070 | | - __update_reg_bounds(src_reg); |
---|
4071 | | - __update_reg_bounds(dst_reg); |
---|
4072 | | - /* We might have learned something about the sign bit. */ |
---|
4073 | | - __reg_deduce_bounds(src_reg); |
---|
4074 | | - __reg_deduce_bounds(dst_reg); |
---|
4075 | | - /* We might have learned some bits from the bounds. */ |
---|
4076 | | - __reg_bound_offset(src_reg); |
---|
4077 | | - __reg_bound_offset(dst_reg); |
---|
4078 | | - /* Intersecting with the old var_off might have improved our bounds |
---|
4079 | | - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), |
---|
4080 | | - * then new var_off is (0; 0x7f...fc) which improves our umax. |
---|
4081 | | - */ |
---|
4082 | | - __update_reg_bounds(src_reg); |
---|
4083 | | - __update_reg_bounds(dst_reg); |
---|
| 7937 | + reg_bounds_sync(src_reg); |
---|
| 7938 | + reg_bounds_sync(dst_reg); |
---|
4084 | 7939 | } |
---|
4085 | 7940 | |
---|
4086 | 7941 | static void reg_combine_min_max(struct bpf_reg_state *true_src, |
---|
.. | .. |
---|
4099 | 7954 | } |
---|
4100 | 7955 | } |
---|
4101 | 7956 | |
---|
4102 | | -static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, |
---|
4103 | | - bool is_null) |
---|
| 7957 | +static void mark_ptr_or_null_reg(struct bpf_func_state *state, |
---|
| 7958 | + struct bpf_reg_state *reg, u32 id, |
---|
| 7959 | + bool is_null) |
---|
4104 | 7960 | { |
---|
4105 | | - struct bpf_reg_state *reg = &regs[regno];
---|
4106 | | - |
---|
4107 | | - if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) { |
---|
4108 | | - /* Old offset (both fixed and variable parts) should |
---|
4109 | | - * have been known-zero, because we don't allow pointer |
---|
4110 | | - * arithmetic on pointers that might be NULL. |
---|
4111 | | - */ |
---|
| 7961 | + if (reg_type_may_be_null(reg->type) && reg->id == id && |
---|
| 7962 | + !WARN_ON_ONCE(!reg->id)) { |
---|
4112 | 7963 | if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || |
---|
4113 | 7964 | !tnum_equals_const(reg->var_off, 0) || |
---|
4114 | 7965 | reg->off)) { |
---|
4115 | | - __mark_reg_known_zero(reg); |
---|
4116 | | - reg->off = 0; |
---|
| 7966 | + /* Old offset (both fixed and variable parts) should |
---|
| 7967 | + * have been known-zero, because we don't allow pointer |
---|
| 7968 | + * arithmetic on pointers that might be NULL. If we |
---|
| 7969 | + * see this happening, don't convert the register. |
---|
| 7970 | + */ |
---|
| 7971 | + return; |
---|
4117 | 7972 | } |
---|
4118 | 7973 | if (is_null) { |
---|
4119 | 7974 | reg->type = SCALAR_VALUE; |
---|
4120 | | - } else if (reg->map_ptr->inner_map_meta) { |
---|
4121 | | - reg->type = CONST_PTR_TO_MAP; |
---|
4122 | | - reg->map_ptr = reg->map_ptr->inner_map_meta; |
---|
4123 | | - } else { |
---|
4124 | | - reg->type = PTR_TO_MAP_VALUE; |
---|
| 7975 | + } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) { |
---|
| 7976 | + const struct bpf_map *map = reg->map_ptr; |
---|
| 7977 | + |
---|
| 7978 | + if (map->inner_map_meta) { |
---|
| 7979 | + reg->type = CONST_PTR_TO_MAP; |
---|
| 7980 | + reg->map_ptr = map->inner_map_meta; |
---|
| 7981 | + } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { |
---|
| 7982 | + reg->type = PTR_TO_XDP_SOCK; |
---|
| 7983 | + } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || |
---|
| 7984 | + map->map_type == BPF_MAP_TYPE_SOCKHASH) { |
---|
| 7985 | + reg->type = PTR_TO_SOCKET; |
---|
| 7986 | + } else { |
---|
| 7987 | + reg->type = PTR_TO_MAP_VALUE; |
---|
| 7988 | + } |
---|
| 7989 | + } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { |
---|
| 7990 | + reg->type = PTR_TO_SOCKET; |
---|
| 7991 | + } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { |
---|
| 7992 | + reg->type = PTR_TO_SOCK_COMMON; |
---|
| 7993 | + } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { |
---|
| 7994 | + reg->type = PTR_TO_TCP_SOCK; |
---|
| 7995 | + } else if (reg->type == PTR_TO_BTF_ID_OR_NULL) { |
---|
| 7996 | + reg->type = PTR_TO_BTF_ID; |
---|
| 7997 | + } else if (reg->type == PTR_TO_MEM_OR_NULL) { |
---|
| 7998 | + reg->type = PTR_TO_MEM; |
---|
| 7999 | + } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) { |
---|
| 8000 | + reg->type = PTR_TO_RDONLY_BUF; |
---|
| 8001 | + } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) { |
---|
| 8002 | + reg->type = PTR_TO_RDWR_BUF; |
---|
4125 | 8003 | } |
---|
4126 | | - /* We don't need id from this point onwards anymore, thus we |
---|
4127 | | - * should better reset it, so that state pruning has chances |
---|
4128 | | - * to take effect. |
---|
4129 | | - */ |
---|
4130 | | - reg->id = 0; |
---|
| 8004 | + if (is_null) { |
---|
| 8005 | + /* We don't need id and ref_obj_id from this point |
---|
| 8006 | + * onwards anymore, thus we should better reset it, |
---|
| 8007 | + * so that state pruning has chances to take effect. |
---|
| 8008 | + */ |
---|
| 8009 | + reg->id = 0; |
---|
| 8010 | + reg->ref_obj_id = 0; |
---|
| 8011 | + } else if (!reg_may_point_to_spin_lock(reg)) { |
---|
| 8012 | + /* For not-NULL ptr, reg->ref_obj_id will be reset |
---|
| 8013 | + * in release_reference(). |
---|
| 8014 | + * |
---|
| 8015 | + * reg->id is still used by spin_lock ptr. Other |
---|
| 8016 | + * than spin_lock ptr type, reg->id can be reset. |
---|
| 8017 | + */ |
---|
| 8018 | + reg->id = 0; |
---|
| 8019 | + } |
---|
4131 | 8020 | } |
---|
4132 | 8021 | } |
---|
4133 | 8022 | |
---|
4134 | 8023 | /* The logic is similar to find_good_pkt_pointers(), both could eventually |
---|
4135 | 8024 | * be folded together at some point. |
---|
4136 | 8025 | */ |
---|
4137 | | -static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno, |
---|
4138 | | - bool is_null) |
---|
| 8026 | +static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, |
---|
| 8027 | + bool is_null) |
---|
4139 | 8028 | { |
---|
4140 | 8029 | struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
---|
4141 | | - struct bpf_reg_state *regs = state->regs; |
---|
| 8030 | + struct bpf_reg_state *regs = state->regs, *reg; |
---|
| 8031 | + u32 ref_obj_id = regs[regno].ref_obj_id; |
---|
4142 | 8032 | u32 id = regs[regno].id; |
---|
4143 | | - int i, j; |
---|
4144 | 8033 | |
---|
4145 | | - for (i = 0; i < MAX_BPF_REG; i++) |
---|
4146 | | - mark_map_reg(regs, i, id, is_null); |
---|
| 8034 | + if (ref_obj_id && ref_obj_id == id && is_null) |
---|
| 8035 | + /* regs[regno] is in the " == NULL" branch. |
---|
| 8036 | + * No one could have freed the reference state before |
---|
| 8037 | + * doing the NULL check. |
---|
| 8038 | + */ |
---|
| 8039 | + WARN_ON_ONCE(release_reference_state(state, id)); |
---|
4147 | 8040 | |
---|
4148 | | - for (j = 0; j <= vstate->curframe; j++) { |
---|
4149 | | - state = vstate->frame[j]; |
---|
4150 | | - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { |
---|
4151 | | - if (state->stack[i].slot_type[0] != STACK_SPILL) |
---|
4152 | | - continue; |
---|
4153 | | - mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null); |
---|
4154 | | - } |
---|
4155 | | - } |
---|
| 8041 | + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ |
---|
| 8042 | + mark_ptr_or_null_reg(state, reg, id, is_null); |
---|
| 8043 | + })); |
---|
4156 | 8044 | } |
---|
4157 | 8045 | |
---|
4158 | 8046 | static bool try_match_pkt_pointers(const struct bpf_insn *insn, |
---|
.. | .. |
---|
4164 | 8052 | if (BPF_SRC(insn->code) != BPF_X) |
---|
4165 | 8053 | return false; |
---|
4166 | 8054 | |
---|
| 8055 | + /* Pointers are always 64-bit. */ |
---|
| 8056 | + if (BPF_CLASS(insn->code) == BPF_JMP32) |
---|
| 8057 | + return false; |
---|
| 8058 | + |
---|
4167 | 8059 | switch (BPF_OP(insn->code)) { |
---|
4168 | 8060 | case BPF_JGT: |
---|
4169 | 8061 | if ((dst_reg->type == PTR_TO_PACKET && |
---|
.. | .. |
---|
4173 | 8065 | /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ |
---|
4174 | 8066 | find_good_pkt_pointers(this_branch, dst_reg, |
---|
4175 | 8067 | dst_reg->type, false); |
---|
| 8068 | + mark_pkt_end(other_branch, insn->dst_reg, true); |
---|
4176 | 8069 | } else if ((dst_reg->type == PTR_TO_PACKET_END && |
---|
4177 | 8070 | src_reg->type == PTR_TO_PACKET) || |
---|
4178 | 8071 | (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && |
---|
.. | .. |
---|
4180 | 8073 | /* pkt_end > pkt_data', pkt_data > pkt_meta' */ |
---|
4181 | 8074 | find_good_pkt_pointers(other_branch, src_reg, |
---|
4182 | 8075 | src_reg->type, true); |
---|
| 8076 | + mark_pkt_end(this_branch, insn->src_reg, false); |
---|
4183 | 8077 | } else { |
---|
4184 | 8078 | return false; |
---|
4185 | 8079 | } |
---|
.. | .. |
---|
4192 | 8086 | /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ |
---|
4193 | 8087 | find_good_pkt_pointers(other_branch, dst_reg, |
---|
4194 | 8088 | dst_reg->type, true); |
---|
| 8089 | + mark_pkt_end(this_branch, insn->dst_reg, false); |
---|
4195 | 8090 | } else if ((dst_reg->type == PTR_TO_PACKET_END && |
---|
4196 | 8091 | src_reg->type == PTR_TO_PACKET) || |
---|
4197 | 8092 | (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && |
---|
.. | .. |
---|
4199 | 8094 | /* pkt_end < pkt_data', pkt_data > pkt_meta' */ |
---|
4200 | 8095 | find_good_pkt_pointers(this_branch, src_reg, |
---|
4201 | 8096 | src_reg->type, false); |
---|
| 8097 | + mark_pkt_end(other_branch, insn->src_reg, true); |
---|
4202 | 8098 | } else { |
---|
4203 | 8099 | return false; |
---|
4204 | 8100 | } |
---|
.. | .. |
---|
4211 | 8107 | /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ |
---|
4212 | 8108 | find_good_pkt_pointers(this_branch, dst_reg, |
---|
4213 | 8109 | dst_reg->type, true); |
---|
| 8110 | + mark_pkt_end(other_branch, insn->dst_reg, false); |
---|
4214 | 8111 | } else if ((dst_reg->type == PTR_TO_PACKET_END && |
---|
4215 | 8112 | src_reg->type == PTR_TO_PACKET) || |
---|
4216 | 8113 | (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && |
---|
.. | .. |
---|
4218 | 8115 | /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ |
---|
4219 | 8116 | find_good_pkt_pointers(other_branch, src_reg, |
---|
4220 | 8117 | src_reg->type, false); |
---|
| 8118 | + mark_pkt_end(this_branch, insn->src_reg, true); |
---|
4221 | 8119 | } else { |
---|
4222 | 8120 | return false; |
---|
4223 | 8121 | } |
---|
.. | .. |
---|
4230 | 8128 | /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ |
---|
4231 | 8129 | find_good_pkt_pointers(other_branch, dst_reg, |
---|
4232 | 8130 | dst_reg->type, false); |
---|
| 8131 | + mark_pkt_end(this_branch, insn->dst_reg, true); |
---|
4233 | 8132 | } else if ((dst_reg->type == PTR_TO_PACKET_END && |
---|
4234 | 8133 | src_reg->type == PTR_TO_PACKET) || |
---|
4235 | 8134 | (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && |
---|
.. | .. |
---|
4237 | 8136 | /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ |
---|
4238 | 8137 | find_good_pkt_pointers(this_branch, src_reg, |
---|
4239 | 8138 | src_reg->type, true); |
---|
| 8139 | + mark_pkt_end(other_branch, insn->src_reg, false); |
---|
4240 | 8140 | } else { |
---|
4241 | 8141 | return false; |
---|
4242 | 8142 | } |
---|
.. | .. |
---|
4248 | 8148 | return true; |
---|
4249 | 8149 | } |
---|
4250 | 8150 | |
---|
| 8151 | +static void find_equal_scalars(struct bpf_verifier_state *vstate, |
---|
| 8152 | + struct bpf_reg_state *known_reg) |
---|
| 8153 | +{ |
---|
| 8154 | + struct bpf_func_state *state; |
---|
| 8155 | + struct bpf_reg_state *reg; |
---|
| 8156 | + |
---|
| 8157 | + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ |
---|
| 8158 | + if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) |
---|
| 8159 | + copy_register_state(reg, known_reg); |
---|
| 8160 | + })); |
---|
| 8161 | +} |
---|
| 8162 | + |
---|
4251 | 8163 | static int check_cond_jmp_op(struct bpf_verifier_env *env, |
---|
4252 | 8164 | struct bpf_insn *insn, int *insn_idx) |
---|
4253 | 8165 | { |
---|
.. | .. |
---|
4256 | 8168 | struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; |
---|
4257 | 8169 | struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; |
---|
4258 | 8170 | u8 opcode = BPF_OP(insn->code); |
---|
| 8171 | + bool is_jmp32; |
---|
4259 | 8172 | int pred = -1; |
---|
4260 | 8173 | int err; |
---|
4261 | 8174 | |
---|
4262 | | - if (opcode > BPF_JSLE) { |
---|
4263 | | - verbose(env, "invalid BPF_JMP opcode %x\n", opcode); |
---|
| 8175 | + /* Only conditional jumps are expected to reach here. */ |
---|
| 8176 | + if (opcode == BPF_JA || opcode > BPF_JSLE) { |
---|
| 8177 | + verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); |
---|
4264 | 8178 | return -EINVAL; |
---|
4265 | 8179 | } |
---|
4266 | 8180 | |
---|
4267 | 8181 | if (BPF_SRC(insn->code) == BPF_X) { |
---|
4268 | 8182 | if (insn->imm != 0) { |
---|
4269 | | - verbose(env, "BPF_JMP uses reserved fields\n"); |
---|
| 8183 | + verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); |
---|
4270 | 8184 | return -EINVAL; |
---|
4271 | 8185 | } |
---|
4272 | 8186 | |
---|
.. | .. |
---|
4283 | 8197 | src_reg = &regs[insn->src_reg];
---|
4284 | 8198 | } else { |
---|
4285 | 8199 | if (insn->src_reg != BPF_REG_0) { |
---|
4286 | | - verbose(env, "BPF_JMP uses reserved fields\n"); |
---|
| 8200 | + verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); |
---|
4287 | 8201 | return -EINVAL; |
---|
4288 | 8202 | } |
---|
4289 | 8203 | } |
---|
.. | .. |
---|
4294 | 8208 | return err; |
---|
4295 | 8209 | |
---|
4296 | 8210 | dst_reg = &regs[insn->dst_reg];
---|
| 8211 | + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; |
---|
4297 | 8212 | |
---|
4298 | | - if (BPF_SRC(insn->code) == BPF_K) |
---|
4299 | | - pred = is_branch_taken(dst_reg, insn->imm, opcode); |
---|
4300 | | - else if (src_reg->type == SCALAR_VALUE && |
---|
4301 | | - tnum_is_const(src_reg->var_off)) |
---|
4302 | | - pred = is_branch_taken(dst_reg, src_reg->var_off.value, |
---|
4303 | | - opcode); |
---|
| 8213 | + if (BPF_SRC(insn->code) == BPF_K) { |
---|
| 8214 | + pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32); |
---|
| 8215 | + } else if (src_reg->type == SCALAR_VALUE && |
---|
| 8216 | + is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) { |
---|
| 8217 | + pred = is_branch_taken(dst_reg, |
---|
| 8218 | + tnum_subreg(src_reg->var_off).value, |
---|
| 8219 | + opcode, |
---|
| 8220 | + is_jmp32); |
---|
| 8221 | + } else if (src_reg->type == SCALAR_VALUE && |
---|
| 8222 | + !is_jmp32 && tnum_is_const(src_reg->var_off)) { |
---|
| 8223 | + pred = is_branch_taken(dst_reg, |
---|
| 8224 | + src_reg->var_off.value, |
---|
| 8225 | + opcode, |
---|
| 8226 | + is_jmp32); |
---|
| 8227 | + } else if (reg_is_pkt_pointer_any(dst_reg) && |
---|
| 8228 | + reg_is_pkt_pointer_any(src_reg) && |
---|
| 8229 | + !is_jmp32) { |
---|
| 8230 | + pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode); |
---|
| 8231 | + } |
---|
| 8232 | + |
---|
| 8233 | + if (pred >= 0) { |
---|
| 8234 | + /* If we get here with a dst_reg pointer type it is because |
---|
| 8235 | + * above is_branch_taken() special cased the 0 comparison. |
---|
| 8236 | + */ |
---|
| 8237 | + if (!__is_pointer_value(false, dst_reg)) |
---|
| 8238 | + err = mark_chain_precision(env, insn->dst_reg); |
---|
| 8239 | + if (BPF_SRC(insn->code) == BPF_X && !err && |
---|
| 8240 | + !__is_pointer_value(false, src_reg)) |
---|
| 8241 | + err = mark_chain_precision(env, insn->src_reg); |
---|
| 8242 | + if (err) |
---|
| 8243 | + return err; |
---|
| 8244 | + } |
---|
4304 | 8245 | |
---|
4305 | 8246 | if (pred == 1) { |
---|
4306 | 8247 | /* Only follow the goto, ignore fall-through. If needed, push |
---|
4307 | 8248 | * the fall-through branch for simulation under speculative |
---|
4308 | 8249 | * execution. |
---|
4309 | 8250 | */ |
---|
4310 | | - if (!env->allow_ptr_leaks && |
---|
| 8251 | + if (!env->bypass_spec_v1 && |
---|
4311 | 8252 | !sanitize_speculative_path(env, insn, *insn_idx + 1, |
---|
4312 | 8253 | *insn_idx)) |
---|
4313 | 8254 | return -EFAULT; |
---|
.. | .. |
---|
4318 | 8259 | * program will go. If needed, push the goto branch for |
---|
4319 | 8260 | * simulation under speculative execution. |
---|
4320 | 8261 | */ |
---|
4321 | | - if (!env->allow_ptr_leaks && |
---|
| 8262 | + if (!env->bypass_spec_v1 && |
---|
4322 | 8263 | !sanitize_speculative_path(env, insn, |
---|
4323 | 8264 | *insn_idx + insn->off + 1, |
---|
4324 | 8265 | *insn_idx)) |
---|
.. | .. |
---|
4340 | 8281 | * comparable. |
---|
4341 | 8282 | */ |
---|
4342 | 8283 | if (BPF_SRC(insn->code) == BPF_X) { |
---|
| 8284 | + struct bpf_reg_state *src_reg = &regs[insn->src_reg];
---|
| 8285 | + |
---|
4343 | 8286 | if (dst_reg->type == SCALAR_VALUE && |
---|
4344 | | - regs[insn->src_reg].type == SCALAR_VALUE) { |
---|
4345 | | - if (tnum_is_const(regs[insn->src_reg].var_off)) |
---|
| 8287 | + src_reg->type == SCALAR_VALUE) { |
---|
| 8288 | + if (tnum_is_const(src_reg->var_off) || |
---|
| 8289 | + (is_jmp32 && |
---|
| 8290 | + tnum_is_const(tnum_subreg(src_reg->var_off)))) |
---|
4346 | 8291 | reg_set_min_max(&other_branch_regs[insn->dst_reg], |
---|
4347 | | - dst_reg, regs[insn->src_reg].var_off.value, |
---|
4348 | | - opcode); |
---|
4349 | | - else if (tnum_is_const(dst_reg->var_off)) |
---|
| 8292 | + dst_reg, |
---|
| 8293 | + src_reg->var_off.value, |
---|
| 8294 | + tnum_subreg(src_reg->var_off).value, |
---|
| 8295 | + opcode, is_jmp32); |
---|
| 8296 | + else if (tnum_is_const(dst_reg->var_off) || |
---|
| 8297 | + (is_jmp32 && |
---|
| 8298 | + tnum_is_const(tnum_subreg(dst_reg->var_off)))) |
---|
4350 | 8299 | reg_set_min_max_inv(&other_branch_regs[insn->src_reg], |
---|
4351 | | - &regs[insn->src_reg],
---|
4352 | | - dst_reg->var_off.value, opcode); |
---|
4353 | | - else if (opcode == BPF_JEQ || opcode == BPF_JNE) |
---|
| 8300 | + src_reg, |
---|
| 8301 | + dst_reg->var_off.value, |
---|
| 8302 | + tnum_subreg(dst_reg->var_off).value, |
---|
| 8303 | + opcode, is_jmp32); |
---|
| 8304 | + else if (!is_jmp32 && |
---|
| 8305 | + (opcode == BPF_JEQ || opcode == BPF_JNE)) |
---|
4354 | 8306 | /* Comparing for equality, we can combine knowledge */ |
---|
4355 | 8307 | reg_combine_min_max(&other_branch_regs[insn->src_reg], |
---|
4356 | 8308 | &other_branch_regs[insn->dst_reg], |
---|
4357 | | - &regs[insn->src_reg],
---|
4358 | | - &regs[insn->dst_reg], opcode);
---|
| 8309 | + src_reg, dst_reg, opcode); |
---|
| 8310 | + if (src_reg->id && |
---|
| 8311 | + !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { |
---|
| 8312 | + find_equal_scalars(this_branch, src_reg); |
---|
| 8313 | + find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); |
---|
| 8314 | + } |
---|
| 8315 | + |
---|
4359 | 8316 | } |
---|
4360 | 8317 | } else if (dst_reg->type == SCALAR_VALUE) { |
---|
4361 | 8318 | reg_set_min_max(&other_branch_regs[insn->dst_reg], |
---|
4362 | | - dst_reg, insn->imm, opcode); |
---|
| 8319 | + dst_reg, insn->imm, (u32)insn->imm, |
---|
| 8320 | + opcode, is_jmp32); |
---|
4363 | 8321 | } |
---|
4364 | 8322 | |
---|
4365 | | - /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ |
---|
4366 | | - if (BPF_SRC(insn->code) == BPF_K && |
---|
| 8323 | + if (dst_reg->type == SCALAR_VALUE && dst_reg->id && |
---|
| 8324 | + !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) { |
---|
| 8325 | + find_equal_scalars(this_branch, dst_reg); |
---|
| 8326 | + find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]); |
---|
| 8327 | + } |
---|
| 8328 | + |
---|
| 8329 | + /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). |
---|
| 8330 | + * NOTE: these optimizations below are related with pointer comparison |
---|
| 8331 | + * which will never be JMP32. |
---|
| 8332 | + */ |
---|
| 8333 | + if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && |
---|
4367 | 8334 | insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && |
---|
4368 | | - dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { |
---|
4369 | | - /* Mark all identical map registers in each branch as either |
---|
| 8335 | + reg_type_may_be_null(dst_reg->type)) { |
---|
| 8336 | + /* Mark all identical registers in each branch as either |
---|
4370 | 8337 | * safe or unknown depending R == 0 or R != 0 conditional. |
---|
4371 | 8338 | */ |
---|
4372 | | - mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE); |
---|
4373 | | - mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ); |
---|
| 8339 | + mark_ptr_or_null_regs(this_branch, insn->dst_reg, |
---|
| 8340 | + opcode == BPF_JNE); |
---|
| 8341 | + mark_ptr_or_null_regs(other_branch, insn->dst_reg, |
---|
| 8342 | + opcode == BPF_JEQ); |
---|
4374 | 8343 | } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
---|
4375 | 8344 | this_branch, other_branch) && |
---|
4376 | 8345 | is_pointer_value(env, insn->dst_reg)) { |
---|
.. | .. |
---|
4378 | 8347 | insn->dst_reg); |
---|
4379 | 8348 | return -EACCES; |
---|
4380 | 8349 | } |
---|
4381 | | - if (env->log.level) |
---|
| 8350 | + if (env->log.level & BPF_LOG_LEVEL) |
---|
4382 | 8351 | print_verifier_state(env, this_branch->frame[this_branch->curframe]); |
---|
4383 | 8352 | return 0; |
---|
4384 | | -} |
---|
4385 | | - |
---|
4386 | | -/* return the map pointer stored inside BPF_LD_IMM64 instruction */ |
---|
4387 | | -static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) |
---|
4388 | | -{ |
---|
4389 | | - u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32; |
---|
4390 | | - |
---|
4391 | | - return (struct bpf_map *) (unsigned long) imm64; |
---|
4392 | 8353 | } |
---|
4393 | 8354 | |
---|
4394 | 8355 | /* verify BPF_LD_IMM64 instruction */ |
---|
4395 | 8356 | static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) |
---|
4396 | 8357 | { |
---|
| 8358 | + struct bpf_insn_aux_data *aux = cur_aux(env); |
---|
4397 | 8359 | struct bpf_reg_state *regs = cur_regs(env); |
---|
| 8360 | + struct bpf_reg_state *dst_reg; |
---|
| 8361 | + struct bpf_map *map; |
---|
4398 | 8362 | int err; |
---|
4399 | 8363 | |
---|
4400 | 8364 | if (BPF_SIZE(insn->code) != BPF_DW) { |
---|
.. | .. |
---|
4410 | 8374 | if (err) |
---|
4411 | 8375 | return err; |
---|
4412 | 8376 | |
---|
| 8377 | + dst_reg = &regs[insn->dst_reg];
---|
4413 | 8378 | if (insn->src_reg == 0) { |
---|
4414 | 8379 | u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; |
---|
4415 | 8380 | |
---|
4416 | | - regs[insn->dst_reg].type = SCALAR_VALUE; |
---|
| 8381 | + dst_reg->type = SCALAR_VALUE; |
---|
4417 | 8382 | __mark_reg_known(&regs[insn->dst_reg], imm);
---|
4418 | 8383 | return 0; |
---|
4419 | 8384 | } |
---|
4420 | 8385 | |
---|
4421 | | - /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ |
---|
4422 | | - BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); |
---|
| 8386 | + if (insn->src_reg == BPF_PSEUDO_BTF_ID) { |
---|
| 8387 | + mark_reg_known_zero(env, regs, insn->dst_reg); |
---|
4423 | 8388 | |
---|
4424 | | - regs[insn->dst_reg].type = CONST_PTR_TO_MAP; |
---|
4425 | | - regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn); |
---|
| 8389 | + dst_reg->type = aux->btf_var.reg_type; |
---|
| 8390 | + switch (dst_reg->type) { |
---|
| 8391 | + case PTR_TO_MEM: |
---|
| 8392 | + dst_reg->mem_size = aux->btf_var.mem_size; |
---|
| 8393 | + break; |
---|
| 8394 | + case PTR_TO_BTF_ID: |
---|
| 8395 | + case PTR_TO_PERCPU_BTF_ID: |
---|
| 8396 | + dst_reg->btf_id = aux->btf_var.btf_id; |
---|
| 8397 | + break; |
---|
| 8398 | + default: |
---|
| 8399 | + verbose(env, "bpf verifier is misconfigured\n"); |
---|
| 8400 | + return -EFAULT; |
---|
| 8401 | + } |
---|
| 8402 | + return 0; |
---|
| 8403 | + } |
---|
| 8404 | + |
---|
| 8405 | + map = env->used_maps[aux->map_index]; |
---|
| 8406 | + mark_reg_known_zero(env, regs, insn->dst_reg); |
---|
| 8407 | + dst_reg->map_ptr = map; |
---|
| 8408 | + |
---|
| 8409 | + if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { |
---|
| 8410 | + dst_reg->type = PTR_TO_MAP_VALUE; |
---|
| 8411 | + dst_reg->off = aux->map_off; |
---|
| 8412 | + if (map_value_has_spin_lock(map)) |
---|
| 8413 | + dst_reg->id = ++env->id_gen; |
---|
| 8414 | + } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { |
---|
| 8415 | + dst_reg->type = CONST_PTR_TO_MAP; |
---|
| 8416 | + } else { |
---|
| 8417 | + verbose(env, "bpf verifier is misconfigured\n"); |
---|
| 8418 | + return -EINVAL; |
---|
| 8419 | + } |
---|
| 8420 | + |
---|
4426 | 8421 | return 0; |
---|
4427 | 8422 | } |
---|
4428 | 8423 | |
---|
.. | .. |
---|
4460 | 8455 | u8 mode = BPF_MODE(insn->code); |
---|
4461 | 8456 | int i, err; |
---|
4462 | 8457 | |
---|
4463 | | - if (!may_access_skb(env->prog->type)) { |
---|
| 8458 | + if (!may_access_skb(resolve_prog_type(env->prog))) { |
---|
4464 | 8459 | verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); |
---|
4465 | 8460 | return -EINVAL; |
---|
4466 | 8461 | } |
---|
4467 | 8462 | |
---|
4468 | 8463 | if (!env->ops->gen_ld_abs) { |
---|
4469 | 8464 | verbose(env, "bpf verifier is misconfigured\n"); |
---|
4470 | | - return -EINVAL; |
---|
4471 | | - } |
---|
4472 | | - |
---|
4473 | | - if (env->subprog_cnt > 1) { |
---|
4474 | | - /* when program has LD_ABS insn JITs and interpreter assume |
---|
4475 | | - * that r1 == ctx == skb which is not the case for callees |
---|
4476 | | - * that can have arbitrary arguments. It's problematic |
---|
4477 | | - * for main prog as well since JITs would need to analyze |
---|
4478 | | - * all functions in order to make proper register save/restore |
---|
4479 | | - * decisions in the main prog. Hence disallow LD_ABS with calls |
---|
4480 | | - */ |
---|
4481 | | - verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n"); |
---|
4482 | 8465 | return -EINVAL; |
---|
4483 | 8466 | } |
---|
4484 | 8467 | |
---|
.. | .. |
---|
4493 | 8476 | err = check_reg_arg(env, ctx_reg, SRC_OP); |
---|
4494 | 8477 | if (err) |
---|
4495 | 8478 | return err; |
---|
| 8479 | + |
---|
| 8480 | + /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as |
---|
| 8481 | + * gen_ld_abs() may terminate the program at runtime, leading to |
---|
| 8482 | + * reference leak. |
---|
| 8483 | + */ |
---|
| 8484 | + err = check_reference_leak(env); |
---|
| 8485 | + if (err) { |
---|
| 8486 | + verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n"); |
---|
| 8487 | + return err; |
---|
| 8488 | + } |
---|
| 8489 | + |
---|
| 8490 | + if (env->cur_state->active_spin_lock) { |
---|
| 8491 | + verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); |
---|
| 8492 | + return -EINVAL; |
---|
| 8493 | + } |
---|
4496 | 8494 | |
---|
4497 | 8495 | if (regs[ctx_reg].type != PTR_TO_CTX) { |
---|
4498 | 8496 | verbose(env, |
---|
.. | .. |
---|
4522 | 8520 | * Already marked as written above. |
---|
4523 | 8521 | */ |
---|
4524 | 8522 | mark_reg_unknown(env, regs, BPF_REG_0); |
---|
| 8523 | + /* ld_abs loads up to 32 bits of skb data. */ |
---|
| 8524 | + regs[BPF_REG_0].subreg_def = env->insn_idx + 1; |
---|
4525 | 8525 | return 0; |
---|
4526 | 8526 | } |
---|
4527 | 8527 | |
---|
4528 | 8528 | static int check_return_code(struct bpf_verifier_env *env) |
---|
4529 | 8529 | { |
---|
| 8530 | + struct tnum enforce_attach_type_range = tnum_unknown; |
---|
| 8531 | + const struct bpf_prog *prog = env->prog; |
---|
4530 | 8532 | struct bpf_reg_state *reg; |
---|
4531 | 8533 | struct tnum range = tnum_range(0, 1); |
---|
| 8534 | + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); |
---|
| 8535 | + int err; |
---|
| 8536 | + const bool is_subprog = env->cur_state->frame[0]->subprogno; |
---|
4532 | 8537 | |
---|
4533 | | - switch (env->prog->type) { |
---|
| 8538 | + /* LSM and struct_ops func-ptr's return type could be "void" */ |
---|
| 8539 | + if (!is_subprog && |
---|
| 8540 | + (prog_type == BPF_PROG_TYPE_STRUCT_OPS || |
---|
| 8541 | + prog_type == BPF_PROG_TYPE_LSM) && |
---|
| 8542 | + !prog->aux->attach_func_proto->type) |
---|
| 8543 | + return 0; |
---|
| 8544 | + |
---|
| 8545 | + /* eBPF calling convention is such that R0 is used |
---|
| 8546 | + * to return the value from eBPF program. |
---|
| 8547 | + * Make sure that it's readable at this time |
---|
| 8548 | + * of bpf_exit, which means that program wrote |
---|
| 8549 | + * something into it earlier |
---|
| 8550 | + */ |
---|
| 8551 | + err = check_reg_arg(env, BPF_REG_0, SRC_OP); |
---|
| 8552 | + if (err) |
---|
| 8553 | + return err; |
---|
| 8554 | + |
---|
| 8555 | + if (is_pointer_value(env, BPF_REG_0)) { |
---|
| 8556 | + verbose(env, "R0 leaks addr as return value\n"); |
---|
| 8557 | + return -EACCES; |
---|
| 8558 | + } |
---|
| 8559 | + |
---|
| 8560 | + reg = cur_regs(env) + BPF_REG_0; |
---|
| 8561 | + if (is_subprog) { |
---|
| 8562 | + if (reg->type != SCALAR_VALUE) { |
---|
| 8563 | + verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", |
---|
| 8564 | + reg_type_str[reg->type]); |
---|
| 8565 | + return -EINVAL; |
---|
| 8566 | + } |
---|
| 8567 | + return 0; |
---|
| 8568 | + } |
---|
| 8569 | + |
---|
| 8570 | + switch (prog_type) { |
---|
4534 | 8571 | case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: |
---|
4535 | 8572 | if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || |
---|
4536 | | - env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG) |
---|
| 8573 | + env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG || |
---|
| 8574 | + env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME || |
---|
| 8575 | + env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME || |
---|
| 8576 | + env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || |
---|
| 8577 | + env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) |
---|
4537 | 8578 | range = tnum_range(1, 1); |
---|
| 8579 | + break; |
---|
4538 | 8580 | case BPF_PROG_TYPE_CGROUP_SKB: |
---|
| 8581 | + if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { |
---|
| 8582 | + range = tnum_range(0, 3); |
---|
| 8583 | + enforce_attach_type_range = tnum_range(2, 3); |
---|
| 8584 | + } |
---|
| 8585 | + break; |
---|
4539 | 8586 | case BPF_PROG_TYPE_CGROUP_SOCK: |
---|
4540 | 8587 | case BPF_PROG_TYPE_SOCK_OPS: |
---|
4541 | 8588 | case BPF_PROG_TYPE_CGROUP_DEVICE: |
---|
| 8589 | + case BPF_PROG_TYPE_CGROUP_SYSCTL: |
---|
| 8590 | + case BPF_PROG_TYPE_CGROUP_SOCKOPT: |
---|
4542 | 8591 | break; |
---|
| 8592 | + case BPF_PROG_TYPE_RAW_TRACEPOINT: |
---|
| 8593 | + if (!env->prog->aux->attach_btf_id) |
---|
| 8594 | + return 0; |
---|
| 8595 | + range = tnum_const(0); |
---|
| 8596 | + break; |
---|
| 8597 | + case BPF_PROG_TYPE_TRACING: |
---|
| 8598 | + switch (env->prog->expected_attach_type) { |
---|
| 8599 | + case BPF_TRACE_FENTRY: |
---|
| 8600 | + case BPF_TRACE_FEXIT: |
---|
| 8601 | + range = tnum_const(0); |
---|
| 8602 | + break; |
---|
| 8603 | + case BPF_TRACE_RAW_TP: |
---|
| 8604 | + case BPF_MODIFY_RETURN: |
---|
| 8605 | + return 0; |
---|
| 8606 | + case BPF_TRACE_ITER: |
---|
| 8607 | + break; |
---|
| 8608 | + default: |
---|
| 8609 | + return -ENOTSUPP; |
---|
| 8610 | + } |
---|
| 8611 | + break; |
---|
| 8612 | + case BPF_PROG_TYPE_SK_LOOKUP: |
---|
| 8613 | + range = tnum_range(SK_DROP, SK_PASS); |
---|
| 8614 | + break; |
---|
| 8615 | + case BPF_PROG_TYPE_EXT: |
---|
| 8616 | + /* freplace program can return anything as its return value |
---|
| 8617 | + * depends on the to-be-replaced kernel func or bpf program. |
---|
| 8618 | + */ |
---|
4543 | 8619 | default: |
---|
4544 | 8620 | return 0; |
---|
4545 | 8621 | } |
---|
4546 | 8622 | |
---|
4547 | | - reg = cur_regs(env) + BPF_REG_0; |
---|
4548 | 8623 | if (reg->type != SCALAR_VALUE) { |
---|
4549 | 8624 | verbose(env, "At program exit the register R0 is not a known value (%s)\n", |
---|
4550 | 8625 | reg_type_str[reg->type]); |
---|
.. | .. |
---|
4565 | 8640 | verbose(env, " should have been in %s\n", tn_buf); |
---|
4566 | 8641 | return -EINVAL; |
---|
4567 | 8642 | } |
---|
| 8643 | + |
---|
| 8644 | + if (!tnum_is_unknown(enforce_attach_type_range) && |
---|
| 8645 | + tnum_in(enforce_attach_type_range, reg->var_off)) |
---|
| 8646 | + env->prog->enforce_expected_attach_type = 1; |
---|
4568 | 8647 | return 0; |
---|
4569 | 8648 | } |
---|
4570 | 8649 | |
---|
.. | .. |
---|
4608 | 8687 | BRANCH = 2, |
---|
4609 | 8688 | }; |
---|
4610 | 8689 | |
---|
4611 | | -#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) |
---|
| 8690 | +static u32 state_htab_size(struct bpf_verifier_env *env) |
---|
| 8691 | +{ |
---|
| 8692 | + return env->prog->len; |
---|
| 8693 | +} |
---|
4612 | 8694 | |
---|
4613 | | -static int *insn_stack; /* stack of insns to process */ |
---|
4614 | | -static int cur_stack; /* current stack index */ |
---|
4615 | | -static int *insn_state; |
---|
| 8695 | +static struct bpf_verifier_state_list **explored_state( |
---|
| 8696 | + struct bpf_verifier_env *env, |
---|
| 8697 | + int idx) |
---|
| 8698 | +{ |
---|
| 8699 | + struct bpf_verifier_state *cur = env->cur_state; |
---|
| 8700 | + struct bpf_func_state *state = cur->frame[cur->curframe]; |
---|
| 8701 | + |
---|
| 8702 | + return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; |
---|
| 8703 | +} |
---|
| 8704 | + |
---|
| 8705 | +static void init_explored_state(struct bpf_verifier_env *env, int idx) |
---|
| 8706 | +{ |
---|
| 8707 | + env->insn_aux_data[idx].prune_point = true; |
---|
| 8708 | +} |
---|
4616 | 8709 | |
---|
4617 | 8710 | /* t, w, e - match pseudo-code above: |
---|
4618 | 8711 | * t - index of current instruction |
---|
4619 | 8712 | * w - next instruction |
---|
4620 | 8713 | * e - edge |
---|
4621 | 8714 | */ |
---|
4622 | | -static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) |
---|
| 8715 | +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, |
---|
| 8716 | + bool loop_ok) |
---|
4623 | 8717 | { |
---|
| 8718 | + int *insn_stack = env->cfg.insn_stack; |
---|
| 8719 | + int *insn_state = env->cfg.insn_state; |
---|
| 8720 | + |
---|
4624 | 8721 | if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) |
---|
4625 | 8722 | return 0; |
---|
4626 | 8723 | |
---|
.. | .. |
---|
4628 | 8725 | return 0; |
---|
4629 | 8726 | |
---|
4630 | 8727 | if (w < 0 || w >= env->prog->len) { |
---|
| 8728 | + verbose_linfo(env, t, "%d: ", t); |
---|
4631 | 8729 | verbose(env, "jump out of range from insn %d to %d\n", t, w); |
---|
4632 | 8730 | return -EINVAL; |
---|
4633 | 8731 | } |
---|
4634 | 8732 | |
---|
4635 | 8733 | if (e == BRANCH) |
---|
4636 | 8734 | /* mark branch target for state pruning */ |
---|
4637 | | - env->explored_states[w] = STATE_LIST_MARK; |
---|
| 8735 | + init_explored_state(env, w); |
---|
4638 | 8736 | |
---|
4639 | 8737 | if (insn_state[w] == 0) { |
---|
4640 | 8738 | /* tree-edge */ |
---|
4641 | 8739 | insn_state[t] = DISCOVERED | e; |
---|
4642 | 8740 | insn_state[w] = DISCOVERED; |
---|
4643 | | - if (cur_stack >= env->prog->len) |
---|
| 8741 | + if (env->cfg.cur_stack >= env->prog->len) |
---|
4644 | 8742 | return -E2BIG; |
---|
4645 | | - insn_stack[cur_stack++] = w; |
---|
| 8743 | + insn_stack[env->cfg.cur_stack++] = w; |
---|
4646 | 8744 | return 1; |
---|
4647 | 8745 | } else if ((insn_state[w] & 0xF0) == DISCOVERED) { |
---|
| 8746 | + if (loop_ok && env->bpf_capable) |
---|
| 8747 | + return 0; |
---|
| 8748 | + verbose_linfo(env, t, "%d: ", t); |
---|
| 8749 | + verbose_linfo(env, w, "%d: ", w); |
---|
4648 | 8750 | verbose(env, "back-edge from insn %d to %d\n", t, w); |
---|
4649 | 8751 | return -EINVAL; |
---|
4650 | 8752 | } else if (insn_state[w] == EXPLORED) { |
---|
.. | .. |
---|
4664 | 8766 | { |
---|
4665 | 8767 | struct bpf_insn *insns = env->prog->insnsi; |
---|
4666 | 8768 | int insn_cnt = env->prog->len; |
---|
| 8769 | + int *insn_stack, *insn_state; |
---|
4667 | 8770 | int ret = 0; |
---|
4668 | 8771 | int i, t; |
---|
4669 | 8772 | |
---|
4670 | | - ret = check_subprogs(env); |
---|
4671 | | - if (ret < 0) |
---|
4672 | | - return ret; |
---|
4673 | | - |
---|
4674 | | - insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); |
---|
| 8773 | + insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); |
---|
4675 | 8774 | if (!insn_state) |
---|
4676 | 8775 | return -ENOMEM; |
---|
4677 | 8776 | |
---|
4678 | | - insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); |
---|
| 8777 | + insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); |
---|
4679 | 8778 | if (!insn_stack) { |
---|
4680 | | - kfree(insn_state); |
---|
| 8779 | + kvfree(insn_state); |
---|
4681 | 8780 | return -ENOMEM; |
---|
4682 | 8781 | } |
---|
4683 | 8782 | |
---|
4684 | 8783 | insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ |
---|
4685 | 8784 | insn_stack[0] = 0; /* 0 is the first instruction */ |
---|
4686 | | - cur_stack = 1; |
---|
| 8785 | + env->cfg.cur_stack = 1; |
---|
4687 | 8786 | |
---|
4688 | 8787 | peek_stack: |
---|
4689 | | - if (cur_stack == 0) |
---|
| 8788 | + if (env->cfg.cur_stack == 0) |
---|
4690 | 8789 | goto check_state; |
---|
4691 | | - t = insn_stack[cur_stack - 1]; |
---|
| 8790 | + t = insn_stack[env->cfg.cur_stack - 1]; |
---|
4692 | 8791 | |
---|
4693 | | - if (BPF_CLASS(insns[t].code) == BPF_JMP) { |
---|
| 8792 | + if (BPF_CLASS(insns[t].code) == BPF_JMP || |
---|
| 8793 | + BPF_CLASS(insns[t].code) == BPF_JMP32) { |
---|
4694 | 8794 | u8 opcode = BPF_OP(insns[t].code); |
---|
4695 | 8795 | |
---|
4696 | 8796 | if (opcode == BPF_EXIT) { |
---|
4697 | 8797 | goto mark_explored; |
---|
4698 | 8798 | } else if (opcode == BPF_CALL) { |
---|
4699 | | - ret = push_insn(t, t + 1, FALLTHROUGH, env); |
---|
| 8799 | + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); |
---|
4700 | 8800 | if (ret == 1) |
---|
4701 | 8801 | goto peek_stack; |
---|
4702 | 8802 | else if (ret < 0) |
---|
4703 | 8803 | goto err_free; |
---|
4704 | 8804 | if (t + 1 < insn_cnt) |
---|
4705 | | - env->explored_states[t + 1] = STATE_LIST_MARK; |
---|
| 8805 | + init_explored_state(env, t + 1); |
---|
4706 | 8806 | if (insns[t].src_reg == BPF_PSEUDO_CALL) { |
---|
4707 | | - env->explored_states[t] = STATE_LIST_MARK; |
---|
4708 | | - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); |
---|
| 8807 | + init_explored_state(env, t); |
---|
| 8808 | + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, |
---|
| 8809 | + env, false); |
---|
4709 | 8810 | if (ret == 1) |
---|
4710 | 8811 | goto peek_stack; |
---|
4711 | 8812 | else if (ret < 0) |
---|
.. | .. |
---|
4718 | 8819 | } |
---|
4719 | 8820 | /* unconditional jump with single edge */ |
---|
4720 | 8821 | ret = push_insn(t, t + insns[t].off + 1, |
---|
4721 | | - FALLTHROUGH, env); |
---|
| 8822 | + FALLTHROUGH, env, true); |
---|
4722 | 8823 | if (ret == 1) |
---|
4723 | 8824 | goto peek_stack; |
---|
4724 | 8825 | else if (ret < 0) |
---|
4725 | 8826 | goto err_free; |
---|
| 8827 | + /* unconditional jmp is not a good pruning point, |
---|
| 8828 | + * but it's marked, since backtracking needs |
---|
| 8829 | + * to record jmp history in is_state_visited(). |
---|
| 8830 | + */ |
---|
| 8831 | + init_explored_state(env, t + insns[t].off + 1); |
---|
4726 | 8832 | /* tell verifier to check for equivalent states |
---|
4727 | 8833 | * after every call and jump |
---|
4728 | 8834 | */ |
---|
4729 | 8835 | if (t + 1 < insn_cnt) |
---|
4730 | | - env->explored_states[t + 1] = STATE_LIST_MARK; |
---|
| 8836 | + init_explored_state(env, t + 1); |
---|
4731 | 8837 | } else { |
---|
4732 | 8838 | /* conditional jump with two edges */ |
---|
4733 | | - env->explored_states[t] = STATE_LIST_MARK; |
---|
4734 | | - ret = push_insn(t, t + 1, FALLTHROUGH, env); |
---|
| 8839 | + init_explored_state(env, t); |
---|
| 8840 | + ret = push_insn(t, t + 1, FALLTHROUGH, env, true); |
---|
4735 | 8841 | if (ret == 1) |
---|
4736 | 8842 | goto peek_stack; |
---|
4737 | 8843 | else if (ret < 0) |
---|
4738 | 8844 | goto err_free; |
---|
4739 | 8845 | |
---|
4740 | | - ret = push_insn(t, t + insns[t].off + 1, BRANCH, env); |
---|
| 8846 | + ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true); |
---|
4741 | 8847 | if (ret == 1) |
---|
4742 | 8848 | goto peek_stack; |
---|
4743 | 8849 | else if (ret < 0) |
---|
.. | .. |
---|
4747 | 8853 | /* all other non-branch instructions with single |
---|
4748 | 8854 | * fall-through edge |
---|
4749 | 8855 | */ |
---|
4750 | | - ret = push_insn(t, t + 1, FALLTHROUGH, env); |
---|
| 8856 | + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); |
---|
4751 | 8857 | if (ret == 1) |
---|
4752 | 8858 | goto peek_stack; |
---|
4753 | 8859 | else if (ret < 0) |
---|
.. | .. |
---|
4756 | 8862 | |
---|
4757 | 8863 | mark_explored: |
---|
4758 | 8864 | insn_state[t] = EXPLORED; |
---|
4759 | | - if (cur_stack-- <= 0) { |
---|
| 8865 | + if (env->cfg.cur_stack-- <= 0) { |
---|
4760 | 8866 | verbose(env, "pop stack internal bug\n"); |
---|
4761 | 8867 | ret = -EFAULT; |
---|
4762 | 8868 | goto err_free; |
---|
.. | .. |
---|
4774 | 8880 | ret = 0; /* cfg looks good */ |
---|
4775 | 8881 | |
---|
4776 | 8882 | err_free: |
---|
4777 | | - kfree(insn_state); |
---|
4778 | | - kfree(insn_stack); |
---|
| 8883 | + kvfree(insn_state); |
---|
| 8884 | + kvfree(insn_stack); |
---|
| 8885 | + env->cfg.insn_state = env->cfg.insn_stack = NULL; |
---|
4779 | 8886 | return ret; |
---|
| 8887 | +} |
---|
| 8888 | + |
---|
| 8889 | +static int check_abnormal_return(struct bpf_verifier_env *env) |
---|
| 8890 | +{ |
---|
| 8891 | + int i; |
---|
| 8892 | + |
---|
| 8893 | + for (i = 1; i < env->subprog_cnt; i++) { |
---|
| 8894 | + if (env->subprog_info[i].has_ld_abs) { |
---|
| 8895 | + verbose(env, "LD_ABS is not allowed in subprogs without BTF\n"); |
---|
| 8896 | + return -EINVAL; |
---|
| 8897 | + } |
---|
| 8898 | + if (env->subprog_info[i].has_tail_call) { |
---|
| 8899 | + verbose(env, "tail_call is not allowed in subprogs without BTF\n"); |
---|
| 8900 | + return -EINVAL; |
---|
| 8901 | + } |
---|
| 8902 | + } |
---|
| 8903 | + return 0; |
---|
| 8904 | +} |
---|
| 8905 | + |
---|
| 8906 | +/* The minimum supported BTF func info size */ |
---|
| 8907 | +#define MIN_BPF_FUNCINFO_SIZE 8 |
---|
| 8908 | +#define MAX_FUNCINFO_REC_SIZE 252 |
---|
| 8909 | + |
---|
| 8910 | +static int check_btf_func(struct bpf_verifier_env *env, |
---|
| 8911 | + const union bpf_attr *attr, |
---|
| 8912 | + union bpf_attr __user *uattr) |
---|
| 8913 | +{ |
---|
| 8914 | + const struct btf_type *type, *func_proto, *ret_type; |
---|
| 8915 | + u32 i, nfuncs, urec_size, min_size; |
---|
| 8916 | + u32 krec_size = sizeof(struct bpf_func_info); |
---|
| 8917 | + struct bpf_func_info *krecord; |
---|
| 8918 | + struct bpf_func_info_aux *info_aux = NULL; |
---|
| 8919 | + struct bpf_prog *prog; |
---|
| 8920 | + const struct btf *btf; |
---|
| 8921 | + void __user *urecord; |
---|
| 8922 | + u32 prev_offset = 0; |
---|
| 8923 | + bool scalar_return; |
---|
| 8924 | + int ret = -ENOMEM; |
---|
| 8925 | + |
---|
| 8926 | + nfuncs = attr->func_info_cnt; |
---|
| 8927 | + if (!nfuncs) { |
---|
| 8928 | + if (check_abnormal_return(env)) |
---|
| 8929 | + return -EINVAL; |
---|
| 8930 | + return 0; |
---|
| 8931 | + } |
---|
| 8932 | + |
---|
| 8933 | + if (nfuncs != env->subprog_cnt) { |
---|
| 8934 | + verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); |
---|
| 8935 | + return -EINVAL; |
---|
| 8936 | + } |
---|
| 8937 | + |
---|
| 8938 | + urec_size = attr->func_info_rec_size; |
---|
| 8939 | + if (urec_size < MIN_BPF_FUNCINFO_SIZE || |
---|
| 8940 | + urec_size > MAX_FUNCINFO_REC_SIZE || |
---|
| 8941 | + urec_size % sizeof(u32)) { |
---|
| 8942 | + verbose(env, "invalid func info rec size %u\n", urec_size); |
---|
| 8943 | + return -EINVAL; |
---|
| 8944 | + } |
---|
| 8945 | + |
---|
| 8946 | + prog = env->prog; |
---|
| 8947 | + btf = prog->aux->btf; |
---|
| 8948 | + |
---|
| 8949 | + urecord = u64_to_user_ptr(attr->func_info); |
---|
| 8950 | + min_size = min_t(u32, krec_size, urec_size); |
---|
| 8951 | + |
---|
| 8952 | + krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); |
---|
| 8953 | + if (!krecord) |
---|
| 8954 | + return -ENOMEM; |
---|
| 8955 | + info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); |
---|
| 8956 | + if (!info_aux) |
---|
| 8957 | + goto err_free; |
---|
| 8958 | + |
---|
| 8959 | + for (i = 0; i < nfuncs; i++) { |
---|
| 8960 | + ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); |
---|
| 8961 | + if (ret) { |
---|
| 8962 | + if (ret == -E2BIG) { |
---|
| 8963 | + verbose(env, "nonzero tailing record in func info"); |
---|
| 8964 | + /* set the size kernel expects so loader can zero |
---|
| 8965 | + * out the rest of the record. |
---|
| 8966 | + */ |
---|
| 8967 | + if (put_user(min_size, &uattr->func_info_rec_size)) |
---|
| 8968 | + ret = -EFAULT; |
---|
| 8969 | + } |
---|
| 8970 | + goto err_free; |
---|
| 8971 | + } |
---|
| 8972 | + |
---|
| 8973 | + if (copy_from_user(&krecord[i], urecord, min_size)) { |
---|
| 8974 | + ret = -EFAULT; |
---|
| 8975 | + goto err_free; |
---|
| 8976 | + } |
---|
| 8977 | + |
---|
| 8978 | + /* check insn_off */ |
---|
| 8979 | + ret = -EINVAL; |
---|
| 8980 | + if (i == 0) { |
---|
| 8981 | + if (krecord[i].insn_off) { |
---|
| 8982 | + verbose(env, |
---|
| 8983 | + "nonzero insn_off %u for the first func info record", |
---|
| 8984 | + krecord[i].insn_off); |
---|
| 8985 | + goto err_free; |
---|
| 8986 | + } |
---|
| 8987 | + } else if (krecord[i].insn_off <= prev_offset) { |
---|
| 8988 | + verbose(env, |
---|
| 8989 | + "same or smaller insn offset (%u) than previous func info record (%u)", |
---|
| 8990 | + krecord[i].insn_off, prev_offset); |
---|
| 8991 | + goto err_free; |
---|
| 8992 | + } |
---|
| 8993 | + |
---|
| 8994 | + if (env->subprog_info[i].start != krecord[i].insn_off) { |
---|
| 8995 | + verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); |
---|
| 8996 | + goto err_free; |
---|
| 8997 | + } |
---|
| 8998 | + |
---|
| 8999 | + /* check type_id */ |
---|
| 9000 | + type = btf_type_by_id(btf, krecord[i].type_id); |
---|
| 9001 | + if (!type || !btf_type_is_func(type)) { |
---|
| 9002 | + verbose(env, "invalid type id %d in func info", |
---|
| 9003 | + krecord[i].type_id); |
---|
| 9004 | + goto err_free; |
---|
| 9005 | + } |
---|
| 9006 | + info_aux[i].linkage = BTF_INFO_VLEN(type->info); |
---|
| 9007 | + |
---|
| 9008 | + func_proto = btf_type_by_id(btf, type->type); |
---|
| 9009 | + if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto))) |
---|
| 9010 | + /* btf_func_check() already verified it during BTF load */ |
---|
| 9011 | + goto err_free; |
---|
| 9012 | + ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL); |
---|
| 9013 | + scalar_return = |
---|
| 9014 | + btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type); |
---|
| 9015 | + if (i && !scalar_return && env->subprog_info[i].has_ld_abs) { |
---|
| 9016 | + verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n"); |
---|
| 9017 | + goto err_free; |
---|
| 9018 | + } |
---|
| 9019 | + if (i && !scalar_return && env->subprog_info[i].has_tail_call) { |
---|
| 9020 | + verbose(env, "tail_call is only allowed in functions that return 'int'.\n"); |
---|
| 9021 | + goto err_free; |
---|
| 9022 | + } |
---|
| 9023 | + |
---|
| 9024 | + prev_offset = krecord[i].insn_off; |
---|
| 9025 | + urecord += urec_size; |
---|
| 9026 | + } |
---|
| 9027 | + |
---|
| 9028 | + prog->aux->func_info = krecord; |
---|
| 9029 | + prog->aux->func_info_cnt = nfuncs; |
---|
| 9030 | + prog->aux->func_info_aux = info_aux; |
---|
| 9031 | + return 0; |
---|
| 9032 | + |
---|
| 9033 | +err_free: |
---|
| 9034 | + kvfree(krecord); |
---|
| 9035 | + kfree(info_aux); |
---|
| 9036 | + return ret; |
---|
| 9037 | +} |
---|
| 9038 | + |
---|
| 9039 | +static void adjust_btf_func(struct bpf_verifier_env *env) |
---|
| 9040 | +{ |
---|
| 9041 | + struct bpf_prog_aux *aux = env->prog->aux; |
---|
| 9042 | + int i; |
---|
| 9043 | + |
---|
| 9044 | + if (!aux->func_info) |
---|
| 9045 | + return; |
---|
| 9046 | + |
---|
| 9047 | + for (i = 0; i < env->subprog_cnt; i++) |
---|
| 9048 | + aux->func_info[i].insn_off = env->subprog_info[i].start; |
---|
| 9049 | +} |
---|
| 9050 | + |
---|
| 9051 | +#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \ |
---|
| 9052 | + sizeof(((struct bpf_line_info *)(0))->line_col)) |
---|
| 9053 | +#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE |
---|
| 9054 | + |
---|
| 9055 | +static int check_btf_line(struct bpf_verifier_env *env, |
---|
| 9056 | + const union bpf_attr *attr, |
---|
| 9057 | + union bpf_attr __user *uattr) |
---|
| 9058 | +{ |
---|
| 9059 | + u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; |
---|
| 9060 | + struct bpf_subprog_info *sub; |
---|
| 9061 | + struct bpf_line_info *linfo; |
---|
| 9062 | + struct bpf_prog *prog; |
---|
| 9063 | + const struct btf *btf; |
---|
| 9064 | + void __user *ulinfo; |
---|
| 9065 | + int err; |
---|
| 9066 | + |
---|
| 9067 | + nr_linfo = attr->line_info_cnt; |
---|
| 9068 | + if (!nr_linfo) |
---|
| 9069 | + return 0; |
---|
| 9070 | + if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info)) |
---|
| 9071 | + return -EINVAL; |
---|
| 9072 | + |
---|
| 9073 | + rec_size = attr->line_info_rec_size; |
---|
| 9074 | + if (rec_size < MIN_BPF_LINEINFO_SIZE || |
---|
| 9075 | + rec_size > MAX_LINEINFO_REC_SIZE || |
---|
| 9076 | + rec_size & (sizeof(u32) - 1)) |
---|
| 9077 | + return -EINVAL; |
---|
| 9078 | + |
---|
| 9079 | + /* Need to zero it in case the userspace may |
---|
| 9080 | + * pass in a smaller bpf_line_info object. |
---|
| 9081 | + */ |
---|
| 9082 | + linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info), |
---|
| 9083 | + GFP_KERNEL | __GFP_NOWARN); |
---|
| 9084 | + if (!linfo) |
---|
| 9085 | + return -ENOMEM; |
---|
| 9086 | + |
---|
| 9087 | + prog = env->prog; |
---|
| 9088 | + btf = prog->aux->btf; |
---|
| 9089 | + |
---|
| 9090 | + s = 0; |
---|
| 9091 | + sub = env->subprog_info; |
---|
| 9092 | + ulinfo = u64_to_user_ptr(attr->line_info); |
---|
| 9093 | + expected_size = sizeof(struct bpf_line_info); |
---|
| 9094 | + ncopy = min_t(u32, expected_size, rec_size); |
---|
| 9095 | + for (i = 0; i < nr_linfo; i++) { |
---|
| 9096 | + err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size); |
---|
| 9097 | + if (err) { |
---|
| 9098 | + if (err == -E2BIG) { |
---|
| 9099 | + verbose(env, "nonzero tailing record in line_info"); |
---|
| 9100 | + if (put_user(expected_size, |
---|
| 9101 | + &uattr->line_info_rec_size)) |
---|
| 9102 | + err = -EFAULT; |
---|
| 9103 | + } |
---|
| 9104 | + goto err_free; |
---|
| 9105 | + } |
---|
| 9106 | + |
---|
| 9107 | + if (copy_from_user(&linfo[i], ulinfo, ncopy)) { |
---|
| 9108 | + err = -EFAULT; |
---|
| 9109 | + goto err_free; |
---|
| 9110 | + } |
---|
| 9111 | + |
---|
| 9112 | + /* |
---|
| 9113 | + * Check insn_off to ensure |
---|
| 9114 | + * 1) strictly increasing AND |
---|
| 9115 | + * 2) bounded by prog->len |
---|
| 9116 | + * |
---|
| 9117 | + * The linfo[0].insn_off == 0 check logically falls into |
---|
| 9118 | + * the later "missing bpf_line_info for func..." case |
---|
| 9119 | + * because the first linfo[0].insn_off must be the |
---|
| 9120 | + * first sub also and the first sub must have |
---|
| 9121 | + * subprog_info[0].start == 0. |
---|
| 9122 | + */ |
---|
| 9123 | + if ((i && linfo[i].insn_off <= prev_offset) || |
---|
| 9124 | + linfo[i].insn_off >= prog->len) { |
---|
| 9125 | + verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", |
---|
| 9126 | + i, linfo[i].insn_off, prev_offset, |
---|
| 9127 | + prog->len); |
---|
| 9128 | + err = -EINVAL; |
---|
| 9129 | + goto err_free; |
---|
| 9130 | + } |
---|
| 9131 | + |
---|
| 9132 | + if (!prog->insnsi[linfo[i].insn_off].code) { |
---|
| 9133 | + verbose(env, |
---|
| 9134 | + "Invalid insn code at line_info[%u].insn_off\n", |
---|
| 9135 | + i); |
---|
| 9136 | + err = -EINVAL; |
---|
| 9137 | + goto err_free; |
---|
| 9138 | + } |
---|
| 9139 | + |
---|
| 9140 | + if (!btf_name_by_offset(btf, linfo[i].line_off) || |
---|
| 9141 | + !btf_name_by_offset(btf, linfo[i].file_name_off)) { |
---|
| 9142 | + verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i); |
---|
| 9143 | + err = -EINVAL; |
---|
| 9144 | + goto err_free; |
---|
| 9145 | + } |
---|
| 9146 | + |
---|
| 9147 | + if (s != env->subprog_cnt) { |
---|
| 9148 | + if (linfo[i].insn_off == sub[s].start) { |
---|
| 9149 | + sub[s].linfo_idx = i; |
---|
| 9150 | + s++; |
---|
| 9151 | + } else if (sub[s].start < linfo[i].insn_off) { |
---|
| 9152 | + verbose(env, "missing bpf_line_info for func#%u\n", s); |
---|
| 9153 | + err = -EINVAL; |
---|
| 9154 | + goto err_free; |
---|
| 9155 | + } |
---|
| 9156 | + } |
---|
| 9157 | + |
---|
| 9158 | + prev_offset = linfo[i].insn_off; |
---|
| 9159 | + ulinfo += rec_size; |
---|
| 9160 | + } |
---|
| 9161 | + |
---|
| 9162 | + if (s != env->subprog_cnt) { |
---|
| 9163 | + verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", |
---|
| 9164 | + env->subprog_cnt - s, s); |
---|
| 9165 | + err = -EINVAL; |
---|
| 9166 | + goto err_free; |
---|
| 9167 | + } |
---|
| 9168 | + |
---|
| 9169 | + prog->aux->linfo = linfo; |
---|
| 9170 | + prog->aux->nr_linfo = nr_linfo; |
---|
| 9171 | + |
---|
| 9172 | + return 0; |
---|
| 9173 | + |
---|
| 9174 | +err_free: |
---|
| 9175 | + kvfree(linfo); |
---|
| 9176 | + return err; |
---|
| 9177 | +} |
---|
| 9178 | + |
---|
| 9179 | +static int check_btf_info(struct bpf_verifier_env *env, |
---|
| 9180 | + const union bpf_attr *attr, |
---|
| 9181 | + union bpf_attr __user *uattr) |
---|
| 9182 | +{ |
---|
| 9183 | + struct btf *btf; |
---|
| 9184 | + int err; |
---|
| 9185 | + |
---|
| 9186 | + if (!attr->func_info_cnt && !attr->line_info_cnt) { |
---|
| 9187 | + if (check_abnormal_return(env)) |
---|
| 9188 | + return -EINVAL; |
---|
| 9189 | + return 0; |
---|
| 9190 | + } |
---|
| 9191 | + |
---|
| 9192 | + btf = btf_get_by_fd(attr->prog_btf_fd); |
---|
| 9193 | + if (IS_ERR(btf)) |
---|
| 9194 | + return PTR_ERR(btf); |
---|
| 9195 | + env->prog->aux->btf = btf; |
---|
| 9196 | + |
---|
| 9197 | + err = check_btf_func(env, attr, uattr); |
---|
| 9198 | + if (err) |
---|
| 9199 | + return err; |
---|
| 9200 | + |
---|
| 9201 | + err = check_btf_line(env, attr, uattr); |
---|
| 9202 | + if (err) |
---|
| 9203 | + return err; |
---|
| 9204 | + |
---|
| 9205 | + return 0; |
---|
4780 | 9206 | } |
---|
4781 | 9207 | |
---|
4782 | 9208 | /* check %cur's range satisfies %old's */ |
---|
.. | .. |
---|
4786 | 9212 | return old->umin_value <= cur->umin_value && |
---|
4787 | 9213 | old->umax_value >= cur->umax_value && |
---|
4788 | 9214 | old->smin_value <= cur->smin_value && |
---|
4789 | | - old->smax_value >= cur->smax_value; |
---|
| 9215 | + old->smax_value >= cur->smax_value && |
---|
| 9216 | + old->u32_min_value <= cur->u32_min_value && |
---|
| 9217 | + old->u32_max_value >= cur->u32_max_value && |
---|
| 9218 | + old->s32_min_value <= cur->s32_min_value && |
---|
| 9219 | + old->s32_max_value >= cur->s32_max_value; |
---|
4790 | 9220 | } |
---|
4791 | 9221 | |
---|
4792 | 9222 | /* If in the old state two registers had the same id, then they need to have |
---|
.. | .. |
---|
4816 | 9246 | /* We ran out of idmap slots, which should be impossible */ |
---|
4817 | 9247 | WARN_ON_ONCE(1); |
---|
4818 | 9248 | return false; |
---|
| 9249 | +} |
---|
| 9250 | + |
---|
| 9251 | +static void clean_func_state(struct bpf_verifier_env *env, |
---|
| 9252 | + struct bpf_func_state *st) |
---|
| 9253 | +{ |
---|
| 9254 | + enum bpf_reg_liveness live; |
---|
| 9255 | + int i, j; |
---|
| 9256 | + |
---|
| 9257 | + for (i = 0; i < BPF_REG_FP; i++) { |
---|
| 9258 | + live = st->regs[i].live; |
---|
| 9259 | + /* liveness must not touch this register anymore */ |
---|
| 9260 | + st->regs[i].live |= REG_LIVE_DONE; |
---|
| 9261 | + if (!(live & REG_LIVE_READ)) |
---|
| 9262 | + /* since the register is unused, clear its state |
---|
| 9263 | + * to make further comparison simpler |
---|
| 9264 | + */ |
---|
| 9265 | + __mark_reg_not_init(env, &st->regs[i]); |
---|
| 9266 | + } |
---|
| 9267 | + |
---|
| 9268 | + for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { |
---|
| 9269 | + live = st->stack[i].spilled_ptr.live; |
---|
| 9270 | + /* liveness must not touch this stack slot anymore */ |
---|
| 9271 | + st->stack[i].spilled_ptr.live |= REG_LIVE_DONE; |
---|
| 9272 | + if (!(live & REG_LIVE_READ)) { |
---|
| 9273 | + __mark_reg_not_init(env, &st->stack[i].spilled_ptr); |
---|
| 9274 | + for (j = 0; j < BPF_REG_SIZE; j++) |
---|
| 9275 | + st->stack[i].slot_type[j] = STACK_INVALID; |
---|
| 9276 | + } |
---|
| 9277 | + } |
---|
| 9278 | +} |
---|
| 9279 | + |
---|
| 9280 | +static void clean_verifier_state(struct bpf_verifier_env *env, |
---|
| 9281 | + struct bpf_verifier_state *st) |
---|
| 9282 | +{ |
---|
| 9283 | + int i; |
---|
| 9284 | + |
---|
| 9285 | + if (st->frame[0]->regs[0].live & REG_LIVE_DONE) |
---|
| 9286 | + /* all regs in this state in all frames were already marked */ |
---|
| 9287 | + return; |
---|
| 9288 | + |
---|
| 9289 | + for (i = 0; i <= st->curframe; i++) |
---|
| 9290 | + clean_func_state(env, st->frame[i]); |
---|
| 9291 | +} |
---|
| 9292 | + |
---|
| 9293 | +/* the parentage chains form a tree. |
---|
| 9294 | + * the verifier states are added to state lists at given insn and |
---|
| 9295 | + * pushed into state stack for future exploration. |
---|
| 9296 | + * when the verifier reaches bpf_exit insn some of the verifer states |
---|
| 9297 | + * stored in the state lists have their final liveness state already, |
---|
| 9298 | + * but a lot of states will get revised from liveness point of view when |
---|
| 9299 | + * the verifier explores other branches. |
---|
| 9300 | + * Example: |
---|
| 9301 | + * 1: r0 = 1 |
---|
| 9302 | + * 2: if r1 == 100 goto pc+1 |
---|
| 9303 | + * 3: r0 = 2 |
---|
| 9304 | + * 4: exit |
---|
| 9305 | + * when the verifier reaches exit insn the register r0 in the state list of |
---|
| 9306 | + * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch |
---|
| 9307 | + * of insn 2 and goes exploring further. At the insn 4 it will walk the |
---|
| 9308 | + * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ. |
---|
| 9309 | + * |
---|
| 9310 | + * Since the verifier pushes the branch states as it sees them while exploring |
---|
| 9311 | + * the program the condition of walking the branch instruction for the second |
---|
| 9312 | + * time means that all states below this branch were already explored and |
---|
| 9313 | + * their final liveness markes are already propagated. |
---|
| 9314 | + * Hence when the verifier completes the search of state list in is_state_visited() |
---|
| 9315 | + * we can call this clean_live_states() function to mark all liveness states |
---|
| 9316 | + * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' |
---|
| 9317 | + * will not be used. |
---|
| 9318 | + * This function also clears the registers and stack for states that !READ |
---|
| 9319 | + * to simplify state merging. |
---|
| 9320 | + * |
---|
| 9321 | + * Important note here that walking the same branch instruction in the callee |
---|
| 9322 | + * doesn't meant that the states are DONE. The verifier has to compare |
---|
| 9323 | + * the callsites |
---|
| 9324 | + */ |
---|
| 9325 | +static void clean_live_states(struct bpf_verifier_env *env, int insn, |
---|
| 9326 | + struct bpf_verifier_state *cur) |
---|
| 9327 | +{ |
---|
| 9328 | + struct bpf_verifier_state_list *sl; |
---|
| 9329 | + int i; |
---|
| 9330 | + |
---|
| 9331 | + sl = *explored_state(env, insn); |
---|
| 9332 | + while (sl) { |
---|
| 9333 | + if (sl->state.branches) |
---|
| 9334 | + goto next; |
---|
| 9335 | + if (sl->state.insn_idx != insn || |
---|
| 9336 | + sl->state.curframe != cur->curframe) |
---|
| 9337 | + goto next; |
---|
| 9338 | + for (i = 0; i <= cur->curframe; i++) |
---|
| 9339 | + if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) |
---|
| 9340 | + goto next; |
---|
| 9341 | + clean_verifier_state(env, &sl->state); |
---|
| 9342 | +next: |
---|
| 9343 | + sl = sl->next; |
---|
| 9344 | + } |
---|
4819 | 9345 | } |
---|
4820 | 9346 | |
---|
4821 | 9347 | /* Returns true if (rold safe implies rcur safe) */ |
---|
.. | .. |
---|
4849 | 9375 | if (env->explore_alu_limits) |
---|
4850 | 9376 | return false; |
---|
4851 | 9377 | if (rcur->type == SCALAR_VALUE) { |
---|
| 9378 | + if (!rold->precise) |
---|
| 9379 | + return true; |
---|
4852 | 9380 | /* new val must satisfy old val knowledge */ |
---|
4853 | 9381 | return range_within(rold, rcur) && |
---|
4854 | 9382 | tnum_in(rold->var_off, rcur->var_off); |
---|
.. | .. |
---|
4865 | 9393 | case PTR_TO_MAP_VALUE: |
---|
4866 | 9394 | /* If the new min/max/var_off satisfy the old ones and |
---|
4867 | 9395 | * everything else matches, we are OK. |
---|
4868 | | - * We don't care about the 'id' value, because nothing |
---|
4869 | | - * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL) |
---|
| 9396 | + * 'id' is not compared, since it's only used for maps with |
---|
| 9397 | + * bpf_spin_lock inside map element and in such cases if |
---|
| 9398 | + * the rest of the prog is valid for one map element then |
---|
| 9399 | + * it's valid for all map elements regardless of the key |
---|
| 9400 | + * used in bpf_map_lookup() |
---|
4870 | 9401 | */ |
---|
4871 | 9402 | return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && |
---|
4872 | 9403 | range_within(rold, rcur) && |
---|
.. | .. |
---|
4911 | 9442 | case PTR_TO_CTX: |
---|
4912 | 9443 | case CONST_PTR_TO_MAP: |
---|
4913 | 9444 | case PTR_TO_PACKET_END: |
---|
| 9445 | + case PTR_TO_FLOW_KEYS: |
---|
| 9446 | + case PTR_TO_SOCKET: |
---|
| 9447 | + case PTR_TO_SOCKET_OR_NULL: |
---|
| 9448 | + case PTR_TO_SOCK_COMMON: |
---|
| 9449 | + case PTR_TO_SOCK_COMMON_OR_NULL: |
---|
| 9450 | + case PTR_TO_TCP_SOCK: |
---|
| 9451 | + case PTR_TO_TCP_SOCK_OR_NULL: |
---|
| 9452 | + case PTR_TO_XDP_SOCK: |
---|
4914 | 9453 | /* Only valid matches are exact, which memcmp() above |
---|
4915 | 9454 | * would have accepted |
---|
4916 | 9455 | */ |
---|
.. | .. |
---|
4929 | 9468 | { |
---|
4930 | 9469 | int i, spi; |
---|
4931 | 9470 | |
---|
4932 | | - /* if explored stack has more populated slots than current stack |
---|
4933 | | - * such stacks are not equivalent |
---|
4934 | | - */ |
---|
4935 | | - if (old->allocated_stack > cur->allocated_stack) |
---|
4936 | | - return false; |
---|
4937 | | - |
---|
4938 | 9471 | /* walk slots of the explored stack and ignore any additional |
---|
4939 | 9472 | * slots in the current stack, since explored(safe) state |
---|
4940 | 9473 | * didn't use them |
---|
.. | .. |
---|
4942 | 9475 | for (i = 0; i < old->allocated_stack; i++) { |
---|
4943 | 9476 | spi = i / BPF_REG_SIZE; |
---|
4944 | 9477 | |
---|
4945 | | - if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) |
---|
| 9478 | + if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { |
---|
| 9479 | + i += BPF_REG_SIZE - 1; |
---|
4946 | 9480 | /* explored state didn't use this */ |
---|
4947 | 9481 | continue; |
---|
| 9482 | + } |
---|
4948 | 9483 | |
---|
4949 | 9484 | if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) |
---|
4950 | 9485 | continue; |
---|
| 9486 | + |
---|
| 9487 | + /* explored stack has more populated slots than current stack |
---|
| 9488 | + * and these slots were used |
---|
| 9489 | + */ |
---|
| 9490 | + if (i >= cur->allocated_stack) |
---|
| 9491 | + return false; |
---|
| 9492 | + |
---|
4951 | 9493 | /* if old state was safe with misc data in the stack |
---|
4952 | 9494 | * it will be safe with zero-initialized stack. |
---|
4953 | 9495 | * The opposite is not true |
---|
.. | .. |
---|
4958 | 9500 | if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != |
---|
4959 | 9501 | cur->stack[spi].slot_type[i % BPF_REG_SIZE]) |
---|
4960 | 9502 | /* Ex: old explored (safe) state has STACK_SPILL in |
---|
4961 | | - * this stack slot, but current has has STACK_MISC -> |
---|
| 9503 | + * this stack slot, but current has STACK_MISC -> |
---|
4962 | 9504 | * these verifier states are not equivalent, |
---|
4963 | 9505 | * return false to continue verification of this path |
---|
4964 | 9506 | */ |
---|
4965 | 9507 | return false; |
---|
4966 | | - if (i % BPF_REG_SIZE) |
---|
| 9508 | + if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1) |
---|
4967 | 9509 | continue; |
---|
4968 | | - if (old->stack[spi].slot_type[0] != STACK_SPILL) |
---|
| 9510 | + if (!is_spilled_reg(&old->stack[spi])) |
---|
4969 | 9511 | continue; |
---|
4970 | 9512 | if (!regsafe(env, &old->stack[spi].spilled_ptr, |
---|
4971 | 9513 | &cur->stack[spi].spilled_ptr, idmap)) |
---|
.. | .. |
---|
4982 | 9524 | return false; |
---|
4983 | 9525 | } |
---|
4984 | 9526 | return true; |
---|
| 9527 | +} |
---|
| 9528 | + |
---|
| 9529 | +static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) |
---|
| 9530 | +{ |
---|
| 9531 | + if (old->acquired_refs != cur->acquired_refs) |
---|
| 9532 | + return false; |
---|
| 9533 | + return !memcmp(old->refs, cur->refs, |
---|
| 9534 | + sizeof(*old->refs) * old->acquired_refs); |
---|
4985 | 9535 | } |
---|
4986 | 9536 | |
---|
4987 | 9537 | /* compare two verifier states |
---|
.. | .. |
---|
5024 | 9574 | if (!stacksafe(env, old, cur, env->idmap_scratch)) |
---|
5025 | 9575 | return false; |
---|
5026 | 9576 | |
---|
| 9577 | + if (!refsafe(old, cur)) |
---|
| 9578 | + return false; |
---|
| 9579 | + |
---|
5027 | 9580 | return true; |
---|
5028 | 9581 | } |
---|
5029 | 9582 | |
---|
.. | .. |
---|
5042 | 9595 | if (old->speculative && !cur->speculative) |
---|
5043 | 9596 | return false; |
---|
5044 | 9597 | |
---|
| 9598 | + if (old->active_spin_lock != cur->active_spin_lock) |
---|
| 9599 | + return false; |
---|
| 9600 | + |
---|
5045 | 9601 | /* for states to be equal callsites have to be the same |
---|
5046 | 9602 | * and all frame states need to be equivalent |
---|
5047 | 9603 | */ |
---|
.. | .. |
---|
5052 | 9608 | return false; |
---|
5053 | 9609 | } |
---|
5054 | 9610 | return true; |
---|
| 9611 | +} |
---|
| 9612 | + |
---|
| 9613 | +/* Return 0 if no propagation happened. Return negative error code if error |
---|
| 9614 | + * happened. Otherwise, return the propagated bit. |
---|
| 9615 | + */ |
---|
| 9616 | +static int propagate_liveness_reg(struct bpf_verifier_env *env, |
---|
| 9617 | + struct bpf_reg_state *reg, |
---|
| 9618 | + struct bpf_reg_state *parent_reg) |
---|
| 9619 | +{ |
---|
| 9620 | + u8 parent_flag = parent_reg->live & REG_LIVE_READ; |
---|
| 9621 | + u8 flag = reg->live & REG_LIVE_READ; |
---|
| 9622 | + int err; |
---|
| 9623 | + |
---|
| 9624 | + /* When comes here, read flags of PARENT_REG or REG could be any of |
---|
| 9625 | + * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need |
---|
| 9626 | + * of propagation if PARENT_REG has strongest REG_LIVE_READ64. |
---|
| 9627 | + */ |
---|
| 9628 | + if (parent_flag == REG_LIVE_READ64 || |
---|
| 9629 | + /* Or if there is no read flag from REG. */ |
---|
| 9630 | + !flag || |
---|
| 9631 | + /* Or if the read flag from REG is the same as PARENT_REG. */ |
---|
| 9632 | + parent_flag == flag) |
---|
| 9633 | + return 0; |
---|
| 9634 | + |
---|
| 9635 | + err = mark_reg_read(env, reg, parent_reg, flag); |
---|
| 9636 | + if (err) |
---|
| 9637 | + return err; |
---|
| 9638 | + |
---|
| 9639 | + return flag; |
---|
5055 | 9640 | } |
---|
5056 | 9641 | |
---|
5057 | 9642 | /* A write screens off any subsequent reads; but write marks come from the |
---|
.. | .. |
---|
5065 | 9650 | const struct bpf_verifier_state *vstate, |
---|
5066 | 9651 | struct bpf_verifier_state *vparent) |
---|
5067 | 9652 | { |
---|
5068 | | - int i, frame, err = 0; |
---|
| 9653 | + struct bpf_reg_state *state_reg, *parent_reg; |
---|
5069 | 9654 | struct bpf_func_state *state, *parent; |
---|
| 9655 | + int i, frame, err = 0; |
---|
5070 | 9656 | |
---|
5071 | 9657 | if (vparent->curframe != vstate->curframe) { |
---|
5072 | 9658 | WARN(1, "propagate_live: parent frame %d current frame %d\n", |
---|
.. | .. |
---|
5075 | 9661 | } |
---|
5076 | 9662 | /* Propagate read liveness of registers... */ |
---|
5077 | 9663 | BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); |
---|
5078 | | - /* We don't need to worry about FP liveness because it's read-only */ |
---|
5079 | | - for (i = 0; i < BPF_REG_FP; i++) { |
---|
5080 | | - if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ) |
---|
5081 | | - continue; |
---|
5082 | | - if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) { |
---|
5083 | | - err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i], |
---|
5084 | | - &vparent->frame[vstate->curframe]->regs[i]); |
---|
5085 | | - if (err) |
---|
| 9664 | + for (frame = 0; frame <= vstate->curframe; frame++) { |
---|
| 9665 | + parent = vparent->frame[frame]; |
---|
| 9666 | + state = vstate->frame[frame]; |
---|
| 9667 | + parent_reg = parent->regs; |
---|
| 9668 | + state_reg = state->regs; |
---|
| 9669 | + /* We don't need to worry about FP liveness, it's read-only */ |
---|
| 9670 | + for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { |
---|
| 9671 | + err = propagate_liveness_reg(env, &state_reg[i], |
---|
| 9672 | + &parent_reg[i]); |
---|
| 9673 | + if (err < 0) |
---|
| 9674 | + return err; |
---|
| 9675 | + if (err == REG_LIVE_READ64) |
---|
| 9676 | + mark_insn_zext(env, &parent_reg[i]); |
---|
| 9677 | + } |
---|
| 9678 | + |
---|
| 9679 | + /* Propagate stack slots. */ |
---|
| 9680 | + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && |
---|
| 9681 | + i < parent->allocated_stack / BPF_REG_SIZE; i++) { |
---|
| 9682 | + parent_reg = &parent->stack[i].spilled_ptr; |
---|
| 9683 | + state_reg = &state->stack[i].spilled_ptr; |
---|
| 9684 | + err = propagate_liveness_reg(env, state_reg, |
---|
| 9685 | + parent_reg); |
---|
| 9686 | + if (err < 0) |
---|
5086 | 9687 | return err; |
---|
5087 | 9688 | } |
---|
5088 | 9689 | } |
---|
| 9690 | + return 0; |
---|
| 9691 | +} |
---|
5089 | 9692 | |
---|
5090 | | - /* ... and stack slots */ |
---|
5091 | | - for (frame = 0; frame <= vstate->curframe; frame++) { |
---|
5092 | | - state = vstate->frame[frame]; |
---|
5093 | | - parent = vparent->frame[frame]; |
---|
5094 | | - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && |
---|
5095 | | - i < parent->allocated_stack / BPF_REG_SIZE; i++) { |
---|
5096 | | - if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) |
---|
| 9693 | +/* find precise scalars in the previous equivalent state and |
---|
| 9694 | + * propagate them into the current state |
---|
| 9695 | + */ |
---|
| 9696 | +static int propagate_precision(struct bpf_verifier_env *env, |
---|
| 9697 | + const struct bpf_verifier_state *old) |
---|
| 9698 | +{ |
---|
| 9699 | + struct bpf_reg_state *state_reg; |
---|
| 9700 | + struct bpf_func_state *state; |
---|
| 9701 | + int i, err = 0, fr; |
---|
| 9702 | + |
---|
| 9703 | + for (fr = old->curframe; fr >= 0; fr--) { |
---|
| 9704 | + state = old->frame[fr]; |
---|
| 9705 | + state_reg = state->regs; |
---|
| 9706 | + for (i = 0; i < BPF_REG_FP; i++, state_reg++) { |
---|
| 9707 | + if (state_reg->type != SCALAR_VALUE || |
---|
| 9708 | + !state_reg->precise || |
---|
| 9709 | + !(state_reg->live & REG_LIVE_READ)) |
---|
5097 | 9710 | continue; |
---|
5098 | | - if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) |
---|
5099 | | - mark_reg_read(env, &state->stack[i].spilled_ptr, |
---|
5100 | | - &parent->stack[i].spilled_ptr); |
---|
| 9711 | + if (env->log.level & BPF_LOG_LEVEL2) |
---|
| 9712 | + verbose(env, "frame %d: propagating r%d\n", fr, i); |
---|
| 9713 | + err = mark_chain_precision_frame(env, fr, i); |
---|
| 9714 | + if (err < 0) |
---|
| 9715 | + return err; |
---|
| 9716 | + } |
---|
| 9717 | + |
---|
| 9718 | + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { |
---|
| 9719 | + if (!is_spilled_reg(&state->stack[i])) |
---|
| 9720 | + continue; |
---|
| 9721 | + state_reg = &state->stack[i].spilled_ptr; |
---|
| 9722 | + if (state_reg->type != SCALAR_VALUE || |
---|
| 9723 | + !state_reg->precise || |
---|
| 9724 | + !(state_reg->live & REG_LIVE_READ)) |
---|
| 9725 | + continue; |
---|
| 9726 | + if (env->log.level & BPF_LOG_LEVEL2) |
---|
| 9727 | + verbose(env, "frame %d: propagating fp%d\n", |
---|
| 9728 | + fr, (-i - 1) * BPF_REG_SIZE); |
---|
| 9729 | + err = mark_chain_precision_stack_frame(env, fr, i); |
---|
| 9730 | + if (err < 0) |
---|
| 9731 | + return err; |
---|
5101 | 9732 | } |
---|
5102 | 9733 | } |
---|
5103 | | - return err; |
---|
| 9734 | + return 0; |
---|
5104 | 9735 | } |
---|
| 9736 | + |
---|
| 9737 | +static bool states_maybe_looping(struct bpf_verifier_state *old, |
---|
| 9738 | + struct bpf_verifier_state *cur) |
---|
| 9739 | +{ |
---|
| 9740 | + struct bpf_func_state *fold, *fcur; |
---|
| 9741 | + int i, fr = cur->curframe; |
---|
| 9742 | + |
---|
| 9743 | + if (old->curframe != fr) |
---|
| 9744 | + return false; |
---|
| 9745 | + |
---|
| 9746 | + fold = old->frame[fr]; |
---|
| 9747 | + fcur = cur->frame[fr]; |
---|
| 9748 | + for (i = 0; i < MAX_BPF_REG; i++) |
---|
| 9749 | + if (memcmp(&fold->regs[i], &fcur->regs[i], |
---|
| 9750 | + offsetof(struct bpf_reg_state, parent))) |
---|
| 9751 | + return false; |
---|
| 9752 | + return true; |
---|
| 9753 | +} |
---|
| 9754 | + |
---|
5105 | 9755 | |
---|
5106 | 9756 | static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) |
---|
5107 | 9757 | { |
---|
5108 | 9758 | struct bpf_verifier_state_list *new_sl; |
---|
5109 | | - struct bpf_verifier_state_list *sl; |
---|
| 9759 | + struct bpf_verifier_state_list *sl, **pprev; |
---|
5110 | 9760 | struct bpf_verifier_state *cur = env->cur_state, *new; |
---|
5111 | 9761 | int i, j, err, states_cnt = 0; |
---|
| 9762 | + bool add_new_state = env->test_state_freq ? true : false; |
---|
5112 | 9763 | |
---|
5113 | | - sl = env->explored_states[insn_idx]; |
---|
5114 | | - if (!sl) |
---|
| 9764 | + cur->last_insn_idx = env->prev_insn_idx; |
---|
| 9765 | + if (!env->insn_aux_data[insn_idx].prune_point) |
---|
5115 | 9766 | /* this 'insn_idx' instruction wasn't marked, so we will not |
---|
5116 | 9767 | * be doing state search here |
---|
5117 | 9768 | */ |
---|
5118 | 9769 | return 0; |
---|
5119 | 9770 | |
---|
5120 | | - while (sl != STATE_LIST_MARK) { |
---|
| 9771 | + /* bpf progs typically have pruning point every 4 instructions |
---|
| 9772 | + * http://vger.kernel.org/bpfconf2019.html#session-1 |
---|
| 9773 | + * Do not add new state for future pruning if the verifier hasn't seen |
---|
| 9774 | + * at least 2 jumps and at least 8 instructions. |
---|
| 9775 | + * This heuristic helps decrease the 'total_states' and 'peak_states' metrics. |
---|
| 9776 | + * In tests that amounts to up to 50% reduction into total verifier |
---|
| 9777 | + * memory consumption and 20% verifier time speedup. |
---|
| 9778 | + */ |
---|
| 9779 | + if (env->jmps_processed - env->prev_jmps_processed >= 2 && |
---|
| 9780 | + env->insn_processed - env->prev_insn_processed >= 8) |
---|
| 9781 | + add_new_state = true; |
---|
| 9782 | + |
---|
| 9783 | + pprev = explored_state(env, insn_idx); |
---|
| 9784 | + sl = *pprev; |
---|
| 9785 | + |
---|
| 9786 | + clean_live_states(env, insn_idx, cur); |
---|
| 9787 | + |
---|
| 9788 | + while (sl) { |
---|
| 9789 | + states_cnt++; |
---|
| 9790 | + if (sl->state.insn_idx != insn_idx) |
---|
| 9791 | + goto next; |
---|
| 9792 | + if (sl->state.branches) { |
---|
| 9793 | + if (states_maybe_looping(&sl->state, cur) && |
---|
| 9794 | + states_equal(env, &sl->state, cur)) { |
---|
| 9795 | + verbose_linfo(env, insn_idx, "; "); |
---|
| 9796 | + verbose(env, "infinite loop detected at insn %d\n", insn_idx); |
---|
| 9797 | + return -EINVAL; |
---|
| 9798 | + } |
---|
| 9799 | + /* if the verifier is processing a loop, avoid adding new state |
---|
| 9800 | + * too often, since different loop iterations have distinct |
---|
| 9801 | + * states and may not help future pruning. |
---|
| 9802 | + * This threshold shouldn't be too low to make sure that |
---|
| 9803 | + * a loop with large bound will be rejected quickly. |
---|
| 9804 | + * The most abusive loop will be: |
---|
| 9805 | + * r1 += 1 |
---|
| 9806 | + * if r1 < 1000000 goto pc-2 |
---|
| 9807 | + * 1M insn_processed limit / 100 == 10k peak states. |
---|
| 9808 | + * This threshold shouldn't be too high either, since states |
---|
| 9809 | + * at the end of the loop are likely to be useful in pruning. |
---|
| 9810 | + */ |
---|
| 9811 | + if (env->jmps_processed - env->prev_jmps_processed < 20 && |
---|
| 9812 | + env->insn_processed - env->prev_insn_processed < 100) |
---|
| 9813 | + add_new_state = false; |
---|
| 9814 | + goto miss; |
---|
| 9815 | + } |
---|
5121 | 9816 | if (states_equal(env, &sl->state, cur)) { |
---|
| 9817 | + sl->hit_cnt++; |
---|
5122 | 9818 | /* reached equivalent register/stack state, |
---|
5123 | 9819 | * prune the search. |
---|
5124 | 9820 | * Registers read by the continuation are read by us. |
---|
.. | .. |
---|
5130 | 9826 | * this state and will pop a new one. |
---|
5131 | 9827 | */ |
---|
5132 | 9828 | err = propagate_liveness(env, &sl->state, cur); |
---|
| 9829 | + |
---|
| 9830 | + /* if previous state reached the exit with precision and |
---|
| 9831 | + * current state is equivalent to it (except precision marks) |
---|
| 9832 | + * the precision needs to be propagated back in |
---|
| 9833 | + * the current state. |
---|
| 9834 | + */ |
---|
| 9835 | + err = err ? : push_jmp_history(env, cur); |
---|
| 9836 | + err = err ? : propagate_precision(env, &sl->state); |
---|
5133 | 9837 | if (err) |
---|
5134 | 9838 | return err; |
---|
5135 | 9839 | return 1; |
---|
5136 | 9840 | } |
---|
5137 | | - sl = sl->next; |
---|
5138 | | - states_cnt++; |
---|
| 9841 | +miss: |
---|
| 9842 | + /* when new state is not going to be added do not increase miss count. |
---|
| 9843 | + * Otherwise several loop iterations will remove the state |
---|
| 9844 | + * recorded earlier. The goal of these heuristics is to have |
---|
| 9845 | + * states from some iterations of the loop (some in the beginning |
---|
| 9846 | + * and some at the end) to help pruning. |
---|
| 9847 | + */ |
---|
| 9848 | + if (add_new_state) |
---|
| 9849 | + sl->miss_cnt++; |
---|
| 9850 | + /* heuristic to determine whether this state is beneficial |
---|
| 9851 | + * to keep checking from state equivalence point of view. |
---|
| 9852 | + * Higher numbers increase max_states_per_insn and verification time, |
---|
| 9853 | + * but do not meaningfully decrease insn_processed. |
---|
| 9854 | + */ |
---|
| 9855 | + if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { |
---|
| 9856 | + /* the state is unlikely to be useful. Remove it to |
---|
| 9857 | + * speed up verification |
---|
| 9858 | + */ |
---|
| 9859 | + *pprev = sl->next; |
---|
| 9860 | + if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { |
---|
| 9861 | + u32 br = sl->state.branches; |
---|
| 9862 | + |
---|
| 9863 | + WARN_ONCE(br, |
---|
| 9864 | + "BUG live_done but branches_to_explore %d\n", |
---|
| 9865 | + br); |
---|
| 9866 | + free_verifier_state(&sl->state, false); |
---|
| 9867 | + kfree(sl); |
---|
| 9868 | + env->peak_states--; |
---|
| 9869 | + } else { |
---|
| 9870 | + /* cannot free this state, since parentage chain may |
---|
| 9871 | + * walk it later. Add it for free_list instead to |
---|
| 9872 | + * be freed at the end of verification |
---|
| 9873 | + */ |
---|
| 9874 | + sl->next = env->free_list; |
---|
| 9875 | + env->free_list = sl; |
---|
| 9876 | + } |
---|
| 9877 | + sl = *pprev; |
---|
| 9878 | + continue; |
---|
| 9879 | + } |
---|
| 9880 | +next: |
---|
| 9881 | + pprev = &sl->next; |
---|
| 9882 | + sl = *pprev; |
---|
5139 | 9883 | } |
---|
5140 | 9884 | |
---|
5141 | | - if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) |
---|
5142 | | - return 0; |
---|
| 9885 | + if (env->max_states_per_insn < states_cnt) |
---|
| 9886 | + env->max_states_per_insn = states_cnt; |
---|
5143 | 9887 | |
---|
5144 | | - /* there were no equivalent states, remember current one. |
---|
5145 | | - * technically the current state is not proven to be safe yet, |
---|
| 9888 | + if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) |
---|
| 9889 | + return push_jmp_history(env, cur); |
---|
| 9890 | + |
---|
| 9891 | + if (!add_new_state) |
---|
| 9892 | + return push_jmp_history(env, cur); |
---|
| 9893 | + |
---|
| 9894 | + /* There were no equivalent states, remember the current one. |
---|
| 9895 | + * Technically the current state is not proven to be safe yet, |
---|
5146 | 9896 | * but it will either reach outer most bpf_exit (which means it's safe) |
---|
5147 | | - * or it will be rejected. Since there are no loops, we won't be |
---|
| 9897 | + * or it will be rejected. When there are no loops the verifier won't be |
---|
5148 | 9898 | * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) |
---|
5149 | | - * again on the way to bpf_exit |
---|
| 9899 | + * again on the way to bpf_exit. |
---|
| 9900 | + * When looping the sl->state.branches will be > 0 and this state |
---|
| 9901 | + * will not be considered for equivalence until branches == 0. |
---|
5150 | 9902 | */ |
---|
5151 | 9903 | new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); |
---|
5152 | 9904 | if (!new_sl) |
---|
5153 | 9905 | return -ENOMEM; |
---|
| 9906 | + env->total_states++; |
---|
| 9907 | + env->peak_states++; |
---|
| 9908 | + env->prev_jmps_processed = env->jmps_processed; |
---|
| 9909 | + env->prev_insn_processed = env->insn_processed; |
---|
| 9910 | + |
---|
| 9911 | + /* forget precise markings we inherited, see __mark_chain_precision */ |
---|
| 9912 | + if (env->bpf_capable) |
---|
| 9913 | + mark_all_scalars_imprecise(env, cur); |
---|
5154 | 9914 | |
---|
5155 | 9915 | /* add new state to the head of linked list */ |
---|
5156 | 9916 | new = &new_sl->state; |
---|
.. | .. |
---|
5160 | 9920 | kfree(new_sl); |
---|
5161 | 9921 | return err; |
---|
5162 | 9922 | } |
---|
5163 | | - new_sl->next = env->explored_states[insn_idx]; |
---|
5164 | | - env->explored_states[insn_idx] = new_sl; |
---|
5165 | | - /* connect new state to parentage chain */ |
---|
5166 | | - for (i = 0; i < BPF_REG_FP; i++) |
---|
5167 | | - cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i]; |
---|
| 9923 | + new->insn_idx = insn_idx; |
---|
| 9924 | + WARN_ONCE(new->branches != 1, |
---|
| 9925 | + "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); |
---|
| 9926 | + |
---|
| 9927 | + cur->parent = new; |
---|
| 9928 | + cur->first_insn_idx = insn_idx; |
---|
| 9929 | + clear_jmp_history(cur); |
---|
| 9930 | + new_sl->next = *explored_state(env, insn_idx); |
---|
| 9931 | + *explored_state(env, insn_idx) = new_sl; |
---|
| 9932 | + /* connect new state to parentage chain. Current frame needs all |
---|
| 9933 | + * registers connected. Only r6 - r9 of the callers are alive (pushed |
---|
| 9934 | + * to the stack implicitly by JITs) so in callers' frames connect just |
---|
| 9935 | + * r6 - r9 as an optimization. Callers will have r1 - r5 connected to |
---|
| 9936 | + * the state of the call instruction (with WRITTEN set), and r0 comes |
---|
| 9937 | + * from callee with its full parentage chain, anyway. |
---|
| 9938 | + */ |
---|
5168 | 9939 | /* clear write marks in current state: the writes we did are not writes |
---|
5169 | 9940 | * our child did, so they don't screen off its reads from us. |
---|
5170 | 9941 | * (There are no read marks in current state, because reads always mark |
---|
5171 | 9942 | * their parent and current state never has children yet. Only |
---|
5172 | 9943 | * explored_states can get read marks.) |
---|
5173 | 9944 | */ |
---|
5174 | | - for (i = 0; i < BPF_REG_FP; i++) |
---|
5175 | | - cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE; |
---|
| 9945 | + for (j = 0; j <= cur->curframe; j++) { |
---|
| 9946 | + for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) |
---|
| 9947 | + cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; |
---|
| 9948 | + for (i = 0; i < BPF_REG_FP; i++) |
---|
| 9949 | + cur->frame[j]->regs[i].live = REG_LIVE_NONE; |
---|
| 9950 | + } |
---|
5176 | 9951 | |
---|
5177 | 9952 | /* all stack frames are accessible from callee, clear them all */ |
---|
5178 | 9953 | for (j = 0; j <= cur->curframe; j++) { |
---|
.. | .. |
---|
5188 | 9963 | return 0; |
---|
5189 | 9964 | } |
---|
5190 | 9965 | |
---|
| 9966 | +/* Return true if it's OK to have the same insn return a different type. */ |
---|
| 9967 | +static bool reg_type_mismatch_ok(enum bpf_reg_type type) |
---|
| 9968 | +{ |
---|
| 9969 | + switch (type) { |
---|
| 9970 | + case PTR_TO_CTX: |
---|
| 9971 | + case PTR_TO_SOCKET: |
---|
| 9972 | + case PTR_TO_SOCKET_OR_NULL: |
---|
| 9973 | + case PTR_TO_SOCK_COMMON: |
---|
| 9974 | + case PTR_TO_SOCK_COMMON_OR_NULL: |
---|
| 9975 | + case PTR_TO_TCP_SOCK: |
---|
| 9976 | + case PTR_TO_TCP_SOCK_OR_NULL: |
---|
| 9977 | + case PTR_TO_XDP_SOCK: |
---|
| 9978 | + case PTR_TO_BTF_ID: |
---|
| 9979 | + case PTR_TO_BTF_ID_OR_NULL: |
---|
| 9980 | + return false; |
---|
| 9981 | + default: |
---|
| 9982 | + return true; |
---|
| 9983 | + } |
---|
| 9984 | +} |
---|
| 9985 | + |
---|
| 9986 | +/* If an instruction was previously used with particular pointer types, then we |
---|
| 9987 | + * need to be careful to avoid cases such as the below, where it may be ok |
---|
| 9988 | + * for one branch accessing the pointer, but not ok for the other branch: |
---|
| 9989 | + * |
---|
| 9990 | + * R1 = sock_ptr |
---|
| 9991 | + * goto X; |
---|
| 9992 | + * ... |
---|
| 9993 | + * R1 = some_other_valid_ptr; |
---|
| 9994 | + * goto X; |
---|
| 9995 | + * ... |
---|
| 9996 | + * R2 = *(u32 *)(R1 + 0); |
---|
| 9997 | + */ |
---|
| 9998 | +static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) |
---|
| 9999 | +{ |
---|
| 10000 | + return src != prev && (!reg_type_mismatch_ok(src) || |
---|
| 10001 | + !reg_type_mismatch_ok(prev)); |
---|
| 10002 | +} |
---|
| 10003 | + |
---|
5191 | 10004 | static int do_check(struct bpf_verifier_env *env) |
---|
5192 | 10005 | { |
---|
5193 | | - struct bpf_verifier_state *state; |
---|
| 10006 | + bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); |
---|
| 10007 | + struct bpf_verifier_state *state = env->cur_state; |
---|
5194 | 10008 | struct bpf_insn *insns = env->prog->insnsi; |
---|
5195 | 10009 | struct bpf_reg_state *regs; |
---|
5196 | | - int insn_cnt = env->prog->len, i; |
---|
5197 | | - int insn_processed = 0; |
---|
| 10010 | + int insn_cnt = env->prog->len; |
---|
5198 | 10011 | bool do_print_state = false; |
---|
5199 | | - |
---|
5200 | | - state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); |
---|
5201 | | - if (!state) |
---|
5202 | | - return -ENOMEM; |
---|
5203 | | - state->curframe = 0; |
---|
5204 | | - state->speculative = false; |
---|
5205 | | - state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); |
---|
5206 | | - if (!state->frame[0]) { |
---|
5207 | | - kfree(state); |
---|
5208 | | - return -ENOMEM; |
---|
5209 | | - } |
---|
5210 | | - env->cur_state = state; |
---|
5211 | | - init_func_state(env, state->frame[0], |
---|
5212 | | - BPF_MAIN_FUNC /* callsite */, |
---|
5213 | | - 0 /* frameno */, |
---|
5214 | | - 0 /* subprogno, zero == main subprog */); |
---|
| 10012 | + int prev_insn_idx = -1; |
---|
5215 | 10013 | |
---|
5216 | 10014 | for (;;) { |
---|
5217 | 10015 | struct bpf_insn *insn; |
---|
5218 | 10016 | u8 class; |
---|
5219 | 10017 | int err; |
---|
5220 | 10018 | |
---|
| 10019 | + env->prev_insn_idx = prev_insn_idx; |
---|
5221 | 10020 | if (env->insn_idx >= insn_cnt) { |
---|
5222 | 10021 | verbose(env, "invalid insn idx %d insn_cnt %d\n", |
---|
5223 | 10022 | env->insn_idx, insn_cnt); |
---|
.. | .. |
---|
5227 | 10026 | insn = &insns[env->insn_idx]; |
---|
5228 | 10027 | class = BPF_CLASS(insn->code); |
---|
5229 | 10028 | |
---|
5230 | | - if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { |
---|
| 10029 | + if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { |
---|
5231 | 10030 | verbose(env, |
---|
5232 | 10031 | "BPF program is too large. Processed %d insn\n", |
---|
5233 | | - insn_processed); |
---|
| 10032 | + env->insn_processed); |
---|
5234 | 10033 | return -E2BIG; |
---|
5235 | 10034 | } |
---|
5236 | 10035 | |
---|
.. | .. |
---|
5239 | 10038 | return err; |
---|
5240 | 10039 | if (err == 1) { |
---|
5241 | 10040 | /* found equivalent state, can prune the search */ |
---|
5242 | | - if (env->log.level) { |
---|
| 10041 | + if (env->log.level & BPF_LOG_LEVEL) { |
---|
5243 | 10042 | if (do_print_state) |
---|
5244 | 10043 | verbose(env, "\nfrom %d to %d%s: safe\n", |
---|
5245 | 10044 | env->prev_insn_idx, env->insn_idx, |
---|
.. | .. |
---|
5257 | 10056 | if (need_resched()) |
---|
5258 | 10057 | cond_resched(); |
---|
5259 | 10058 | |
---|
5260 | | - if (env->log.level > 1 || (env->log.level && do_print_state)) { |
---|
5261 | | - if (env->log.level > 1) |
---|
| 10059 | + if (env->log.level & BPF_LOG_LEVEL2 || |
---|
| 10060 | + (env->log.level & BPF_LOG_LEVEL && do_print_state)) { |
---|
| 10061 | + if (env->log.level & BPF_LOG_LEVEL2) |
---|
5262 | 10062 | verbose(env, "%d:", env->insn_idx); |
---|
5263 | 10063 | else |
---|
5264 | 10064 | verbose(env, "\nfrom %d to %d%s:", |
---|
.. | .. |
---|
5269 | 10069 | do_print_state = false; |
---|
5270 | 10070 | } |
---|
5271 | 10071 | |
---|
5272 | | - if (env->log.level) { |
---|
| 10072 | + if (env->log.level & BPF_LOG_LEVEL) { |
---|
5273 | 10073 | const struct bpf_insn_cbs cbs = { |
---|
5274 | 10074 | .cb_print = verbose, |
---|
5275 | 10075 | .private_data = env, |
---|
5276 | 10076 | }; |
---|
5277 | 10077 | |
---|
| 10078 | + verbose_linfo(env, env->insn_idx, "; "); |
---|
5278 | 10079 | verbose(env, "%d: ", env->insn_idx); |
---|
5279 | 10080 | print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); |
---|
5280 | 10081 | } |
---|
.. | .. |
---|
5288 | 10089 | |
---|
5289 | 10090 | regs = cur_regs(env); |
---|
5290 | 10091 | sanitize_mark_insn_seen(env); |
---|
| 10092 | + prev_insn_idx = env->insn_idx; |
---|
5291 | 10093 | |
---|
5292 | 10094 | if (class == BPF_ALU || class == BPF_ALU64) { |
---|
5293 | 10095 | err = check_alu_op(env, insn); |
---|
.. | .. |
---|
5328 | 10130 | */ |
---|
5329 | 10131 | *prev_src_type = src_reg_type; |
---|
5330 | 10132 | |
---|
5331 | | - } else if (src_reg_type != *prev_src_type && |
---|
5332 | | - (src_reg_type == PTR_TO_CTX || |
---|
5333 | | - *prev_src_type == PTR_TO_CTX)) { |
---|
| 10133 | + } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { |
---|
5334 | 10134 | /* ABuser program is trying to use the same insn |
---|
5335 | 10135 | * dst_reg = *(u32*) (src_reg + off) |
---|
5336 | 10136 | * with different pointer types: |
---|
.. | .. |
---|
5375 | 10175 | |
---|
5376 | 10176 | if (*prev_dst_type == NOT_INIT) { |
---|
5377 | 10177 | *prev_dst_type = dst_reg_type; |
---|
5378 | | - } else if (dst_reg_type != *prev_dst_type && |
---|
5379 | | - (dst_reg_type == PTR_TO_CTX || |
---|
5380 | | - *prev_dst_type == PTR_TO_CTX)) { |
---|
| 10178 | + } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) { |
---|
5381 | 10179 | verbose(env, "same insn cannot be used with different pointers\n"); |
---|
5382 | 10180 | return -EINVAL; |
---|
5383 | 10181 | } |
---|
.. | .. |
---|
5394 | 10192 | return err; |
---|
5395 | 10193 | |
---|
5396 | 10194 | if (is_ctx_reg(env, insn->dst_reg)) { |
---|
5397 | | - verbose(env, "BPF_ST stores into R%d context is not allowed\n", |
---|
5398 | | - insn->dst_reg); |
---|
| 10195 | + verbose(env, "BPF_ST stores into R%d %s is not allowed\n", |
---|
| 10196 | + insn->dst_reg, |
---|
| 10197 | + reg_type_str[reg_state(env, insn->dst_reg)->type]); |
---|
5399 | 10198 | return -EACCES; |
---|
5400 | 10199 | } |
---|
5401 | 10200 | |
---|
.. | .. |
---|
5406 | 10205 | if (err) |
---|
5407 | 10206 | return err; |
---|
5408 | 10207 | |
---|
5409 | | - } else if (class == BPF_JMP) { |
---|
| 10208 | + } else if (class == BPF_JMP || class == BPF_JMP32) { |
---|
5410 | 10209 | u8 opcode = BPF_OP(insn->code); |
---|
5411 | 10210 | |
---|
| 10211 | + env->jmps_processed++; |
---|
5412 | 10212 | if (opcode == BPF_CALL) { |
---|
5413 | 10213 | if (BPF_SRC(insn->code) != BPF_K || |
---|
5414 | 10214 | insn->off != 0 || |
---|
5415 | 10215 | (insn->src_reg != BPF_REG_0 && |
---|
5416 | 10216 | insn->src_reg != BPF_PSEUDO_CALL) || |
---|
5417 | | - insn->dst_reg != BPF_REG_0) { |
---|
| 10217 | + insn->dst_reg != BPF_REG_0 || |
---|
| 10218 | + class == BPF_JMP32) { |
---|
5418 | 10219 | verbose(env, "BPF_CALL uses reserved fields\n"); |
---|
5419 | 10220 | return -EINVAL; |
---|
5420 | 10221 | } |
---|
5421 | 10222 | |
---|
| 10223 | + if (env->cur_state->active_spin_lock && |
---|
| 10224 | + (insn->src_reg == BPF_PSEUDO_CALL || |
---|
| 10225 | + insn->imm != BPF_FUNC_spin_unlock)) { |
---|
| 10226 | + verbose(env, "function calls are not allowed while holding a lock\n"); |
---|
| 10227 | + return -EINVAL; |
---|
| 10228 | + } |
---|
5422 | 10229 | if (insn->src_reg == BPF_PSEUDO_CALL) |
---|
5423 | 10230 | err = check_func_call(env, insn, &env->insn_idx); |
---|
5424 | 10231 | else |
---|
.. | .. |
---|
5430 | 10237 | if (BPF_SRC(insn->code) != BPF_K || |
---|
5431 | 10238 | insn->imm != 0 || |
---|
5432 | 10239 | insn->src_reg != BPF_REG_0 || |
---|
5433 | | - insn->dst_reg != BPF_REG_0) { |
---|
| 10240 | + insn->dst_reg != BPF_REG_0 || |
---|
| 10241 | + class == BPF_JMP32) { |
---|
5434 | 10242 | verbose(env, "BPF_JA uses reserved fields\n"); |
---|
5435 | 10243 | return -EINVAL; |
---|
5436 | 10244 | } |
---|
.. | .. |
---|
5442 | 10250 | if (BPF_SRC(insn->code) != BPF_K || |
---|
5443 | 10251 | insn->imm != 0 || |
---|
5444 | 10252 | insn->src_reg != BPF_REG_0 || |
---|
5445 | | - insn->dst_reg != BPF_REG_0) { |
---|
| 10253 | + insn->dst_reg != BPF_REG_0 || |
---|
| 10254 | + class == BPF_JMP32) { |
---|
5446 | 10255 | verbose(env, "BPF_EXIT uses reserved fields\n"); |
---|
| 10256 | + return -EINVAL; |
---|
| 10257 | + } |
---|
| 10258 | + |
---|
| 10259 | + if (env->cur_state->active_spin_lock) { |
---|
| 10260 | + verbose(env, "bpf_spin_unlock is missing\n"); |
---|
5447 | 10261 | return -EINVAL; |
---|
5448 | 10262 | } |
---|
5449 | 10263 | |
---|
5450 | 10264 | if (state->curframe) { |
---|
5451 | 10265 | /* exit from nested function */ |
---|
5452 | | - env->prev_insn_idx = env->insn_idx; |
---|
5453 | 10266 | err = prepare_func_exit(env, &env->insn_idx); |
---|
5454 | 10267 | if (err) |
---|
5455 | 10268 | return err; |
---|
.. | .. |
---|
5457 | 10270 | continue; |
---|
5458 | 10271 | } |
---|
5459 | 10272 | |
---|
5460 | | - /* eBPF calling convetion is such that R0 is used |
---|
5461 | | - * to return the value from eBPF program. |
---|
5462 | | - * Make sure that it's readable at this time |
---|
5463 | | - * of bpf_exit, which means that program wrote |
---|
5464 | | - * something into it earlier |
---|
5465 | | - */ |
---|
5466 | | - err = check_reg_arg(env, BPF_REG_0, SRC_OP); |
---|
| 10273 | + err = check_reference_leak(env); |
---|
5467 | 10274 | if (err) |
---|
5468 | 10275 | return err; |
---|
5469 | | - |
---|
5470 | | - if (is_pointer_value(env, BPF_REG_0)) { |
---|
5471 | | - verbose(env, "R0 leaks addr as return value\n"); |
---|
5472 | | - return -EACCES; |
---|
5473 | | - } |
---|
5474 | 10276 | |
---|
5475 | 10277 | err = check_return_code(env); |
---|
5476 | 10278 | if (err) |
---|
5477 | 10279 | return err; |
---|
5478 | 10280 | process_bpf_exit: |
---|
5479 | | - err = pop_stack(env, &env->prev_insn_idx, |
---|
5480 | | - &env->insn_idx); |
---|
| 10281 | + update_branch_counts(env, env->cur_state); |
---|
| 10282 | + err = pop_stack(env, &prev_insn_idx, |
---|
| 10283 | + &env->insn_idx, pop_log); |
---|
5481 | 10284 | if (err < 0) { |
---|
5482 | 10285 | if (err != -ENOENT) |
---|
5483 | 10286 | return err; |
---|
.. | .. |
---|
5518 | 10321 | env->insn_idx++; |
---|
5519 | 10322 | } |
---|
5520 | 10323 | |
---|
5521 | | - verbose(env, "processed %d insns (limit %d), stack depth ", |
---|
5522 | | - insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); |
---|
5523 | | - for (i = 0; i < env->subprog_cnt; i++) { |
---|
5524 | | - u32 depth = env->subprog_info[i].stack_depth; |
---|
| 10324 | + return 0; |
---|
| 10325 | +} |
---|
5525 | 10326 | |
---|
5526 | | - verbose(env, "%d", depth); |
---|
5527 | | - if (i + 1 < env->subprog_cnt) |
---|
5528 | | - verbose(env, "+"); |
---|
| 10327 | +/* replace pseudo btf_id with kernel symbol address */ |
---|
| 10328 | +static int check_pseudo_btf_id(struct bpf_verifier_env *env, |
---|
| 10329 | + struct bpf_insn *insn, |
---|
| 10330 | + struct bpf_insn_aux_data *aux) |
---|
| 10331 | +{ |
---|
| 10332 | + const struct btf_var_secinfo *vsi; |
---|
| 10333 | + const struct btf_type *datasec; |
---|
| 10334 | + const struct btf_type *t; |
---|
| 10335 | + const char *sym_name; |
---|
| 10336 | + bool percpu = false; |
---|
| 10337 | + u32 type, id = insn->imm; |
---|
| 10338 | + s32 datasec_id; |
---|
| 10339 | + u64 addr; |
---|
| 10340 | + int i; |
---|
| 10341 | + |
---|
| 10342 | + if (!btf_vmlinux) { |
---|
| 10343 | + verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); |
---|
| 10344 | + return -EINVAL; |
---|
5529 | 10345 | } |
---|
5530 | | - verbose(env, "\n"); |
---|
5531 | | - env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; |
---|
| 10346 | + |
---|
| 10347 | + if (insn[1].imm != 0) { |
---|
| 10348 | + verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n"); |
---|
| 10349 | + return -EINVAL; |
---|
| 10350 | + } |
---|
| 10351 | + |
---|
| 10352 | + t = btf_type_by_id(btf_vmlinux, id); |
---|
| 10353 | + if (!t) { |
---|
| 10354 | + verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id); |
---|
| 10355 | + return -ENOENT; |
---|
| 10356 | + } |
---|
| 10357 | + |
---|
| 10358 | + if (!btf_type_is_var(t)) { |
---|
| 10359 | + verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", |
---|
| 10360 | + id); |
---|
| 10361 | + return -EINVAL; |
---|
| 10362 | + } |
---|
| 10363 | + |
---|
| 10364 | + sym_name = btf_name_by_offset(btf_vmlinux, t->name_off); |
---|
| 10365 | + addr = kallsyms_lookup_name(sym_name); |
---|
| 10366 | + if (!addr) { |
---|
| 10367 | + verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", |
---|
| 10368 | + sym_name); |
---|
| 10369 | + return -ENOENT; |
---|
| 10370 | + } |
---|
| 10371 | + |
---|
| 10372 | + datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu", |
---|
| 10373 | + BTF_KIND_DATASEC); |
---|
| 10374 | + if (datasec_id > 0) { |
---|
| 10375 | + datasec = btf_type_by_id(btf_vmlinux, datasec_id); |
---|
| 10376 | + for_each_vsi(i, datasec, vsi) { |
---|
| 10377 | + if (vsi->type == id) { |
---|
| 10378 | + percpu = true; |
---|
| 10379 | + break; |
---|
| 10380 | + } |
---|
| 10381 | + } |
---|
| 10382 | + } |
---|
| 10383 | + |
---|
| 10384 | + insn[0].imm = (u32)addr; |
---|
| 10385 | + insn[1].imm = addr >> 32; |
---|
| 10386 | + |
---|
| 10387 | + type = t->type; |
---|
| 10388 | + t = btf_type_skip_modifiers(btf_vmlinux, type, NULL); |
---|
| 10389 | + if (percpu) { |
---|
| 10390 | + aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID; |
---|
| 10391 | + aux->btf_var.btf_id = type; |
---|
| 10392 | + } else if (!btf_type_is_struct(t)) { |
---|
| 10393 | + const struct btf_type *ret; |
---|
| 10394 | + const char *tname; |
---|
| 10395 | + u32 tsize; |
---|
| 10396 | + |
---|
| 10397 | + /* resolve the type size of ksym. */ |
---|
| 10398 | + ret = btf_resolve_size(btf_vmlinux, t, &tsize); |
---|
| 10399 | + if (IS_ERR(ret)) { |
---|
| 10400 | + tname = btf_name_by_offset(btf_vmlinux, t->name_off); |
---|
| 10401 | + verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", |
---|
| 10402 | + tname, PTR_ERR(ret)); |
---|
| 10403 | + return -EINVAL; |
---|
| 10404 | + } |
---|
| 10405 | + aux->btf_var.reg_type = PTR_TO_MEM; |
---|
| 10406 | + aux->btf_var.mem_size = tsize; |
---|
| 10407 | + } else { |
---|
| 10408 | + aux->btf_var.reg_type = PTR_TO_BTF_ID; |
---|
| 10409 | + aux->btf_var.btf_id = type; |
---|
| 10410 | + } |
---|
5532 | 10411 | return 0; |
---|
5533 | 10412 | } |
---|
5534 | 10413 | |
---|
.. | .. |
---|
5540 | 10419 | !(map->map_flags & BPF_F_NO_PREALLOC); |
---|
5541 | 10420 | } |
---|
5542 | 10421 | |
---|
| 10422 | +static bool is_tracing_prog_type(enum bpf_prog_type type) |
---|
| 10423 | +{ |
---|
| 10424 | + switch (type) { |
---|
| 10425 | + case BPF_PROG_TYPE_KPROBE: |
---|
| 10426 | + case BPF_PROG_TYPE_TRACEPOINT: |
---|
| 10427 | + case BPF_PROG_TYPE_PERF_EVENT: |
---|
| 10428 | + case BPF_PROG_TYPE_RAW_TRACEPOINT: |
---|
| 10429 | + return true; |
---|
| 10430 | + default: |
---|
| 10431 | + return false; |
---|
| 10432 | + } |
---|
| 10433 | +} |
---|
| 10434 | + |
---|
| 10435 | +static bool is_preallocated_map(struct bpf_map *map) |
---|
| 10436 | +{ |
---|
| 10437 | + if (!check_map_prealloc(map)) |
---|
| 10438 | + return false; |
---|
| 10439 | + if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta)) |
---|
| 10440 | + return false; |
---|
| 10441 | + return true; |
---|
| 10442 | +} |
---|
| 10443 | + |
---|
5543 | 10444 | static int check_map_prog_compatibility(struct bpf_verifier_env *env, |
---|
5544 | 10445 | struct bpf_map *map, |
---|
5545 | 10446 | struct bpf_prog *prog) |
---|
5546 | 10447 | |
---|
5547 | 10448 | { |
---|
5548 | | - /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use |
---|
5549 | | - * preallocated hash maps, since doing memory allocation |
---|
5550 | | - * in overflow_handler can crash depending on where nmi got |
---|
5551 | | - * triggered. |
---|
| 10449 | + enum bpf_prog_type prog_type = resolve_prog_type(prog); |
---|
| 10450 | + /* |
---|
| 10451 | + * Validate that trace type programs use preallocated hash maps. |
---|
| 10452 | + * |
---|
| 10453 | + * For programs attached to PERF events this is mandatory as the |
---|
| 10454 | + * perf NMI can hit any arbitrary code sequence. |
---|
| 10455 | + * |
---|
 | 10456 | + * All other trace types using non-preallocated hash maps are unsafe as |
---|
| 10457 | + * well because tracepoint or kprobes can be inside locked regions |
---|
| 10458 | + * of the memory allocator or at a place where a recursion into the |
---|
| 10459 | + * memory allocator would see inconsistent state. |
---|
| 10460 | + * |
---|
| 10461 | + * On RT enabled kernels run-time allocation of all trace type |
---|
| 10462 | + * programs is strictly prohibited due to lock type constraints. On |
---|
| 10463 | + * !RT kernels it is allowed for backwards compatibility reasons for |
---|
| 10464 | + * now, but warnings are emitted so developers are made aware of |
---|
| 10465 | + * the unsafety and can fix their programs before this is enforced. |
---|
5552 | 10466 | */ |
---|
5553 | | - if (prog->type == BPF_PROG_TYPE_PERF_EVENT) { |
---|
5554 | | - if (!check_map_prealloc(map)) { |
---|
| 10467 | + if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) { |
---|
| 10468 | + if (prog_type == BPF_PROG_TYPE_PERF_EVENT) { |
---|
5555 | 10469 | verbose(env, "perf_event programs can only use preallocated hash map\n"); |
---|
5556 | 10470 | return -EINVAL; |
---|
5557 | 10471 | } |
---|
5558 | | - if (map->inner_map_meta && |
---|
5559 | | - !check_map_prealloc(map->inner_map_meta)) { |
---|
5560 | | - verbose(env, "perf_event programs can only use preallocated inner hash map\n"); |
---|
| 10472 | + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { |
---|
| 10473 | + verbose(env, "trace type programs can only use preallocated hash map\n"); |
---|
5561 | 10474 | return -EINVAL; |
---|
5562 | 10475 | } |
---|
| 10476 | + WARN_ONCE(1, "trace type BPF program uses run-time allocation\n"); |
---|
| 10477 | + verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n"); |
---|
| 10478 | + } |
---|
| 10479 | + |
---|
| 10480 | + if ((is_tracing_prog_type(prog_type) || |
---|
| 10481 | + prog_type == BPF_PROG_TYPE_SOCKET_FILTER) && |
---|
| 10482 | + map_value_has_spin_lock(map)) { |
---|
| 10483 | + verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); |
---|
| 10484 | + return -EINVAL; |
---|
5563 | 10485 | } |
---|
5564 | 10486 | |
---|
5565 | 10487 | if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && |
---|
.. | .. |
---|
5568 | 10490 | return -EINVAL; |
---|
5569 | 10491 | } |
---|
5570 | 10492 | |
---|
| 10493 | + if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { |
---|
| 10494 | + verbose(env, "bpf_struct_ops map cannot be used in prog\n"); |
---|
| 10495 | + return -EINVAL; |
---|
| 10496 | + } |
---|
| 10497 | + |
---|
| 10498 | + if (prog->aux->sleepable) |
---|
| 10499 | + switch (map->map_type) { |
---|
| 10500 | + case BPF_MAP_TYPE_HASH: |
---|
| 10501 | + case BPF_MAP_TYPE_LRU_HASH: |
---|
| 10502 | + case BPF_MAP_TYPE_ARRAY: |
---|
| 10503 | + if (!is_preallocated_map(map)) { |
---|
| 10504 | + verbose(env, |
---|
| 10505 | + "Sleepable programs can only use preallocated hash maps\n"); |
---|
| 10506 | + return -EINVAL; |
---|
| 10507 | + } |
---|
| 10508 | + break; |
---|
| 10509 | + default: |
---|
| 10510 | + verbose(env, |
---|
| 10511 | + "Sleepable programs can only use array and hash maps\n"); |
---|
| 10512 | + return -EINVAL; |
---|
| 10513 | + } |
---|
| 10514 | + |
---|
5571 | 10515 | return 0; |
---|
5572 | 10516 | } |
---|
5573 | 10517 | |
---|
5574 | | -/* look for pseudo eBPF instructions that access map FDs and |
---|
5575 | | - * replace them with actual map pointers |
---|
| 10518 | +static bool bpf_map_is_cgroup_storage(struct bpf_map *map) |
---|
| 10519 | +{ |
---|
| 10520 | + return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || |
---|
| 10521 | + map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); |
---|
| 10522 | +} |
---|
| 10523 | + |
---|
| 10524 | +/* find and rewrite pseudo imm in ld_imm64 instructions: |
---|
| 10525 | + * |
---|
| 10526 | + * 1. if it accesses map FD, replace it with actual map pointer. |
---|
| 10527 | + * 2. if it accesses btf_id of a VAR, replace it with pointer to the var. |
---|
| 10528 | + * |
---|
| 10529 | + * NOTE: btf_vmlinux is required for converting pseudo btf_id. |
---|
5576 | 10530 | */ |
---|
5577 | | -static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) |
---|
| 10531 | +static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) |
---|
5578 | 10532 | { |
---|
5579 | 10533 | struct bpf_insn *insn = env->prog->insnsi; |
---|
5580 | 10534 | int insn_cnt = env->prog->len; |
---|
.. | .. |
---|
5599 | 10553 | } |
---|
5600 | 10554 | |
---|
5601 | 10555 | if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { |
---|
| 10556 | + struct bpf_insn_aux_data *aux; |
---|
5602 | 10557 | struct bpf_map *map; |
---|
5603 | 10558 | struct fd f; |
---|
| 10559 | + u64 addr; |
---|
5604 | 10560 | |
---|
5605 | 10561 | if (i == insn_cnt - 1 || insn[1].code != 0 || |
---|
5606 | 10562 | insn[1].dst_reg != 0 || insn[1].src_reg != 0 || |
---|
.. | .. |
---|
5609 | 10565 | return -EINVAL; |
---|
5610 | 10566 | } |
---|
5611 | 10567 | |
---|
5612 | | - if (insn->src_reg == 0) |
---|
| 10568 | + if (insn[0].src_reg == 0) |
---|
5613 | 10569 | /* valid generic load 64-bit imm */ |
---|
5614 | 10570 | goto next_insn; |
---|
5615 | 10571 | |
---|
5616 | | - if (insn->src_reg != BPF_PSEUDO_MAP_FD) { |
---|
| 10572 | + if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) { |
---|
| 10573 | + aux = &env->insn_aux_data[i]; |
---|
| 10574 | + err = check_pseudo_btf_id(env, insn, aux); |
---|
| 10575 | + if (err) |
---|
| 10576 | + return err; |
---|
| 10577 | + goto next_insn; |
---|
| 10578 | + } |
---|
| 10579 | + |
---|
| 10580 | + /* In final convert_pseudo_ld_imm64() step, this is |
---|
| 10581 | + * converted into regular 64-bit imm load insn. |
---|
| 10582 | + */ |
---|
| 10583 | + if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && |
---|
| 10584 | + insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || |
---|
| 10585 | + (insn[0].src_reg == BPF_PSEUDO_MAP_FD && |
---|
| 10586 | + insn[1].imm != 0)) { |
---|
5617 | 10587 | verbose(env, |
---|
5618 | 10588 | "unrecognized bpf_ld_imm64 insn\n"); |
---|
5619 | 10589 | return -EINVAL; |
---|
5620 | 10590 | } |
---|
5621 | 10591 | |
---|
5622 | | - f = fdget(insn->imm); |
---|
| 10592 | + f = fdget(insn[0].imm); |
---|
5623 | 10593 | map = __bpf_map_get(f); |
---|
5624 | 10594 | if (IS_ERR(map)) { |
---|
5625 | 10595 | verbose(env, "fd %d is not pointing to valid bpf_map\n", |
---|
5626 | | - insn->imm); |
---|
| 10596 | + insn[0].imm); |
---|
5627 | 10597 | return PTR_ERR(map); |
---|
5628 | 10598 | } |
---|
5629 | 10599 | |
---|
.. | .. |
---|
5633 | 10603 | return err; |
---|
5634 | 10604 | } |
---|
5635 | 10605 | |
---|
5636 | | - /* store map pointer inside BPF_LD_IMM64 instruction */ |
---|
5637 | | - insn[0].imm = (u32) (unsigned long) map; |
---|
5638 | | - insn[1].imm = ((u64) (unsigned long) map) >> 32; |
---|
| 10606 | + aux = &env->insn_aux_data[i]; |
---|
| 10607 | + if (insn->src_reg == BPF_PSEUDO_MAP_FD) { |
---|
| 10608 | + addr = (unsigned long)map; |
---|
| 10609 | + } else { |
---|
| 10610 | + u32 off = insn[1].imm; |
---|
| 10611 | + |
---|
| 10612 | + if (off >= BPF_MAX_VAR_OFF) { |
---|
| 10613 | + verbose(env, "direct value offset of %u is not allowed\n", off); |
---|
| 10614 | + fdput(f); |
---|
| 10615 | + return -EINVAL; |
---|
| 10616 | + } |
---|
| 10617 | + |
---|
| 10618 | + if (!map->ops->map_direct_value_addr) { |
---|
| 10619 | + verbose(env, "no direct value access support for this map type\n"); |
---|
| 10620 | + fdput(f); |
---|
| 10621 | + return -EINVAL; |
---|
| 10622 | + } |
---|
| 10623 | + |
---|
| 10624 | + err = map->ops->map_direct_value_addr(map, &addr, off); |
---|
| 10625 | + if (err) { |
---|
| 10626 | + verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", |
---|
| 10627 | + map->value_size, off); |
---|
| 10628 | + fdput(f); |
---|
| 10629 | + return err; |
---|
| 10630 | + } |
---|
| 10631 | + |
---|
| 10632 | + aux->map_off = off; |
---|
| 10633 | + addr += off; |
---|
| 10634 | + } |
---|
| 10635 | + |
---|
| 10636 | + insn[0].imm = (u32)addr; |
---|
| 10637 | + insn[1].imm = addr >> 32; |
---|
5639 | 10638 | |
---|
5640 | 10639 | /* check whether we recorded this map already */ |
---|
5641 | | - for (j = 0; j < env->used_map_cnt; j++) |
---|
| 10640 | + for (j = 0; j < env->used_map_cnt; j++) { |
---|
5642 | 10641 | if (env->used_maps[j] == map) { |
---|
| 10642 | + aux->map_index = j; |
---|
5643 | 10643 | fdput(f); |
---|
5644 | 10644 | goto next_insn; |
---|
5645 | 10645 | } |
---|
| 10646 | + } |
---|
5646 | 10647 | |
---|
5647 | 10648 | if (env->used_map_cnt >= MAX_USED_MAPS) { |
---|
5648 | 10649 | fdput(f); |
---|
.. | .. |
---|
5654 | 10655 | * will be used by the valid program until it's unloaded |
---|
5655 | 10656 | * and all maps are released in free_used_maps() |
---|
5656 | 10657 | */ |
---|
5657 | | - map = bpf_map_inc(map, false); |
---|
5658 | | - if (IS_ERR(map)) { |
---|
5659 | | - fdput(f); |
---|
5660 | | - return PTR_ERR(map); |
---|
5661 | | - } |
---|
| 10658 | + bpf_map_inc(map); |
---|
| 10659 | + |
---|
| 10660 | + aux->map_index = env->used_map_cnt; |
---|
5662 | 10661 | env->used_maps[env->used_map_cnt++] = map; |
---|
5663 | 10662 | |
---|
5664 | | - if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE && |
---|
5665 | | - bpf_cgroup_storage_assign(env->prog, map)) { |
---|
5666 | | - verbose(env, |
---|
5667 | | - "only one cgroup storage is allowed\n"); |
---|
| 10663 | + if (bpf_map_is_cgroup_storage(map) && |
---|
| 10664 | + bpf_cgroup_storage_assign(env->prog->aux, map)) { |
---|
| 10665 | + verbose(env, "only one cgroup storage of each type is allowed\n"); |
---|
5668 | 10666 | fdput(f); |
---|
5669 | 10667 | return -EBUSY; |
---|
5670 | 10668 | } |
---|
.. | .. |
---|
5693 | 10691 | /* drop refcnt of maps used by the rejected program */ |
---|
5694 | 10692 | static void release_maps(struct bpf_verifier_env *env) |
---|
5695 | 10693 | { |
---|
5696 | | - int i; |
---|
5697 | | - |
---|
5698 | | - if (env->prog->aux->cgroup_storage) |
---|
5699 | | - bpf_cgroup_storage_release(env->prog, |
---|
5700 | | - env->prog->aux->cgroup_storage); |
---|
5701 | | - |
---|
5702 | | - for (i = 0; i < env->used_map_cnt; i++) |
---|
5703 | | - bpf_map_put(env->used_maps[i]); |
---|
| 10694 | + __bpf_free_used_maps(env->prog->aux, env->used_maps, |
---|
| 10695 | + env->used_map_cnt); |
---|
5704 | 10696 | } |
---|
5705 | 10697 | |
---|
5706 | 10698 | /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ |
---|
.. | .. |
---|
5719 | 10711 | * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying |
---|
5720 | 10712 | * [0, off) and [off, end) to new locations, so the patched range stays zero |
---|
5721 | 10713 | */ |
---|
5722 | | -static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, |
---|
5723 | | - u32 off, u32 cnt) |
---|
| 10714 | +static void adjust_insn_aux_data(struct bpf_verifier_env *env, |
---|
| 10715 | + struct bpf_insn_aux_data *new_data, |
---|
| 10716 | + struct bpf_prog *new_prog, u32 off, u32 cnt) |
---|
5724 | 10717 | { |
---|
5725 | | - struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; |
---|
5726 | | - bool old_seen = old_data[off].seen; |
---|
| 10718 | + struct bpf_insn_aux_data *old_data = env->insn_aux_data; |
---|
| 10719 | + struct bpf_insn *insn = new_prog->insnsi; |
---|
| 10720 | + u32 old_seen = old_data[off].seen; |
---|
| 10721 | + u32 prog_len; |
---|
5727 | 10722 | int i; |
---|
5728 | 10723 | |
---|
| 10724 | + /* aux info at OFF always needs adjustment, no matter fast path |
---|
| 10725 | + * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the |
---|
| 10726 | + * original insn at old prog. |
---|
| 10727 | + */ |
---|
| 10728 | + old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1); |
---|
| 10729 | + |
---|
5729 | 10730 | if (cnt == 1) |
---|
5730 | | - return 0; |
---|
5731 | | - new_data = vzalloc(array_size(prog_len, |
---|
5732 | | - sizeof(struct bpf_insn_aux_data))); |
---|
5733 | | - if (!new_data) |
---|
5734 | | - return -ENOMEM; |
---|
| 10731 | + return; |
---|
| 10732 | + prog_len = new_prog->len; |
---|
| 10733 | + |
---|
5735 | 10734 | memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); |
---|
5736 | 10735 | memcpy(new_data + off + cnt - 1, old_data + off, |
---|
5737 | 10736 | sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); |
---|
5738 | 10737 | for (i = off; i < off + cnt - 1; i++) { |
---|
5739 | 10738 | /* Expand insni[off]'s seen count to the patched range. */ |
---|
5740 | 10739 | new_data[i].seen = old_seen; |
---|
| 10740 | + new_data[i].zext_dst = insn_has_def32(env, insn + i); |
---|
5741 | 10741 | } |
---|
5742 | 10742 | env->insn_aux_data = new_data; |
---|
5743 | 10743 | vfree(old_data); |
---|
5744 | | - return 0; |
---|
5745 | 10744 | } |
---|
5746 | 10745 | |
---|
5747 | 10746 | static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) |
---|
.. | .. |
---|
5758 | 10757 | } |
---|
5759 | 10758 | } |
---|
5760 | 10759 | |
---|
| 10760 | +static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len) |
---|
| 10761 | +{ |
---|
| 10762 | + struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab; |
---|
| 10763 | + int i, sz = prog->aux->size_poke_tab; |
---|
| 10764 | + struct bpf_jit_poke_descriptor *desc; |
---|
| 10765 | + |
---|
| 10766 | + for (i = 0; i < sz; i++) { |
---|
| 10767 | + desc = &tab[i]; |
---|
| 10768 | + if (desc->insn_idx <= off) |
---|
| 10769 | + continue; |
---|
| 10770 | + desc->insn_idx += len - 1; |
---|
| 10771 | + } |
---|
| 10772 | +} |
---|
| 10773 | + |
---|
5761 | 10774 | static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, |
---|
5762 | 10775 | const struct bpf_insn *patch, u32 len) |
---|
5763 | 10776 | { |
---|
5764 | 10777 | struct bpf_prog *new_prog; |
---|
| 10778 | + struct bpf_insn_aux_data *new_data = NULL; |
---|
| 10779 | + |
---|
| 10780 | + if (len > 1) { |
---|
| 10781 | + new_data = vzalloc(array_size(env->prog->len + len - 1, |
---|
| 10782 | + sizeof(struct bpf_insn_aux_data))); |
---|
| 10783 | + if (!new_data) |
---|
| 10784 | + return NULL; |
---|
| 10785 | + } |
---|
5765 | 10786 | |
---|
5766 | 10787 | new_prog = bpf_patch_insn_single(env->prog, off, patch, len); |
---|
5767 | | - if (!new_prog) |
---|
| 10788 | + if (IS_ERR(new_prog)) { |
---|
| 10789 | + if (PTR_ERR(new_prog) == -ERANGE) |
---|
| 10790 | + verbose(env, |
---|
| 10791 | + "insn %d cannot be patched due to 16-bit range\n", |
---|
| 10792 | + env->insn_aux_data[off].orig_idx); |
---|
| 10793 | + vfree(new_data); |
---|
5768 | 10794 | return NULL; |
---|
5769 | | - if (adjust_insn_aux_data(env, new_prog->len, off, len)) |
---|
5770 | | - return NULL; |
---|
| 10795 | + } |
---|
| 10796 | + adjust_insn_aux_data(env, new_data, new_prog, off, len); |
---|
5771 | 10797 | adjust_subprog_starts(env, off, len); |
---|
| 10798 | + adjust_poke_descs(new_prog, off, len); |
---|
5772 | 10799 | return new_prog; |
---|
| 10800 | +} |
---|
| 10801 | + |
---|
| 10802 | +static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, |
---|
| 10803 | + u32 off, u32 cnt) |
---|
| 10804 | +{ |
---|
| 10805 | + int i, j; |
---|
| 10806 | + |
---|
| 10807 | + /* find first prog starting at or after off (first to remove) */ |
---|
| 10808 | + for (i = 0; i < env->subprog_cnt; i++) |
---|
| 10809 | + if (env->subprog_info[i].start >= off) |
---|
| 10810 | + break; |
---|
| 10811 | + /* find first prog starting at or after off + cnt (first to stay) */ |
---|
| 10812 | + for (j = i; j < env->subprog_cnt; j++) |
---|
| 10813 | + if (env->subprog_info[j].start >= off + cnt) |
---|
| 10814 | + break; |
---|
| 10815 | + /* if j doesn't start exactly at off + cnt, we are just removing |
---|
| 10816 | + * the front of previous prog |
---|
| 10817 | + */ |
---|
| 10818 | + if (env->subprog_info[j].start != off + cnt) |
---|
| 10819 | + j--; |
---|
| 10820 | + |
---|
| 10821 | + if (j > i) { |
---|
| 10822 | + struct bpf_prog_aux *aux = env->prog->aux; |
---|
| 10823 | + int move; |
---|
| 10824 | + |
---|
| 10825 | + /* move fake 'exit' subprog as well */ |
---|
| 10826 | + move = env->subprog_cnt + 1 - j; |
---|
| 10827 | + |
---|
| 10828 | + memmove(env->subprog_info + i, |
---|
| 10829 | + env->subprog_info + j, |
---|
| 10830 | + sizeof(*env->subprog_info) * move); |
---|
| 10831 | + env->subprog_cnt -= j - i; |
---|
| 10832 | + |
---|
| 10833 | + /* remove func_info */ |
---|
| 10834 | + if (aux->func_info) { |
---|
| 10835 | + move = aux->func_info_cnt - j; |
---|
| 10836 | + |
---|
| 10837 | + memmove(aux->func_info + i, |
---|
| 10838 | + aux->func_info + j, |
---|
| 10839 | + sizeof(*aux->func_info) * move); |
---|
| 10840 | + aux->func_info_cnt -= j - i; |
---|
| 10841 | + /* func_info->insn_off is set after all code rewrites, |
---|
| 10842 | + * in adjust_btf_func() - no need to adjust |
---|
| 10843 | + */ |
---|
| 10844 | + } |
---|
| 10845 | + } else { |
---|
| 10846 | + /* convert i from "first prog to remove" to "first to adjust" */ |
---|
| 10847 | + if (env->subprog_info[i].start == off) |
---|
| 10848 | + i++; |
---|
| 10849 | + } |
---|
| 10850 | + |
---|
| 10851 | + /* update fake 'exit' subprog as well */ |
---|
| 10852 | + for (; i <= env->subprog_cnt; i++) |
---|
| 10853 | + env->subprog_info[i].start -= cnt; |
---|
| 10854 | + |
---|
| 10855 | + return 0; |
---|
| 10856 | +} |
---|
| 10857 | + |
---|
| 10858 | +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, |
---|
| 10859 | + u32 cnt) |
---|
| 10860 | +{ |
---|
| 10861 | + struct bpf_prog *prog = env->prog; |
---|
| 10862 | + u32 i, l_off, l_cnt, nr_linfo; |
---|
| 10863 | + struct bpf_line_info *linfo; |
---|
| 10864 | + |
---|
| 10865 | + nr_linfo = prog->aux->nr_linfo; |
---|
| 10866 | + if (!nr_linfo) |
---|
| 10867 | + return 0; |
---|
| 10868 | + |
---|
| 10869 | + linfo = prog->aux->linfo; |
---|
| 10870 | + |
---|
| 10871 | + /* find first line info to remove, count lines to be removed */ |
---|
| 10872 | + for (i = 0; i < nr_linfo; i++) |
---|
| 10873 | + if (linfo[i].insn_off >= off) |
---|
| 10874 | + break; |
---|
| 10875 | + |
---|
| 10876 | + l_off = i; |
---|
| 10877 | + l_cnt = 0; |
---|
| 10878 | + for (; i < nr_linfo; i++) |
---|
| 10879 | + if (linfo[i].insn_off < off + cnt) |
---|
| 10880 | + l_cnt++; |
---|
| 10881 | + else |
---|
| 10882 | + break; |
---|
| 10883 | + |
---|
| 10884 | + /* First live insn doesn't match first live linfo, it needs to "inherit" |
---|
| 10885 | + * last removed linfo. prog is already modified, so prog->len == off |
---|
| 10886 | + * means no live instructions after (tail of the program was removed). |
---|
| 10887 | + */ |
---|
| 10888 | + if (prog->len != off && l_cnt && |
---|
| 10889 | + (i == nr_linfo || linfo[i].insn_off != off + cnt)) { |
---|
| 10890 | + l_cnt--; |
---|
| 10891 | + linfo[--i].insn_off = off + cnt; |
---|
| 10892 | + } |
---|
| 10893 | + |
---|
| 10894 | + /* remove the line info which refer to the removed instructions */ |
---|
| 10895 | + if (l_cnt) { |
---|
| 10896 | + memmove(linfo + l_off, linfo + i, |
---|
| 10897 | + sizeof(*linfo) * (nr_linfo - i)); |
---|
| 10898 | + |
---|
| 10899 | + prog->aux->nr_linfo -= l_cnt; |
---|
| 10900 | + nr_linfo = prog->aux->nr_linfo; |
---|
| 10901 | + } |
---|
| 10902 | + |
---|
| 10903 | + /* pull all linfo[i].insn_off >= off + cnt in by cnt */ |
---|
| 10904 | + for (i = l_off; i < nr_linfo; i++) |
---|
| 10905 | + linfo[i].insn_off -= cnt; |
---|
| 10906 | + |
---|
| 10907 | + /* fix up all subprogs (incl. 'exit') which start >= off */ |
---|
| 10908 | + for (i = 0; i <= env->subprog_cnt; i++) |
---|
| 10909 | + if (env->subprog_info[i].linfo_idx > l_off) { |
---|
| 10910 | + /* program may have started in the removed region but |
---|
| 10911 | + * may not be fully removed |
---|
| 10912 | + */ |
---|
| 10913 | + if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) |
---|
| 10914 | + env->subprog_info[i].linfo_idx -= l_cnt; |
---|
| 10915 | + else |
---|
| 10916 | + env->subprog_info[i].linfo_idx = l_off; |
---|
| 10917 | + } |
---|
| 10918 | + |
---|
| 10919 | + return 0; |
---|
| 10920 | +} |
---|
| 10921 | + |
---|
| 10922 | +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) |
---|
| 10923 | +{ |
---|
| 10924 | + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; |
---|
| 10925 | + unsigned int orig_prog_len = env->prog->len; |
---|
| 10926 | + int err; |
---|
| 10927 | + |
---|
| 10928 | + if (bpf_prog_is_dev_bound(env->prog->aux)) |
---|
| 10929 | + bpf_prog_offload_remove_insns(env, off, cnt); |
---|
| 10930 | + |
---|
| 10931 | + err = bpf_remove_insns(env->prog, off, cnt); |
---|
| 10932 | + if (err) |
---|
| 10933 | + return err; |
---|
| 10934 | + |
---|
| 10935 | + err = adjust_subprog_starts_after_remove(env, off, cnt); |
---|
| 10936 | + if (err) |
---|
| 10937 | + return err; |
---|
| 10938 | + |
---|
| 10939 | + err = bpf_adj_linfo_after_remove(env, off, cnt); |
---|
| 10940 | + if (err) |
---|
| 10941 | + return err; |
---|
| 10942 | + |
---|
| 10943 | + memmove(aux_data + off, aux_data + off + cnt, |
---|
| 10944 | + sizeof(*aux_data) * (orig_prog_len - off - cnt)); |
---|
| 10945 | + |
---|
| 10946 | + return 0; |
---|
5773 | 10947 | } |
---|
5774 | 10948 | |
---|
5775 | 10949 | /* The verifier does more data flow analysis than llvm and will not |
---|
.. | .. |
---|
5795 | 10969 | if (aux_data[i].seen) |
---|
5796 | 10970 | continue; |
---|
5797 | 10971 | memcpy(insn + i, &trap, sizeof(trap)); |
---|
| 10972 | + aux_data[i].zext_dst = false; |
---|
5798 | 10973 | } |
---|
5799 | 10974 | } |
---|
5800 | 10975 | |
---|
5801 | | -/* convert load instructions that access fields of 'struct __sk_buff' |
---|
5802 | | - * into sequence of instructions that access fields of 'struct sk_buff' |
---|
| 10976 | +static bool insn_is_cond_jump(u8 code) |
---|
| 10977 | +{ |
---|
| 10978 | + u8 op; |
---|
| 10979 | + |
---|
| 10980 | + if (BPF_CLASS(code) == BPF_JMP32) |
---|
| 10981 | + return true; |
---|
| 10982 | + |
---|
| 10983 | + if (BPF_CLASS(code) != BPF_JMP) |
---|
| 10984 | + return false; |
---|
| 10985 | + |
---|
| 10986 | + op = BPF_OP(code); |
---|
| 10987 | + return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; |
---|
| 10988 | +} |
---|
| 10989 | + |
---|
| 10990 | +static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) |
---|
| 10991 | +{ |
---|
| 10992 | + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; |
---|
| 10993 | + struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); |
---|
| 10994 | + struct bpf_insn *insn = env->prog->insnsi; |
---|
| 10995 | + const int insn_cnt = env->prog->len; |
---|
| 10996 | + int i; |
---|
| 10997 | + |
---|
| 10998 | + for (i = 0; i < insn_cnt; i++, insn++) { |
---|
| 10999 | + if (!insn_is_cond_jump(insn->code)) |
---|
| 11000 | + continue; |
---|
| 11001 | + |
---|
| 11002 | + if (!aux_data[i + 1].seen) |
---|
| 11003 | + ja.off = insn->off; |
---|
| 11004 | + else if (!aux_data[i + 1 + insn->off].seen) |
---|
| 11005 | + ja.off = 0; |
---|
| 11006 | + else |
---|
| 11007 | + continue; |
---|
| 11008 | + |
---|
| 11009 | + if (bpf_prog_is_dev_bound(env->prog->aux)) |
---|
| 11010 | + bpf_prog_offload_replace_insn(env, i, &ja); |
---|
| 11011 | + |
---|
| 11012 | + memcpy(insn, &ja, sizeof(ja)); |
---|
| 11013 | + } |
---|
| 11014 | +} |
---|
| 11015 | + |
---|
| 11016 | +static int opt_remove_dead_code(struct bpf_verifier_env *env) |
---|
| 11017 | +{ |
---|
| 11018 | + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; |
---|
| 11019 | + int insn_cnt = env->prog->len; |
---|
| 11020 | + int i, err; |
---|
| 11021 | + |
---|
| 11022 | + for (i = 0; i < insn_cnt; i++) { |
---|
| 11023 | + int j; |
---|
| 11024 | + |
---|
| 11025 | + j = 0; |
---|
| 11026 | + while (i + j < insn_cnt && !aux_data[i + j].seen) |
---|
| 11027 | + j++; |
---|
| 11028 | + if (!j) |
---|
| 11029 | + continue; |
---|
| 11030 | + |
---|
| 11031 | + err = verifier_remove_insns(env, i, j); |
---|
| 11032 | + if (err) |
---|
| 11033 | + return err; |
---|
| 11034 | + insn_cnt = env->prog->len; |
---|
| 11035 | + } |
---|
| 11036 | + |
---|
| 11037 | + return 0; |
---|
| 11038 | +} |
---|
| 11039 | + |
---|
| 11040 | +static int opt_remove_nops(struct bpf_verifier_env *env) |
---|
| 11041 | +{ |
---|
| 11042 | + const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); |
---|
| 11043 | + struct bpf_insn *insn = env->prog->insnsi; |
---|
| 11044 | + int insn_cnt = env->prog->len; |
---|
| 11045 | + int i, err; |
---|
| 11046 | + |
---|
| 11047 | + for (i = 0; i < insn_cnt; i++) { |
---|
| 11048 | + if (memcmp(&insn[i], &ja, sizeof(ja))) |
---|
| 11049 | + continue; |
---|
| 11050 | + |
---|
| 11051 | + err = verifier_remove_insns(env, i, 1); |
---|
| 11052 | + if (err) |
---|
| 11053 | + return err; |
---|
| 11054 | + insn_cnt--; |
---|
| 11055 | + i--; |
---|
| 11056 | + } |
---|
| 11057 | + |
---|
| 11058 | + return 0; |
---|
| 11059 | +} |
---|
| 11060 | + |
---|
| 11061 | +static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, |
---|
| 11062 | + const union bpf_attr *attr) |
---|
| 11063 | +{ |
---|
| 11064 | + struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4]; |
---|
| 11065 | + struct bpf_insn_aux_data *aux = env->insn_aux_data; |
---|
| 11066 | + int i, patch_len, delta = 0, len = env->prog->len; |
---|
| 11067 | + struct bpf_insn *insns = env->prog->insnsi; |
---|
| 11068 | + struct bpf_prog *new_prog; |
---|
| 11069 | + bool rnd_hi32; |
---|
| 11070 | + |
---|
| 11071 | + rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32; |
---|
| 11072 | + zext_patch[1] = BPF_ZEXT_REG(0); |
---|
| 11073 | + rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0); |
---|
| 11074 | + rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); |
---|
| 11075 | + rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX); |
---|
| 11076 | + for (i = 0; i < len; i++) { |
---|
| 11077 | + int adj_idx = i + delta; |
---|
| 11078 | + struct bpf_insn insn; |
---|
| 11079 | + |
---|
| 11080 | + insn = insns[adj_idx]; |
---|
| 11081 | + if (!aux[adj_idx].zext_dst) { |
---|
| 11082 | + u8 code, class; |
---|
| 11083 | + u32 imm_rnd; |
---|
| 11084 | + |
---|
| 11085 | + if (!rnd_hi32) |
---|
| 11086 | + continue; |
---|
| 11087 | + |
---|
| 11088 | + code = insn.code; |
---|
| 11089 | + class = BPF_CLASS(code); |
---|
| 11090 | + if (insn_no_def(&insn)) |
---|
| 11091 | + continue; |
---|
| 11092 | + |
---|
| 11093 | + /* NOTE: arg "reg" (the fourth one) is only used for |
---|
| 11094 | + * BPF_STX which has been ruled out in above |
---|
| 11095 | + * check, it is safe to pass NULL here. |
---|
| 11096 | + */ |
---|
| 11097 | + if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) { |
---|
| 11098 | + if (class == BPF_LD && |
---|
| 11099 | + BPF_MODE(code) == BPF_IMM) |
---|
| 11100 | + i++; |
---|
| 11101 | + continue; |
---|
| 11102 | + } |
---|
| 11103 | + |
---|
| 11104 | + /* ctx load could be transformed into wider load. */ |
---|
| 11105 | + if (class == BPF_LDX && |
---|
| 11106 | + aux[adj_idx].ptr_type == PTR_TO_CTX) |
---|
| 11107 | + continue; |
---|
| 11108 | + |
---|
| 11109 | + imm_rnd = get_random_int(); |
---|
| 11110 | + rnd_hi32_patch[0] = insn; |
---|
| 11111 | + rnd_hi32_patch[1].imm = imm_rnd; |
---|
| 11112 | + rnd_hi32_patch[3].dst_reg = insn.dst_reg; |
---|
| 11113 | + patch = rnd_hi32_patch; |
---|
| 11114 | + patch_len = 4; |
---|
| 11115 | + goto apply_patch_buffer; |
---|
| 11116 | + } |
---|
| 11117 | + |
---|
| 11118 | + if (!bpf_jit_needs_zext()) |
---|
| 11119 | + continue; |
---|
| 11120 | + |
---|
| 11121 | + zext_patch[0] = insn; |
---|
| 11122 | + zext_patch[1].dst_reg = insn.dst_reg; |
---|
| 11123 | + zext_patch[1].src_reg = insn.dst_reg; |
---|
| 11124 | + patch = zext_patch; |
---|
| 11125 | + patch_len = 2; |
---|
| 11126 | +apply_patch_buffer: |
---|
| 11127 | + new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len); |
---|
| 11128 | + if (!new_prog) |
---|
| 11129 | + return -ENOMEM; |
---|
| 11130 | + env->prog = new_prog; |
---|
| 11131 | + insns = new_prog->insnsi; |
---|
| 11132 | + aux = env->insn_aux_data; |
---|
| 11133 | + delta += patch_len - 1; |
---|
| 11134 | + } |
---|
| 11135 | + |
---|
| 11136 | + return 0; |
---|
| 11137 | +} |
---|
| 11138 | + |
---|
| 11139 | +/* convert load instructions that access fields of a context type into a |
---|
| 11140 | + * sequence of instructions that access fields of the underlying structure: |
---|
| 11141 | + * struct __sk_buff -> struct sk_buff |
---|
| 11142 | + * struct bpf_sock_ops -> struct sock |
---|
5803 | 11143 | */ |
---|
5804 | 11144 | static int convert_ctx_accesses(struct bpf_verifier_env *env) |
---|
5805 | 11145 | { |
---|
.. | .. |
---|
5812 | 11152 | enum bpf_access_type type; |
---|
5813 | 11153 | bool is_narrower_load; |
---|
5814 | 11154 | |
---|
5815 | | - if (ops->gen_prologue) { |
---|
| 11155 | + if (ops->gen_prologue || env->seen_direct_write) { |
---|
| 11156 | + if (!ops->gen_prologue) { |
---|
| 11157 | + verbose(env, "bpf verifier is misconfigured\n"); |
---|
| 11158 | + return -EINVAL; |
---|
| 11159 | + } |
---|
5816 | 11160 | cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, |
---|
5817 | 11161 | env->prog); |
---|
5818 | 11162 | if (cnt >= ARRAY_SIZE(insn_buf)) { |
---|
.. | .. |
---|
5828 | 11172 | } |
---|
5829 | 11173 | } |
---|
5830 | 11174 | |
---|
5831 | | - if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) |
---|
| 11175 | + if (bpf_prog_is_dev_bound(env->prog->aux)) |
---|
5832 | 11176 | return 0; |
---|
5833 | 11177 | |
---|
5834 | 11178 | insn = env->prog->insnsi + delta; |
---|
5835 | 11179 | |
---|
5836 | 11180 | for (i = 0; i < insn_cnt; i++, insn++) { |
---|
| 11181 | + bpf_convert_ctx_access_t convert_ctx_access; |
---|
5837 | 11182 | bool ctx_access; |
---|
5838 | 11183 | |
---|
5839 | 11184 | if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || |
---|
.. | .. |
---|
5877 | 11222 | if (!ctx_access) |
---|
5878 | 11223 | continue; |
---|
5879 | 11224 | |
---|
5880 | | - if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) |
---|
| 11225 | + switch (env->insn_aux_data[i + delta].ptr_type) { |
---|
| 11226 | + case PTR_TO_CTX: |
---|
| 11227 | + if (!ops->convert_ctx_access) |
---|
| 11228 | + continue; |
---|
| 11229 | + convert_ctx_access = ops->convert_ctx_access; |
---|
| 11230 | + break; |
---|
| 11231 | + case PTR_TO_SOCKET: |
---|
| 11232 | + case PTR_TO_SOCK_COMMON: |
---|
| 11233 | + convert_ctx_access = bpf_sock_convert_ctx_access; |
---|
| 11234 | + break; |
---|
| 11235 | + case PTR_TO_TCP_SOCK: |
---|
| 11236 | + convert_ctx_access = bpf_tcp_sock_convert_ctx_access; |
---|
| 11237 | + break; |
---|
| 11238 | + case PTR_TO_XDP_SOCK: |
---|
| 11239 | + convert_ctx_access = bpf_xdp_sock_convert_ctx_access; |
---|
| 11240 | + break; |
---|
| 11241 | + case PTR_TO_BTF_ID: |
---|
| 11242 | + if (type == BPF_READ) { |
---|
| 11243 | + insn->code = BPF_LDX | BPF_PROBE_MEM | |
---|
| 11244 | + BPF_SIZE((insn)->code); |
---|
| 11245 | + env->prog->aux->num_exentries++; |
---|
| 11246 | + } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) { |
---|
| 11247 | + verbose(env, "Writes through BTF pointers are not allowed\n"); |
---|
| 11248 | + return -EINVAL; |
---|
| 11249 | + } |
---|
5881 | 11250 | continue; |
---|
| 11251 | + default: |
---|
| 11252 | + continue; |
---|
| 11253 | + } |
---|
5882 | 11254 | |
---|
5883 | 11255 | ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; |
---|
5884 | 11256 | size = BPF_LDST_BYTES(insn); |
---|
.. | .. |
---|
5910 | 11282 | } |
---|
5911 | 11283 | |
---|
5912 | 11284 | target_size = 0; |
---|
5913 | | - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog, |
---|
5914 | | - &target_size); |
---|
| 11285 | + cnt = convert_ctx_access(type, insn, insn_buf, env->prog, |
---|
| 11286 | + &target_size); |
---|
5915 | 11287 | if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || |
---|
5916 | 11288 | (ctx_field_size && !target_size)) { |
---|
5917 | 11289 | verbose(env, "bpf verifier is misconfigured\n"); |
---|
.. | .. |
---|
5919 | 11291 | } |
---|
5920 | 11292 | |
---|
5921 | 11293 | if (is_narrower_load && size < target_size) { |
---|
5922 | | - u8 shift = (off & (size_default - 1)) * 8; |
---|
5923 | | - |
---|
| 11294 | + u8 shift = bpf_ctx_narrow_access_offset( |
---|
| 11295 | + off, size, size_default) * 8; |
---|
| 11296 | + if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) { |
---|
| 11297 | + verbose(env, "bpf verifier narrow ctx load misconfigured\n"); |
---|
| 11298 | + return -EINVAL; |
---|
| 11299 | + } |
---|
5924 | 11300 | if (ctx_field_size <= 4) { |
---|
5925 | 11301 | if (shift) |
---|
5926 | 11302 | insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, |
---|
.. | .. |
---|
5933 | 11309 | insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, |
---|
5934 | 11310 | insn->dst_reg, |
---|
5935 | 11311 | shift); |
---|
5936 | | - insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, |
---|
| 11312 | + insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, |
---|
5937 | 11313 | (1ULL << size * 8) - 1); |
---|
5938 | 11314 | } |
---|
5939 | 11315 | } |
---|
.. | .. |
---|
5956 | 11332 | { |
---|
5957 | 11333 | struct bpf_prog *prog = env->prog, **func, *tmp; |
---|
5958 | 11334 | int i, j, subprog_start, subprog_end = 0, len, subprog; |
---|
| 11335 | + struct bpf_map *map_ptr; |
---|
5959 | 11336 | struct bpf_insn *insn; |
---|
5960 | 11337 | void *old_bpf_func; |
---|
5961 | | - int err = -ENOMEM; |
---|
| 11338 | + int err, num_exentries; |
---|
5962 | 11339 | |
---|
5963 | 11340 | if (env->subprog_cnt <= 1) |
---|
5964 | 11341 | return 0; |
---|
.. | .. |
---|
5989 | 11366 | insn->imm = 1; |
---|
5990 | 11367 | } |
---|
5991 | 11368 | |
---|
| 11369 | + err = bpf_prog_alloc_jited_linfo(prog); |
---|
| 11370 | + if (err) |
---|
| 11371 | + goto out_undo_insn; |
---|
| 11372 | + |
---|
| 11373 | + err = -ENOMEM; |
---|
5992 | 11374 | func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL); |
---|
5993 | 11375 | if (!func) |
---|
5994 | 11376 | goto out_undo_insn; |
---|
.. | .. |
---|
5998 | 11380 | subprog_end = env->subprog_info[i + 1].start; |
---|
5999 | 11381 | |
---|
6000 | 11382 | len = subprog_end - subprog_start; |
---|
6001 | | - func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); |
---|
| 11383 | + /* BPF_PROG_RUN doesn't call subprogs directly, |
---|
| 11384 | + * hence main prog stats include the runtime of subprogs. |
---|
| 11385 | + * subprogs don't have IDs and not reachable via prog_get_next_id |
---|
| 11386 | + * func[i]->aux->stats will never be accessed and stays NULL |
---|
| 11387 | + */ |
---|
| 11388 | + func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); |
---|
6002 | 11389 | if (!func[i]) |
---|
6003 | 11390 | goto out_free; |
---|
6004 | 11391 | memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], |
---|
.. | .. |
---|
6008 | 11395 | if (bpf_prog_calc_tag(func[i])) |
---|
6009 | 11396 | goto out_free; |
---|
6010 | 11397 | func[i]->is_func = 1; |
---|
6011 | | - /* Use bpf_prog_F_tag to indicate functions in stack traces. |
---|
6012 | | - * Long term would need debug info to populate names |
---|
6013 | | - */ |
---|
| 11398 | + func[i]->aux->func_idx = i; |
---|
| 11399 | + /* the btf and func_info will be freed only at prog->aux */ |
---|
| 11400 | + func[i]->aux->btf = prog->aux->btf; |
---|
| 11401 | + func[i]->aux->func_info = prog->aux->func_info; |
---|
| 11402 | + func[i]->aux->func_info_cnt = prog->aux->func_info_cnt; |
---|
| 11403 | + |
---|
| 11404 | + for (j = 0; j < prog->aux->size_poke_tab; j++) { |
---|
| 11405 | + u32 insn_idx = prog->aux->poke_tab[j].insn_idx; |
---|
| 11406 | + int ret; |
---|
| 11407 | + |
---|
| 11408 | + if (!(insn_idx >= subprog_start && |
---|
| 11409 | + insn_idx <= subprog_end)) |
---|
| 11410 | + continue; |
---|
| 11411 | + |
---|
| 11412 | + ret = bpf_jit_add_poke_descriptor(func[i], |
---|
| 11413 | + &prog->aux->poke_tab[j]); |
---|
| 11414 | + if (ret < 0) { |
---|
| 11415 | + verbose(env, "adding tail call poke descriptor failed\n"); |
---|
| 11416 | + goto out_free; |
---|
| 11417 | + } |
---|
| 11418 | + |
---|
| 11419 | + func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1; |
---|
| 11420 | + |
---|
| 11421 | + map_ptr = func[i]->aux->poke_tab[ret].tail_call.map; |
---|
| 11422 | + ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux); |
---|
| 11423 | + if (ret < 0) { |
---|
| 11424 | + verbose(env, "tracking tail call prog failed\n"); |
---|
| 11425 | + goto out_free; |
---|
| 11426 | + } |
---|
| 11427 | + } |
---|
| 11428 | + |
---|
6014 | 11429 | func[i]->aux->name[0] = 'F'; |
---|
6015 | 11430 | func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; |
---|
6016 | 11431 | func[i]->jit_requested = 1; |
---|
| 11432 | + func[i]->aux->linfo = prog->aux->linfo; |
---|
| 11433 | + func[i]->aux->nr_linfo = prog->aux->nr_linfo; |
---|
| 11434 | + func[i]->aux->jited_linfo = prog->aux->jited_linfo; |
---|
| 11435 | + func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; |
---|
| 11436 | + num_exentries = 0; |
---|
| 11437 | + insn = func[i]->insnsi; |
---|
| 11438 | + for (j = 0; j < func[i]->len; j++, insn++) { |
---|
| 11439 | + if (BPF_CLASS(insn->code) == BPF_LDX && |
---|
| 11440 | + BPF_MODE(insn->code) == BPF_PROBE_MEM) |
---|
| 11441 | + num_exentries++; |
---|
| 11442 | + } |
---|
| 11443 | + func[i]->aux->num_exentries = num_exentries; |
---|
| 11444 | + func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; |
---|
6017 | 11445 | func[i] = bpf_int_jit_compile(func[i]); |
---|
6018 | 11446 | if (!func[i]->jited) { |
---|
6019 | 11447 | err = -ENOTSUPP; |
---|
.. | .. |
---|
6021 | 11449 | } |
---|
6022 | 11450 | cond_resched(); |
---|
6023 | 11451 | } |
---|
| 11452 | + |
---|
| 11453 | + /* Untrack main program's aux structs so that during map_poke_run() |
---|
| 11454 | + * we will not stumble upon the unfilled poke descriptors; each |
---|
| 11455 | + * of the main program's poke descs got distributed across subprogs |
---|
| 11456 | + * and got tracked onto map, so we are sure that none of them will |
---|
| 11457 | + * be missed after the operation below |
---|
| 11458 | + */ |
---|
| 11459 | + for (i = 0; i < prog->aux->size_poke_tab; i++) { |
---|
| 11460 | + map_ptr = prog->aux->poke_tab[i].tail_call.map; |
---|
| 11461 | + |
---|
| 11462 | + map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); |
---|
| 11463 | + } |
---|
| 11464 | + |
---|
6024 | 11465 | /* at this point all bpf functions were successfully JITed |
---|
6025 | 11466 | * now populate all bpf_calls with correct addresses and |
---|
6026 | 11467 | * run last pass of JIT |
---|
.. | .. |
---|
6032 | 11473 | insn->src_reg != BPF_PSEUDO_CALL) |
---|
6033 | 11474 | continue; |
---|
6034 | 11475 | subprog = insn->off; |
---|
6035 | | - insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) |
---|
6036 | | - func[subprog]->bpf_func - |
---|
6037 | | - __bpf_call_base; |
---|
| 11476 | + insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - |
---|
| 11477 | + __bpf_call_base; |
---|
6038 | 11478 | } |
---|
6039 | 11479 | |
---|
6040 | 11480 | /* we use the aux data to keep a list of the start addresses |
---|
.. | .. |
---|
6087 | 11527 | prog->bpf_func = func[0]->bpf_func; |
---|
6088 | 11528 | prog->aux->func = func; |
---|
6089 | 11529 | prog->aux->func_cnt = env->subprog_cnt; |
---|
| 11530 | + bpf_prog_free_unused_jited_linfo(prog); |
---|
6090 | 11531 | return 0; |
---|
6091 | 11532 | out_free: |
---|
6092 | | - for (i = 0; i < env->subprog_cnt; i++) |
---|
6093 | | - if (func[i]) |
---|
6094 | | - bpf_jit_free(func[i]); |
---|
| 11533 | + for (i = 0; i < env->subprog_cnt; i++) { |
---|
| 11534 | + if (!func[i]) |
---|
| 11535 | + continue; |
---|
| 11536 | + |
---|
| 11537 | + for (j = 0; j < func[i]->aux->size_poke_tab; j++) { |
---|
| 11538 | + map_ptr = func[i]->aux->poke_tab[j].tail_call.map; |
---|
| 11539 | + map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux); |
---|
| 11540 | + } |
---|
| 11541 | + bpf_jit_free(func[i]); |
---|
| 11542 | + } |
---|
6095 | 11543 | kfree(func); |
---|
6096 | 11544 | out_undo_insn: |
---|
6097 | 11545 | /* cleanup main prog to be interpreted */ |
---|
.. | .. |
---|
6103 | 11551 | insn->off = 0; |
---|
6104 | 11552 | insn->imm = env->insn_aux_data[i].call_imm; |
---|
6105 | 11553 | } |
---|
| 11554 | + bpf_prog_free_jited_linfo(prog); |
---|
6106 | 11555 | return err; |
---|
6107 | 11556 | } |
---|
6108 | 11557 | |
---|
.. | .. |
---|
6113 | 11562 | struct bpf_insn *insn = prog->insnsi; |
---|
6114 | 11563 | int i, depth; |
---|
6115 | 11564 | #endif |
---|
6116 | | - int err; |
---|
| 11565 | + int err = 0; |
---|
6117 | 11566 | |
---|
6118 | | - err = 0; |
---|
6119 | | - if (env->prog->jit_requested) { |
---|
| 11567 | + if (env->prog->jit_requested && |
---|
| 11568 | + !bpf_prog_is_dev_bound(env->prog->aux)) { |
---|
6120 | 11569 | err = jit_subprogs(env); |
---|
6121 | 11570 | if (err == 0) |
---|
6122 | 11571 | return 0; |
---|
.. | .. |
---|
6124 | 11573 | return err; |
---|
6125 | 11574 | } |
---|
6126 | 11575 | #ifndef CONFIG_BPF_JIT_ALWAYS_ON |
---|
| 11576 | + if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) { |
---|
| 11577 | + /* When JIT fails the progs with bpf2bpf calls and tail_calls |
---|
| 11578 | + * have to be rejected, since interpreter doesn't support them yet. |
---|
| 11579 | + */ |
---|
| 11580 | + verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n"); |
---|
| 11581 | + return -EINVAL; |
---|
| 11582 | + } |
---|
6127 | 11583 | for (i = 0; i < prog->len; i++, insn++) { |
---|
6128 | 11584 | if (insn->code != (BPF_JMP | BPF_CALL) || |
---|
6129 | 11585 | insn->src_reg != BPF_PSEUDO_CALL) |
---|
.. | .. |
---|
6146 | 11602 | static int fixup_bpf_calls(struct bpf_verifier_env *env) |
---|
6147 | 11603 | { |
---|
6148 | 11604 | struct bpf_prog *prog = env->prog; |
---|
| 11605 | + bool expect_blinding = bpf_jit_blinding_enabled(prog); |
---|
6149 | 11606 | struct bpf_insn *insn = prog->insnsi; |
---|
6150 | 11607 | const struct bpf_func_proto *fn; |
---|
6151 | 11608 | const int insn_cnt = prog->len; |
---|
.. | .. |
---|
6154 | 11611 | struct bpf_insn insn_buf[16]; |
---|
6155 | 11612 | struct bpf_prog *new_prog; |
---|
6156 | 11613 | struct bpf_map *map_ptr; |
---|
6157 | | - int i, cnt, delta = 0; |
---|
| 11614 | + int i, ret, cnt, delta = 0; |
---|
6158 | 11615 | |
---|
6159 | 11616 | for (i = 0; i < insn_cnt; i++, insn++) { |
---|
6160 | 11617 | if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || |
---|
.. | .. |
---|
6162 | 11619 | insn->code == (BPF_ALU | BPF_MOD | BPF_X) || |
---|
6163 | 11620 | insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { |
---|
6164 | 11621 | bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; |
---|
6165 | | - struct bpf_insn mask_and_div[] = { |
---|
6166 | | - BPF_MOV_REG(BPF_CLASS(insn->code), BPF_REG_AX, insn->src_reg), |
---|
| 11622 | + bool isdiv = BPF_OP(insn->code) == BPF_DIV; |
---|
| 11623 | + struct bpf_insn *patchlet; |
---|
| 11624 | + struct bpf_insn chk_and_div[] = { |
---|
6167 | 11625 | /* [R,W]x div 0 -> 0 */ |
---|
6168 | | - BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, 2), |
---|
6169 | | - BPF_RAW_REG(*insn, insn->dst_reg, BPF_REG_AX), |
---|
| 11626 | + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | |
---|
| 11627 | + BPF_JNE | BPF_K, insn->src_reg, |
---|
| 11628 | + 0, 2, 0), |
---|
| 11629 | + BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg), |
---|
6170 | 11630 | BPF_JMP_IMM(BPF_JA, 0, 0, 1), |
---|
6171 | | - BPF_ALU_REG(BPF_CLASS(insn->code), BPF_XOR, insn->dst_reg, insn->dst_reg), |
---|
| 11631 | + *insn, |
---|
6172 | 11632 | }; |
---|
6173 | | - struct bpf_insn mask_and_mod[] = { |
---|
6174 | | - BPF_MOV_REG(BPF_CLASS(insn->code), BPF_REG_AX, insn->src_reg), |
---|
6175 | | - BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, 1 + (is64 ? 0 : 1)), |
---|
6176 | | - BPF_RAW_REG(*insn, insn->dst_reg, BPF_REG_AX), |
---|
| 11633 | + struct bpf_insn chk_and_mod[] = { |
---|
| 11634 | + /* [R,W]x mod 0 -> [R,W]x */ |
---|
| 11635 | + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | |
---|
| 11636 | + BPF_JEQ | BPF_K, insn->src_reg, |
---|
| 11637 | + 0, 1 + (is64 ? 0 : 1), 0), |
---|
| 11638 | + *insn, |
---|
6177 | 11639 | BPF_JMP_IMM(BPF_JA, 0, 0, 1), |
---|
6178 | 11640 | BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), |
---|
6179 | 11641 | }; |
---|
6180 | | - struct bpf_insn *patchlet; |
---|
6181 | 11642 | |
---|
6182 | | - if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || |
---|
6183 | | - insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { |
---|
6184 | | - patchlet = mask_and_div; |
---|
6185 | | - cnt = ARRAY_SIZE(mask_and_div); |
---|
6186 | | - } else { |
---|
6187 | | - patchlet = mask_and_mod; |
---|
6188 | | - cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 2 : 0); |
---|
6189 | | - } |
---|
| 11643 | + patchlet = isdiv ? chk_and_div : chk_and_mod; |
---|
| 11644 | + cnt = isdiv ? ARRAY_SIZE(chk_and_div) : |
---|
| 11645 | + ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); |
---|
6190 | 11646 | |
---|
6191 | 11647 | new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); |
---|
6192 | 11648 | if (!new_prog) |
---|
.. | .. |
---|
6288 | 11744 | * the program array. |
---|
6289 | 11745 | */ |
---|
6290 | 11746 | prog->cb_access = 1; |
---|
6291 | | - env->prog->aux->stack_depth = MAX_BPF_STACK; |
---|
| 11747 | + if (!allow_tail_call_in_subprogs(env)) |
---|
| 11748 | + prog->aux->stack_depth = MAX_BPF_STACK; |
---|
| 11749 | + prog->aux->max_pkt_offset = MAX_PACKET_OFF; |
---|
6292 | 11750 | |
---|
6293 | 11751 | /* mark bpf_tail_call as different opcode to avoid |
---|
6294 | 11752 | * conditional branch in the interpreter for every normal |
---|
.. | .. |
---|
6299 | 11757 | insn->code = BPF_JMP | BPF_TAIL_CALL; |
---|
6300 | 11758 | |
---|
6301 | 11759 | aux = &env->insn_aux_data[i + delta]; |
---|
| 11760 | + if (env->bpf_capable && !expect_blinding && |
---|
| 11761 | + prog->jit_requested && |
---|
| 11762 | + !bpf_map_key_poisoned(aux) && |
---|
| 11763 | + !bpf_map_ptr_poisoned(aux) && |
---|
| 11764 | + !bpf_map_ptr_unpriv(aux)) { |
---|
| 11765 | + struct bpf_jit_poke_descriptor desc = { |
---|
| 11766 | + .reason = BPF_POKE_REASON_TAIL_CALL, |
---|
| 11767 | + .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state), |
---|
| 11768 | + .tail_call.key = bpf_map_key_immediate(aux), |
---|
| 11769 | + .insn_idx = i + delta, |
---|
| 11770 | + }; |
---|
| 11771 | + |
---|
| 11772 | + ret = bpf_jit_add_poke_descriptor(prog, &desc); |
---|
| 11773 | + if (ret < 0) { |
---|
| 11774 | + verbose(env, "adding tail call poke descriptor failed\n"); |
---|
| 11775 | + return ret; |
---|
| 11776 | + } |
---|
| 11777 | + |
---|
| 11778 | + insn->imm = ret + 1; |
---|
| 11779 | + continue; |
---|
| 11780 | + } |
---|
| 11781 | + |
---|
6302 | 11782 | if (!bpf_map_ptr_unpriv(aux)) |
---|
6303 | 11783 | continue; |
---|
6304 | 11784 | |
---|
.. | .. |
---|
6313 | 11793 | return -EINVAL; |
---|
6314 | 11794 | } |
---|
6315 | 11795 | |
---|
6316 | | - map_ptr = BPF_MAP_PTR(aux->map_state); |
---|
| 11796 | + map_ptr = BPF_MAP_PTR(aux->map_ptr_state); |
---|
6317 | 11797 | insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, |
---|
6318 | 11798 | map_ptr->max_entries, 2); |
---|
6319 | 11799 | insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, |
---|
.. | .. |
---|
6339 | 11819 | if (prog->jit_requested && BITS_PER_LONG == 64 && |
---|
6340 | 11820 | (insn->imm == BPF_FUNC_map_lookup_elem || |
---|
6341 | 11821 | insn->imm == BPF_FUNC_map_update_elem || |
---|
6342 | | - insn->imm == BPF_FUNC_map_delete_elem)) { |
---|
| 11822 | + insn->imm == BPF_FUNC_map_delete_elem || |
---|
| 11823 | + insn->imm == BPF_FUNC_map_push_elem || |
---|
| 11824 | + insn->imm == BPF_FUNC_map_pop_elem || |
---|
| 11825 | + insn->imm == BPF_FUNC_map_peek_elem)) { |
---|
6343 | 11826 | aux = &env->insn_aux_data[i + delta]; |
---|
6344 | 11827 | if (bpf_map_ptr_poisoned(aux)) |
---|
6345 | 11828 | goto patch_call_imm; |
---|
6346 | 11829 | |
---|
6347 | | - map_ptr = BPF_MAP_PTR(aux->map_state); |
---|
| 11830 | + map_ptr = BPF_MAP_PTR(aux->map_ptr_state); |
---|
6348 | 11831 | ops = map_ptr->ops; |
---|
6349 | 11832 | if (insn->imm == BPF_FUNC_map_lookup_elem && |
---|
6350 | 11833 | ops->map_gen_lookup) { |
---|
6351 | 11834 | cnt = ops->map_gen_lookup(map_ptr, insn_buf); |
---|
6352 | | - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { |
---|
| 11835 | + if (cnt == -EOPNOTSUPP) |
---|
| 11836 | + goto patch_map_ops_generic; |
---|
| 11837 | + if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) { |
---|
6353 | 11838 | verbose(env, "bpf verifier is misconfigured\n"); |
---|
6354 | 11839 | return -EINVAL; |
---|
6355 | 11840 | } |
---|
.. | .. |
---|
6372 | 11857 | BUILD_BUG_ON(!__same_type(ops->map_update_elem, |
---|
6373 | 11858 | (int (*)(struct bpf_map *map, void *key, void *value, |
---|
6374 | 11859 | u64 flags))NULL)); |
---|
| 11860 | + BUILD_BUG_ON(!__same_type(ops->map_push_elem, |
---|
| 11861 | + (int (*)(struct bpf_map *map, void *value, |
---|
| 11862 | + u64 flags))NULL)); |
---|
| 11863 | + BUILD_BUG_ON(!__same_type(ops->map_pop_elem, |
---|
| 11864 | + (int (*)(struct bpf_map *map, void *value))NULL)); |
---|
| 11865 | + BUILD_BUG_ON(!__same_type(ops->map_peek_elem, |
---|
| 11866 | + (int (*)(struct bpf_map *map, void *value))NULL)); |
---|
| 11867 | +patch_map_ops_generic: |
---|
6375 | 11868 | switch (insn->imm) { |
---|
6376 | 11869 | case BPF_FUNC_map_lookup_elem: |
---|
6377 | 11870 | insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - |
---|
.. | .. |
---|
6385 | 11878 | insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - |
---|
6386 | 11879 | __bpf_call_base; |
---|
6387 | 11880 | continue; |
---|
| 11881 | + case BPF_FUNC_map_push_elem: |
---|
| 11882 | + insn->imm = BPF_CAST_CALL(ops->map_push_elem) - |
---|
| 11883 | + __bpf_call_base; |
---|
| 11884 | + continue; |
---|
| 11885 | + case BPF_FUNC_map_pop_elem: |
---|
| 11886 | + insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - |
---|
| 11887 | + __bpf_call_base; |
---|
| 11888 | + continue; |
---|
| 11889 | + case BPF_FUNC_map_peek_elem: |
---|
| 11890 | + insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - |
---|
| 11891 | + __bpf_call_base; |
---|
| 11892 | + continue; |
---|
6388 | 11893 | } |
---|
6389 | 11894 | |
---|
6390 | 11895 | goto patch_call_imm; |
---|
| 11896 | + } |
---|
| 11897 | + |
---|
| 11898 | + if (prog->jit_requested && BITS_PER_LONG == 64 && |
---|
| 11899 | + insn->imm == BPF_FUNC_jiffies64) { |
---|
| 11900 | + struct bpf_insn ld_jiffies_addr[2] = { |
---|
| 11901 | + BPF_LD_IMM64(BPF_REG_0, |
---|
| 11902 | + (unsigned long)&jiffies), |
---|
| 11903 | + }; |
---|
| 11904 | + |
---|
| 11905 | + insn_buf[0] = ld_jiffies_addr[0]; |
---|
| 11906 | + insn_buf[1] = ld_jiffies_addr[1]; |
---|
| 11907 | + insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, |
---|
| 11908 | + BPF_REG_0, 0); |
---|
| 11909 | + cnt = 3; |
---|
| 11910 | + |
---|
| 11911 | + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, |
---|
| 11912 | + cnt); |
---|
| 11913 | + if (!new_prog) |
---|
| 11914 | + return -ENOMEM; |
---|
| 11915 | + |
---|
| 11916 | + delta += cnt - 1; |
---|
| 11917 | + env->prog = prog = new_prog; |
---|
| 11918 | + insn = new_prog->insnsi + i + delta; |
---|
| 11919 | + continue; |
---|
6391 | 11920 | } |
---|
6392 | 11921 | |
---|
6393 | 11922 | patch_call_imm: |
---|
.. | .. |
---|
6404 | 11933 | insn->imm = fn->func - __bpf_call_base; |
---|
6405 | 11934 | } |
---|
6406 | 11935 | |
---|
| 11936 | + /* Since poke tab is now finalized, publish aux to tracker. */ |
---|
| 11937 | + for (i = 0; i < prog->aux->size_poke_tab; i++) { |
---|
| 11938 | + map_ptr = prog->aux->poke_tab[i].tail_call.map; |
---|
| 11939 | + if (!map_ptr->ops->map_poke_track || |
---|
| 11940 | + !map_ptr->ops->map_poke_untrack || |
---|
| 11941 | + !map_ptr->ops->map_poke_run) { |
---|
| 11942 | + verbose(env, "bpf verifier is misconfigured\n"); |
---|
| 11943 | + return -EINVAL; |
---|
| 11944 | + } |
---|
| 11945 | + |
---|
| 11946 | + ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux); |
---|
| 11947 | + if (ret < 0) { |
---|
| 11948 | + verbose(env, "tracking tail call prog failed\n"); |
---|
| 11949 | + return ret; |
---|
| 11950 | + } |
---|
| 11951 | + } |
---|
| 11952 | + |
---|
6407 | 11953 | return 0; |
---|
6408 | 11954 | } |
---|
6409 | 11955 | |
---|
.. | .. |
---|
6412 | 11958 | struct bpf_verifier_state_list *sl, *sln; |
---|
6413 | 11959 | int i; |
---|
6414 | 11960 | |
---|
| 11961 | + sl = env->free_list; |
---|
| 11962 | + while (sl) { |
---|
| 11963 | + sln = sl->next; |
---|
| 11964 | + free_verifier_state(&sl->state, false); |
---|
| 11965 | + kfree(sl); |
---|
| 11966 | + sl = sln; |
---|
| 11967 | + } |
---|
| 11968 | + env->free_list = NULL; |
---|
| 11969 | + |
---|
6415 | 11970 | if (!env->explored_states) |
---|
6416 | 11971 | return; |
---|
6417 | 11972 | |
---|
6418 | | - for (i = 0; i < env->prog->len; i++) { |
---|
| 11973 | + for (i = 0; i < state_htab_size(env); i++) { |
---|
6419 | 11974 | sl = env->explored_states[i]; |
---|
6420 | 11975 | |
---|
6421 | | - if (sl) |
---|
6422 | | - while (sl != STATE_LIST_MARK) { |
---|
6423 | | - sln = sl->next; |
---|
6424 | | - free_verifier_state(&sl->state, false); |
---|
6425 | | - kfree(sl); |
---|
6426 | | - sl = sln; |
---|
6427 | | - } |
---|
| 11976 | + while (sl) { |
---|
| 11977 | + sln = sl->next; |
---|
| 11978 | + free_verifier_state(&sl->state, false); |
---|
| 11979 | + kfree(sl); |
---|
| 11980 | + sl = sln; |
---|
| 11981 | + } |
---|
| 11982 | + env->explored_states[i] = NULL; |
---|
6428 | 11983 | } |
---|
6429 | | - |
---|
6430 | | - kfree(env->explored_states); |
---|
6431 | 11984 | } |
---|
6432 | 11985 | |
---|
6433 | | -int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) |
---|
| 11986 | +static int do_check_common(struct bpf_verifier_env *env, int subprog) |
---|
6434 | 11987 | { |
---|
| 11988 | + bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); |
---|
| 11989 | + struct bpf_verifier_state *state; |
---|
| 11990 | + struct bpf_reg_state *regs; |
---|
| 11991 | + int ret, i; |
---|
| 11992 | + |
---|
| 11993 | + env->prev_linfo = NULL; |
---|
| 11994 | + env->pass_cnt++; |
---|
| 11995 | + |
---|
| 11996 | + state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); |
---|
| 11997 | + if (!state) |
---|
| 11998 | + return -ENOMEM; |
---|
| 11999 | + state->curframe = 0; |
---|
| 12000 | + state->speculative = false; |
---|
| 12001 | + state->branches = 1; |
---|
| 12002 | + state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); |
---|
| 12003 | + if (!state->frame[0]) { |
---|
| 12004 | + kfree(state); |
---|
| 12005 | + return -ENOMEM; |
---|
| 12006 | + } |
---|
| 12007 | + env->cur_state = state; |
---|
| 12008 | + init_func_state(env, state->frame[0], |
---|
| 12009 | + BPF_MAIN_FUNC /* callsite */, |
---|
| 12010 | + 0 /* frameno */, |
---|
| 12011 | + subprog); |
---|
| 12012 | + |
---|
| 12013 | + state->first_insn_idx = env->subprog_info[subprog].start; |
---|
| 12014 | + state->last_insn_idx = -1; |
---|
| 12015 | + |
---|
| 12016 | + regs = state->frame[state->curframe]->regs; |
---|
| 12017 | + if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { |
---|
| 12018 | + ret = btf_prepare_func_args(env, subprog, regs); |
---|
| 12019 | + if (ret) |
---|
| 12020 | + goto out; |
---|
| 12021 | + for (i = BPF_REG_1; i <= BPF_REG_5; i++) { |
---|
| 12022 | + if (regs[i].type == PTR_TO_CTX) |
---|
| 12023 | + mark_reg_known_zero(env, regs, i); |
---|
| 12024 | + else if (regs[i].type == SCALAR_VALUE) |
---|
| 12025 | + mark_reg_unknown(env, regs, i); |
---|
| 12026 | + } |
---|
| 12027 | + } else { |
---|
| 12028 | + /* 1st arg to a function */ |
---|
| 12029 | + regs[BPF_REG_1].type = PTR_TO_CTX; |
---|
| 12030 | + mark_reg_known_zero(env, regs, BPF_REG_1); |
---|
| 12031 | + ret = btf_check_func_arg_match(env, subprog, regs); |
---|
| 12032 | + if (ret == -EFAULT) |
---|
| 12033 | + /* unlikely verifier bug. abort. |
---|
| 12034 | + * ret == 0 and ret < 0 are sadly acceptable for |
---|
| 12035 | + * main() function due to backward compatibility. |
---|
| 12036 | + * Like socket filter program may be written as: |
---|
| 12037 | + * int bpf_prog(struct pt_regs *ctx) |
---|
| 12038 | + * and never dereference that ctx in the program. |
---|
| 12039 | + * 'struct pt_regs' is a type mismatch for socket |
---|
| 12040 | + * filter that should be using 'struct __sk_buff'. |
---|
| 12041 | + */ |
---|
| 12042 | + goto out; |
---|
| 12043 | + } |
---|
| 12044 | + |
---|
| 12045 | + ret = do_check(env); |
---|
| 12046 | +out: |
---|
| 12047 | + /* check for NULL is necessary, since cur_state can be freed inside |
---|
| 12048 | + * do_check() under memory pressure. |
---|
| 12049 | + */ |
---|
| 12050 | + if (env->cur_state) { |
---|
| 12051 | + free_verifier_state(env->cur_state, true); |
---|
| 12052 | + env->cur_state = NULL; |
---|
| 12053 | + } |
---|
| 12054 | + while (!pop_stack(env, NULL, NULL, false)); |
---|
| 12055 | + if (!ret && pop_log) |
---|
| 12056 | + bpf_vlog_reset(&env->log, 0); |
---|
| 12057 | + free_states(env); |
---|
| 12058 | + return ret; |
---|
| 12059 | +} |
---|
| 12060 | + |
---|
| 12061 | +/* Verify all global functions in a BPF program one by one based on their BTF. |
---|
| 12062 | + * All global functions must pass verification. Otherwise the whole program is rejected. |
---|
| 12063 | + * Consider: |
---|
| 12064 | + * int bar(int); |
---|
| 12065 | + * int foo(int f) |
---|
| 12066 | + * { |
---|
| 12067 | + * return bar(f); |
---|
| 12068 | + * } |
---|
| 12069 | + * int bar(int b) |
---|
| 12070 | + * { |
---|
| 12071 | + * ... |
---|
| 12072 | + * } |
---|
| 12073 | + * foo() will be verified first for R1=any_scalar_value. During verification it |
---|
| 12074 | + * will be assumed that bar() already verified successfully and call to bar() |
---|
| 12075 | + * from foo() will be checked for type match only. Later bar() will be verified |
---|
| 12076 | + * independently to check that it's safe for R1=any_scalar_value. |
---|
| 12077 | + */ |
---|
| 12078 | +static int do_check_subprogs(struct bpf_verifier_env *env) |
---|
| 12079 | +{ |
---|
| 12080 | + struct bpf_prog_aux *aux = env->prog->aux; |
---|
| 12081 | + int i, ret; |
---|
| 12082 | + |
---|
| 12083 | + if (!aux->func_info) |
---|
| 12084 | + return 0; |
---|
| 12085 | + |
---|
| 12086 | + for (i = 1; i < env->subprog_cnt; i++) { |
---|
| 12087 | + if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) |
---|
| 12088 | + continue; |
---|
| 12089 | + env->insn_idx = env->subprog_info[i].start; |
---|
| 12090 | + WARN_ON_ONCE(env->insn_idx == 0); |
---|
| 12091 | + ret = do_check_common(env, i); |
---|
| 12092 | + if (ret) { |
---|
| 12093 | + return ret; |
---|
| 12094 | + } else if (env->log.level & BPF_LOG_LEVEL) { |
---|
| 12095 | + verbose(env, |
---|
| 12096 | + "Func#%d is safe for any args that match its prototype\n", |
---|
| 12097 | + i); |
---|
| 12098 | + } |
---|
| 12099 | + } |
---|
| 12100 | + return 0; |
---|
| 12101 | +} |
---|
| 12102 | + |
---|
| 12103 | +static int do_check_main(struct bpf_verifier_env *env) |
---|
| 12104 | +{ |
---|
| 12105 | + int ret; |
---|
| 12106 | + |
---|
| 12107 | + env->insn_idx = 0; |
---|
| 12108 | + ret = do_check_common(env, 0); |
---|
| 12109 | + if (!ret) |
---|
| 12110 | + env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; |
---|
| 12111 | + return ret; |
---|
| 12112 | +} |
---|
| 12113 | + |
---|
| 12114 | + |
---|
| 12115 | +static void print_verification_stats(struct bpf_verifier_env *env) |
---|
| 12116 | +{ |
---|
| 12117 | + int i; |
---|
| 12118 | + |
---|
| 12119 | + if (env->log.level & BPF_LOG_STATS) { |
---|
| 12120 | + verbose(env, "verification time %lld usec\n", |
---|
| 12121 | + div_u64(env->verification_time, 1000)); |
---|
| 12122 | + verbose(env, "stack depth "); |
---|
| 12123 | + for (i = 0; i < env->subprog_cnt; i++) { |
---|
| 12124 | + u32 depth = env->subprog_info[i].stack_depth; |
---|
| 12125 | + |
---|
| 12126 | + verbose(env, "%d", depth); |
---|
| 12127 | + if (i + 1 < env->subprog_cnt) |
---|
| 12128 | + verbose(env, "+"); |
---|
| 12129 | + } |
---|
| 12130 | + verbose(env, "\n"); |
---|
| 12131 | + } |
---|
| 12132 | + verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " |
---|
| 12133 | + "total_states %d peak_states %d mark_read %d\n", |
---|
| 12134 | + env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, |
---|
| 12135 | + env->max_states_per_insn, env->total_states, |
---|
| 12136 | + env->peak_states, env->longest_mark_read_walk); |
---|
| 12137 | +} |
---|
| 12138 | + |
---|
| 12139 | +static int check_struct_ops_btf_id(struct bpf_verifier_env *env) |
---|
| 12140 | +{ |
---|
| 12141 | + const struct btf_type *t, *func_proto; |
---|
| 12142 | + const struct bpf_struct_ops *st_ops; |
---|
| 12143 | + const struct btf_member *member; |
---|
| 12144 | + struct bpf_prog *prog = env->prog; |
---|
| 12145 | + u32 btf_id, member_idx; |
---|
| 12146 | + const char *mname; |
---|
| 12147 | + |
---|
| 12148 | + if (!prog->gpl_compatible) { |
---|
| 12149 | + verbose(env, "struct ops programs must have a GPL compatible license\n"); |
---|
| 12150 | + return -EINVAL; |
---|
| 12151 | + } |
---|
| 12152 | + |
---|
| 12153 | + btf_id = prog->aux->attach_btf_id; |
---|
| 12154 | + st_ops = bpf_struct_ops_find(btf_id); |
---|
| 12155 | + if (!st_ops) { |
---|
| 12156 | + verbose(env, "attach_btf_id %u is not a supported struct\n", |
---|
| 12157 | + btf_id); |
---|
| 12158 | + return -ENOTSUPP; |
---|
| 12159 | + } |
---|
| 12160 | + |
---|
| 12161 | + t = st_ops->type; |
---|
| 12162 | + member_idx = prog->expected_attach_type; |
---|
| 12163 | + if (member_idx >= btf_type_vlen(t)) { |
---|
| 12164 | + verbose(env, "attach to invalid member idx %u of struct %s\n", |
---|
| 12165 | + member_idx, st_ops->name); |
---|
| 12166 | + return -EINVAL; |
---|
| 12167 | + } |
---|
| 12168 | + |
---|
| 12169 | + member = &btf_type_member(t)[member_idx]; |
---|
| 12170 | + mname = btf_name_by_offset(btf_vmlinux, member->name_off); |
---|
| 12171 | + func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, |
---|
| 12172 | + NULL); |
---|
| 12173 | + if (!func_proto) { |
---|
| 12174 | + verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", |
---|
| 12175 | + mname, member_idx, st_ops->name); |
---|
| 12176 | + return -EINVAL; |
---|
| 12177 | + } |
---|
| 12178 | + |
---|
| 12179 | + if (st_ops->check_member) { |
---|
| 12180 | + int err = st_ops->check_member(t, member); |
---|
| 12181 | + |
---|
| 12182 | + if (err) { |
---|
| 12183 | + verbose(env, "attach to unsupported member %s of struct %s\n", |
---|
| 12184 | + mname, st_ops->name); |
---|
| 12185 | + return err; |
---|
| 12186 | + } |
---|
| 12187 | + } |
---|
| 12188 | + |
---|
| 12189 | + prog->aux->attach_func_proto = func_proto; |
---|
| 12190 | + prog->aux->attach_func_name = mname; |
---|
| 12191 | + env->ops = st_ops->verifier_ops; |
---|
| 12192 | + |
---|
| 12193 | + return 0; |
---|
| 12194 | +} |
---|
| 12195 | +#define SECURITY_PREFIX "security_" |
---|
| 12196 | + |
---|
| 12197 | +static int check_attach_modify_return(unsigned long addr, const char *func_name) |
---|
| 12198 | +{ |
---|
| 12199 | + if (within_error_injection_list(addr) || |
---|
| 12200 | + !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) |
---|
| 12201 | + return 0; |
---|
| 12202 | + |
---|
| 12203 | + return -EINVAL; |
---|
| 12204 | +} |
---|
| 12205 | + |
---|
| 12206 | +/* non exhaustive list of sleepable bpf_lsm_*() functions */ |
---|
| 12207 | +BTF_SET_START(btf_sleepable_lsm_hooks) |
---|
| 12208 | +#ifdef CONFIG_BPF_LSM |
---|
| 12209 | +BTF_ID(func, bpf_lsm_bprm_committed_creds) |
---|
| 12210 | +#else |
---|
| 12211 | +BTF_ID_UNUSED |
---|
| 12212 | +#endif |
---|
| 12213 | +BTF_SET_END(btf_sleepable_lsm_hooks) |
---|
| 12214 | + |
---|
| 12215 | +static int check_sleepable_lsm_hook(u32 btf_id) |
---|
| 12216 | +{ |
---|
| 12217 | + return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id); |
---|
| 12218 | +} |
---|
| 12219 | + |
---|
| 12220 | +/* list of non-sleepable functions that are otherwise on |
---|
| 12221 | + * ALLOW_ERROR_INJECTION list |
---|
| 12222 | + */ |
---|
| 12223 | +BTF_SET_START(btf_non_sleepable_error_inject) |
---|
| 12224 | +/* Three functions below can be called from sleepable and non-sleepable context. |
---|
| 12225 | + * Assume non-sleepable from bpf safety point of view. |
---|
| 12226 | + */ |
---|
| 12227 | +BTF_ID(func, __add_to_page_cache_locked) |
---|
| 12228 | +BTF_ID(func, should_fail_alloc_page) |
---|
| 12229 | +BTF_ID(func, should_failslab) |
---|
| 12230 | +BTF_SET_END(btf_non_sleepable_error_inject) |
---|
| 12231 | + |
---|
| 12232 | +static int check_non_sleepable_error_inject(u32 btf_id) |
---|
| 12233 | +{ |
---|
| 12234 | + return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id); |
---|
| 12235 | +} |
---|
| 12236 | + |
---|
| 12237 | +int bpf_check_attach_target(struct bpf_verifier_log *log, |
---|
| 12238 | + const struct bpf_prog *prog, |
---|
| 12239 | + const struct bpf_prog *tgt_prog, |
---|
| 12240 | + u32 btf_id, |
---|
| 12241 | + struct bpf_attach_target_info *tgt_info) |
---|
| 12242 | +{ |
---|
| 12243 | + bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; |
---|
| 12244 | + const char prefix[] = "btf_trace_"; |
---|
| 12245 | + int ret = 0, subprog = -1, i; |
---|
| 12246 | + const struct btf_type *t; |
---|
| 12247 | + bool conservative = true; |
---|
| 12248 | + const char *tname; |
---|
| 12249 | + struct btf *btf; |
---|
| 12250 | + long addr = 0; |
---|
| 12251 | + |
---|
| 12252 | + if (!btf_id) { |
---|
| 12253 | + bpf_log(log, "Tracing programs must provide btf_id\n"); |
---|
| 12254 | + return -EINVAL; |
---|
| 12255 | + } |
---|
| 12256 | + btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux; |
---|
| 12257 | + if (!btf) { |
---|
| 12258 | + bpf_log(log, |
---|
| 12259 | + "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); |
---|
| 12260 | + return -EINVAL; |
---|
| 12261 | + } |
---|
| 12262 | + t = btf_type_by_id(btf, btf_id); |
---|
| 12263 | + if (!t) { |
---|
| 12264 | + bpf_log(log, "attach_btf_id %u is invalid\n", btf_id); |
---|
| 12265 | + return -EINVAL; |
---|
| 12266 | + } |
---|
| 12267 | + tname = btf_name_by_offset(btf, t->name_off); |
---|
| 12268 | + if (!tname) { |
---|
| 12269 | + bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id); |
---|
| 12270 | + return -EINVAL; |
---|
| 12271 | + } |
---|
| 12272 | + if (tgt_prog) { |
---|
| 12273 | + struct bpf_prog_aux *aux = tgt_prog->aux; |
---|
| 12274 | + |
---|
| 12275 | + for (i = 0; i < aux->func_info_cnt; i++) |
---|
| 12276 | + if (aux->func_info[i].type_id == btf_id) { |
---|
| 12277 | + subprog = i; |
---|
| 12278 | + break; |
---|
| 12279 | + } |
---|
| 12280 | + if (subprog == -1) { |
---|
| 12281 | + bpf_log(log, "Subprog %s doesn't exist\n", tname); |
---|
| 12282 | + return -EINVAL; |
---|
| 12283 | + } |
---|
| 12284 | + conservative = aux->func_info_aux[subprog].unreliable; |
---|
| 12285 | + if (prog_extension) { |
---|
| 12286 | + if (conservative) { |
---|
| 12287 | + bpf_log(log, |
---|
| 12288 | + "Cannot replace static functions\n"); |
---|
| 12289 | + return -EINVAL; |
---|
| 12290 | + } |
---|
| 12291 | + if (!prog->jit_requested) { |
---|
| 12292 | + bpf_log(log, |
---|
| 12293 | + "Extension programs should be JITed\n"); |
---|
| 12294 | + return -EINVAL; |
---|
| 12295 | + } |
---|
| 12296 | + } |
---|
| 12297 | + if (!tgt_prog->jited) { |
---|
| 12298 | + bpf_log(log, "Can attach to only JITed progs\n"); |
---|
| 12299 | + return -EINVAL; |
---|
| 12300 | + } |
---|
| 12301 | + if (tgt_prog->type == prog->type) { |
---|
| 12302 | + /* Cannot fentry/fexit another fentry/fexit program. |
---|
| 12303 | + * Cannot attach program extension to another extension. |
---|
| 12304 | + * It's ok to attach fentry/fexit to extension program. |
---|
| 12305 | + */ |
---|
| 12306 | + bpf_log(log, "Cannot recursively attach\n"); |
---|
| 12307 | + return -EINVAL; |
---|
| 12308 | + } |
---|
| 12309 | + if (tgt_prog->type == BPF_PROG_TYPE_TRACING && |
---|
| 12310 | + prog_extension && |
---|
| 12311 | + (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || |
---|
| 12312 | + tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) { |
---|
| 12313 | + /* Program extensions can extend all program types |
---|
| 12314 | + * except fentry/fexit. The reason is the following. |
---|
| 12315 | + * The fentry/fexit programs are used for performance |
---|
| 12316 | + * analysis, stats and can be attached to any program |
---|
| 12317 | + * type except themselves. When extension program is |
---|
| 12318 | + * replacing XDP function it is necessary to allow |
---|
| 12319 | + * performance analysis of all functions. Both original |
---|
| 12320 | + * XDP program and its program extension. Hence |
---|
| 12321 | + * attaching fentry/fexit to BPF_PROG_TYPE_EXT is |
---|
| 12322 | + * allowed. If extending of fentry/fexit was allowed it |
---|
| 12323 | + * would be possible to create long call chain |
---|
| 12324 | + * fentry->extension->fentry->extension beyond |
---|
| 12325 | + * reasonable stack size. Hence extending fentry is not |
---|
| 12326 | + * allowed. |
---|
| 12327 | + */ |
---|
| 12328 | + bpf_log(log, "Cannot extend fentry/fexit\n"); |
---|
| 12329 | + return -EINVAL; |
---|
| 12330 | + } |
---|
| 12331 | + } else { |
---|
| 12332 | + if (prog_extension) { |
---|
| 12333 | + bpf_log(log, "Cannot replace kernel functions\n"); |
---|
| 12334 | + return -EINVAL; |
---|
| 12335 | + } |
---|
| 12336 | + } |
---|
| 12337 | + |
---|
| 12338 | + switch (prog->expected_attach_type) { |
---|
| 12339 | + case BPF_TRACE_RAW_TP: |
---|
| 12340 | + if (tgt_prog) { |
---|
| 12341 | + bpf_log(log, |
---|
| 12342 | + "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); |
---|
| 12343 | + return -EINVAL; |
---|
| 12344 | + } |
---|
| 12345 | + if (!btf_type_is_typedef(t)) { |
---|
| 12346 | + bpf_log(log, "attach_btf_id %u is not a typedef\n", |
---|
| 12347 | + btf_id); |
---|
| 12348 | + return -EINVAL; |
---|
| 12349 | + } |
---|
| 12350 | + if (strncmp(prefix, tname, sizeof(prefix) - 1)) { |
---|
| 12351 | + bpf_log(log, "attach_btf_id %u points to wrong type name %s\n", |
---|
| 12352 | + btf_id, tname); |
---|
| 12353 | + return -EINVAL; |
---|
| 12354 | + } |
---|
| 12355 | + tname += sizeof(prefix) - 1; |
---|
| 12356 | + t = btf_type_by_id(btf, t->type); |
---|
| 12357 | + if (!btf_type_is_ptr(t)) |
---|
| 12358 | + /* should never happen in valid vmlinux build */ |
---|
| 12359 | + return -EINVAL; |
---|
| 12360 | + t = btf_type_by_id(btf, t->type); |
---|
| 12361 | + if (!btf_type_is_func_proto(t)) |
---|
| 12362 | + /* should never happen in valid vmlinux build */ |
---|
| 12363 | + return -EINVAL; |
---|
| 12364 | + |
---|
| 12365 | + break; |
---|
| 12366 | + case BPF_TRACE_ITER: |
---|
| 12367 | + if (!btf_type_is_func(t)) { |
---|
| 12368 | + bpf_log(log, "attach_btf_id %u is not a function\n", |
---|
| 12369 | + btf_id); |
---|
| 12370 | + return -EINVAL; |
---|
| 12371 | + } |
---|
| 12372 | + t = btf_type_by_id(btf, t->type); |
---|
| 12373 | + if (!btf_type_is_func_proto(t)) |
---|
| 12374 | + return -EINVAL; |
---|
| 12375 | + ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel); |
---|
| 12376 | + if (ret) |
---|
| 12377 | + return ret; |
---|
| 12378 | + break; |
---|
| 12379 | + default: |
---|
| 12380 | + if (!prog_extension) |
---|
| 12381 | + return -EINVAL; |
---|
| 12382 | + fallthrough; |
---|
| 12383 | + case BPF_MODIFY_RETURN: |
---|
| 12384 | + case BPF_LSM_MAC: |
---|
| 12385 | + case BPF_TRACE_FENTRY: |
---|
| 12386 | + case BPF_TRACE_FEXIT: |
---|
| 12387 | + if (!btf_type_is_func(t)) { |
---|
| 12388 | + bpf_log(log, "attach_btf_id %u is not a function\n", |
---|
| 12389 | + btf_id); |
---|
| 12390 | + return -EINVAL; |
---|
| 12391 | + } |
---|
| 12392 | + if (prog_extension && |
---|
| 12393 | + btf_check_type_match(log, prog, btf, t)) |
---|
| 12394 | + return -EINVAL; |
---|
| 12395 | + t = btf_type_by_id(btf, t->type); |
---|
| 12396 | + if (!btf_type_is_func_proto(t)) |
---|
| 12397 | + return -EINVAL; |
---|
| 12398 | + |
---|
| 12399 | + if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) && |
---|
| 12400 | + (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type || |
---|
| 12401 | + prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type)) |
---|
| 12402 | + return -EINVAL; |
---|
| 12403 | + |
---|
| 12404 | + if (tgt_prog && conservative) |
---|
| 12405 | + t = NULL; |
---|
| 12406 | + |
---|
| 12407 | + ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel); |
---|
| 12408 | + if (ret < 0) |
---|
| 12409 | + return ret; |
---|
| 12410 | + |
---|
| 12411 | + if (tgt_prog) { |
---|
| 12412 | + if (subprog == 0) |
---|
| 12413 | + addr = (long) tgt_prog->bpf_func; |
---|
| 12414 | + else |
---|
| 12415 | + addr = (long) tgt_prog->aux->func[subprog]->bpf_func; |
---|
| 12416 | + } else { |
---|
| 12417 | + addr = kallsyms_lookup_name(tname); |
---|
| 12418 | + if (!addr) { |
---|
| 12419 | + bpf_log(log, |
---|
| 12420 | + "The address of function %s cannot be found\n", |
---|
| 12421 | + tname); |
---|
| 12422 | + return -ENOENT; |
---|
| 12423 | + } |
---|
| 12424 | + } |
---|
| 12425 | + |
---|
| 12426 | + if (prog->aux->sleepable) { |
---|
| 12427 | + ret = -EINVAL; |
---|
| 12428 | + switch (prog->type) { |
---|
| 12429 | + case BPF_PROG_TYPE_TRACING: |
---|
| 12430 | + /* fentry/fexit/fmod_ret progs can be sleepable only if they are |
---|
| 12431 | + * attached to ALLOW_ERROR_INJECTION and are not in denylist. |
---|
| 12432 | + */ |
---|
| 12433 | + if (!check_non_sleepable_error_inject(btf_id) && |
---|
| 12434 | + within_error_injection_list(addr)) |
---|
| 12435 | + ret = 0; |
---|
| 12436 | + break; |
---|
| 12437 | + case BPF_PROG_TYPE_LSM: |
---|
| 12438 | + /* LSM progs check that they are attached to bpf_lsm_*() funcs. |
---|
| 12439 | + * Only some of them are sleepable. |
---|
| 12440 | + */ |
---|
| 12441 | + if (check_sleepable_lsm_hook(btf_id)) |
---|
| 12442 | + ret = 0; |
---|
| 12443 | + break; |
---|
| 12444 | + default: |
---|
| 12445 | + break; |
---|
| 12446 | + } |
---|
| 12447 | + if (ret) { |
---|
| 12448 | + bpf_log(log, "%s is not sleepable\n", tname); |
---|
| 12449 | + return ret; |
---|
| 12450 | + } |
---|
| 12451 | + } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) { |
---|
| 12452 | + if (tgt_prog) { |
---|
| 12453 | + bpf_log(log, "can't modify return codes of BPF programs\n"); |
---|
| 12454 | + return -EINVAL; |
---|
| 12455 | + } |
---|
| 12456 | + ret = check_attach_modify_return(addr, tname); |
---|
| 12457 | + if (ret) { |
---|
| 12458 | + bpf_log(log, "%s() is not modifiable\n", tname); |
---|
| 12459 | + return ret; |
---|
| 12460 | + } |
---|
| 12461 | + } |
---|
| 12462 | + |
---|
| 12463 | + break; |
---|
| 12464 | + } |
---|
| 12465 | + tgt_info->tgt_addr = addr; |
---|
| 12466 | + tgt_info->tgt_name = tname; |
---|
| 12467 | + tgt_info->tgt_type = t; |
---|
| 12468 | + return 0; |
---|
| 12469 | +} |
---|
| 12470 | + |
---|
| 12471 | +static int check_attach_btf_id(struct bpf_verifier_env *env) |
---|
| 12472 | +{ |
---|
| 12473 | + struct bpf_prog *prog = env->prog; |
---|
| 12474 | + struct bpf_prog *tgt_prog = prog->aux->dst_prog; |
---|
| 12475 | + struct bpf_attach_target_info tgt_info = {}; |
---|
| 12476 | + u32 btf_id = prog->aux->attach_btf_id; |
---|
| 12477 | + struct bpf_trampoline *tr; |
---|
| 12478 | + int ret; |
---|
| 12479 | + u64 key; |
---|
| 12480 | + |
---|
| 12481 | + if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && |
---|
| 12482 | + prog->type != BPF_PROG_TYPE_LSM) { |
---|
| 12483 | + verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n"); |
---|
| 12484 | + return -EINVAL; |
---|
| 12485 | + } |
---|
| 12486 | + |
---|
| 12487 | + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) |
---|
| 12488 | + return check_struct_ops_btf_id(env); |
---|
| 12489 | + |
---|
| 12490 | + if (prog->type != BPF_PROG_TYPE_TRACING && |
---|
| 12491 | + prog->type != BPF_PROG_TYPE_LSM && |
---|
| 12492 | + prog->type != BPF_PROG_TYPE_EXT) |
---|
| 12493 | + return 0; |
---|
| 12494 | + |
---|
| 12495 | + ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info); |
---|
| 12496 | + if (ret) |
---|
| 12497 | + return ret; |
---|
| 12498 | + |
---|
| 12499 | + if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) { |
---|
| 12500 | + /* to make freplace equivalent to their targets, they need to |
---|
| 12501 | + * inherit env->ops and expected_attach_type for the rest of the |
---|
| 12502 | + * verification |
---|
| 12503 | + */ |
---|
| 12504 | + env->ops = bpf_verifier_ops[tgt_prog->type]; |
---|
| 12505 | + prog->expected_attach_type = tgt_prog->expected_attach_type; |
---|
| 12506 | + } |
---|
| 12507 | + |
---|
| 12508 | + /* store info about the attachment target that will be used later */ |
---|
| 12509 | + prog->aux->attach_func_proto = tgt_info.tgt_type; |
---|
| 12510 | + prog->aux->attach_func_name = tgt_info.tgt_name; |
---|
| 12511 | + |
---|
| 12512 | + if (tgt_prog) { |
---|
| 12513 | + prog->aux->saved_dst_prog_type = tgt_prog->type; |
---|
| 12514 | + prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type; |
---|
| 12515 | + } |
---|
| 12516 | + |
---|
| 12517 | + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) { |
---|
| 12518 | + prog->aux->attach_btf_trace = true; |
---|
| 12519 | + return 0; |
---|
| 12520 | + } else if (prog->expected_attach_type == BPF_TRACE_ITER) { |
---|
| 12521 | + if (!bpf_iter_prog_supported(prog)) |
---|
| 12522 | + return -EINVAL; |
---|
| 12523 | + return 0; |
---|
| 12524 | + } |
---|
| 12525 | + |
---|
| 12526 | + if (prog->type == BPF_PROG_TYPE_LSM) { |
---|
| 12527 | + ret = bpf_lsm_verify_prog(&env->log, prog); |
---|
| 12528 | + if (ret < 0) |
---|
| 12529 | + return ret; |
---|
| 12530 | + } |
---|
| 12531 | + |
---|
| 12532 | + key = bpf_trampoline_compute_key(tgt_prog, btf_id); |
---|
| 12533 | + tr = bpf_trampoline_get(key, &tgt_info); |
---|
| 12534 | + if (!tr) |
---|
| 12535 | + return -ENOMEM; |
---|
| 12536 | + |
---|
| 12537 | + prog->aux->dst_trampoline = tr; |
---|
| 12538 | + return 0; |
---|
| 12539 | +} |
---|
| 12540 | + |
---|
| 12541 | +struct btf *bpf_get_btf_vmlinux(void) |
---|
| 12542 | +{ |
---|
| 12543 | + if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { |
---|
| 12544 | + mutex_lock(&bpf_verifier_lock); |
---|
| 12545 | + if (!btf_vmlinux) |
---|
| 12546 | + btf_vmlinux = btf_parse_vmlinux(); |
---|
| 12547 | + mutex_unlock(&bpf_verifier_lock); |
---|
| 12548 | + } |
---|
| 12549 | + return btf_vmlinux; |
---|
| 12550 | +} |
---|
| 12551 | + |
---|
| 12552 | +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, |
---|
| 12553 | + union bpf_attr __user *uattr) |
---|
| 12554 | +{ |
---|
| 12555 | + u64 start_time = ktime_get_ns(); |
---|
6435 | 12556 | struct bpf_verifier_env *env; |
---|
6436 | 12557 | struct bpf_verifier_log *log; |
---|
6437 | | - int ret = -EINVAL; |
---|
| 12558 | + int i, len, ret = -EINVAL; |
---|
| 12559 | + bool is_priv; |
---|
6438 | 12560 | |
---|
6439 | 12561 | /* no program is valid */ |
---|
6440 | 12562 | if (ARRAY_SIZE(bpf_verifier_ops) == 0) |
---|
.. | .. |
---|
6448 | 12570 | return -ENOMEM; |
---|
6449 | 12571 | log = &env->log; |
---|
6450 | 12572 | |
---|
| 12573 | + len = (*prog)->len; |
---|
6451 | 12574 | env->insn_aux_data = |
---|
6452 | | - vzalloc(array_size(sizeof(struct bpf_insn_aux_data), |
---|
6453 | | - (*prog)->len)); |
---|
| 12575 | + vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); |
---|
6454 | 12576 | ret = -ENOMEM; |
---|
6455 | 12577 | if (!env->insn_aux_data) |
---|
6456 | 12578 | goto err_free_env; |
---|
| 12579 | + for (i = 0; i < len; i++) |
---|
| 12580 | + env->insn_aux_data[i].orig_idx = i; |
---|
6457 | 12581 | env->prog = *prog; |
---|
6458 | 12582 | env->ops = bpf_verifier_ops[env->prog->type]; |
---|
| 12583 | + is_priv = bpf_capable(); |
---|
| 12584 | + |
---|
| 12585 | + bpf_get_btf_vmlinux(); |
---|
6459 | 12586 | |
---|
6460 | 12587 | /* grab the mutex to protect few globals used by verifier */ |
---|
6461 | | - mutex_lock(&bpf_verifier_lock); |
---|
| 12588 | + if (!is_priv) |
---|
| 12589 | + mutex_lock(&bpf_verifier_lock); |
---|
6462 | 12590 | |
---|
6463 | 12591 | if (attr->log_level || attr->log_buf || attr->log_size) { |
---|
6464 | 12592 | /* user requested verbose verifier output |
---|
.. | .. |
---|
6468 | 12596 | log->ubuf = (char __user *) (unsigned long) attr->log_buf; |
---|
6469 | 12597 | log->len_total = attr->log_size; |
---|
6470 | 12598 | |
---|
6471 | | - ret = -EINVAL; |
---|
6472 | 12599 | /* log attributes have to be sane */ |
---|
6473 | | - if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || |
---|
6474 | | - !log->level || !log->ubuf) |
---|
| 12600 | + if (!bpf_verifier_log_attr_valid(log)) { |
---|
| 12601 | + ret = -EINVAL; |
---|
6475 | 12602 | goto err_unlock; |
---|
| 12603 | + } |
---|
| 12604 | + } |
---|
| 12605 | + |
---|
| 12606 | + if (IS_ERR(btf_vmlinux)) { |
---|
| 12607 | + /* Either gcc or pahole or kernel are broken. */ |
---|
| 12608 | + verbose(env, "in-kernel BTF is malformed\n"); |
---|
| 12609 | + ret = PTR_ERR(btf_vmlinux); |
---|
| 12610 | + goto skip_full_check; |
---|
6476 | 12611 | } |
---|
6477 | 12612 | |
---|
6478 | 12613 | env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); |
---|
6479 | 12614 | if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) |
---|
6480 | 12615 | env->strict_alignment = true; |
---|
6481 | | - |
---|
6482 | 12616 | if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) |
---|
6483 | 12617 | env->strict_alignment = false; |
---|
6484 | 12618 | |
---|
6485 | | - ret = replace_map_fd_with_map_ptr(env); |
---|
6486 | | - if (ret < 0) |
---|
6487 | | - goto skip_full_check; |
---|
| 12619 | + env->allow_ptr_leaks = bpf_allow_ptr_leaks(); |
---|
| 12620 | + env->allow_uninit_stack = bpf_allow_uninit_stack(); |
---|
| 12621 | + env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access(); |
---|
| 12622 | + env->bypass_spec_v1 = bpf_bypass_spec_v1(); |
---|
| 12623 | + env->bypass_spec_v4 = bpf_bypass_spec_v4(); |
---|
| 12624 | + env->bpf_capable = bpf_capable(); |
---|
6488 | 12625 | |
---|
6489 | | - if (bpf_prog_is_dev_bound(env->prog->aux)) { |
---|
6490 | | - ret = bpf_prog_offload_verifier_prep(env); |
---|
6491 | | - if (ret) |
---|
6492 | | - goto skip_full_check; |
---|
6493 | | - } |
---|
| 12626 | + if (is_priv) |
---|
| 12627 | + env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; |
---|
6494 | 12628 | |
---|
6495 | | - env->explored_states = kcalloc(env->prog->len, |
---|
| 12629 | + env->explored_states = kvcalloc(state_htab_size(env), |
---|
6496 | 12630 | sizeof(struct bpf_verifier_state_list *), |
---|
6497 | 12631 | GFP_USER); |
---|
6498 | 12632 | ret = -ENOMEM; |
---|
6499 | 12633 | if (!env->explored_states) |
---|
6500 | 12634 | goto skip_full_check; |
---|
6501 | 12635 | |
---|
6502 | | - env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); |
---|
| 12636 | + ret = check_subprogs(env); |
---|
| 12637 | + if (ret < 0) |
---|
| 12638 | + goto skip_full_check; |
---|
| 12639 | + |
---|
| 12640 | + ret = check_btf_info(env, attr, uattr); |
---|
| 12641 | + if (ret < 0) |
---|
| 12642 | + goto skip_full_check; |
---|
| 12643 | + |
---|
| 12644 | + ret = check_attach_btf_id(env); |
---|
| 12645 | + if (ret) |
---|
| 12646 | + goto skip_full_check; |
---|
| 12647 | + |
---|
| 12648 | + ret = resolve_pseudo_ldimm64(env); |
---|
| 12649 | + if (ret < 0) |
---|
| 12650 | + goto skip_full_check; |
---|
| 12651 | + |
---|
| 12652 | + if (bpf_prog_is_dev_bound(env->prog->aux)) { |
---|
| 12653 | + ret = bpf_prog_offload_verifier_prep(env->prog); |
---|
| 12654 | + if (ret) |
---|
| 12655 | + goto skip_full_check; |
---|
| 12656 | + } |
---|
6503 | 12657 | |
---|
6504 | 12658 | ret = check_cfg(env); |
---|
6505 | 12659 | if (ret < 0) |
---|
6506 | 12660 | goto skip_full_check; |
---|
6507 | 12661 | |
---|
6508 | | - ret = do_check(env); |
---|
6509 | | - if (env->cur_state) { |
---|
6510 | | - free_verifier_state(env->cur_state, true); |
---|
6511 | | - env->cur_state = NULL; |
---|
6512 | | - } |
---|
| 12662 | + ret = do_check_subprogs(env); |
---|
| 12663 | + ret = ret ?: do_check_main(env); |
---|
| 12664 | + |
---|
| 12665 | + if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) |
---|
| 12666 | + ret = bpf_prog_offload_finalize(env); |
---|
6513 | 12667 | |
---|
6514 | 12668 | skip_full_check: |
---|
6515 | | - while (!pop_stack(env, NULL, NULL)); |
---|
6516 | | - free_states(env); |
---|
6517 | | - |
---|
6518 | | - if (ret == 0) |
---|
6519 | | - sanitize_dead_code(env); |
---|
| 12669 | + kvfree(env->explored_states); |
---|
6520 | 12670 | |
---|
6521 | 12671 | if (ret == 0) |
---|
6522 | 12672 | ret = check_max_stack_depth(env); |
---|
| 12673 | + |
---|
| 12674 | + /* instruction rewrites happen after this point */ |
---|
| 12675 | + if (is_priv) { |
---|
| 12676 | + if (ret == 0) |
---|
| 12677 | + opt_hard_wire_dead_code_branches(env); |
---|
| 12678 | + if (ret == 0) |
---|
| 12679 | + ret = opt_remove_dead_code(env); |
---|
| 12680 | + if (ret == 0) |
---|
| 12681 | + ret = opt_remove_nops(env); |
---|
| 12682 | + } else { |
---|
| 12683 | + if (ret == 0) |
---|
| 12684 | + sanitize_dead_code(env); |
---|
| 12685 | + } |
---|
6523 | 12686 | |
---|
6524 | 12687 | if (ret == 0) |
---|
6525 | 12688 | /* program is valid, convert *(u32*)(ctx + off) accesses */ |
---|
.. | .. |
---|
6528 | 12691 | if (ret == 0) |
---|
6529 | 12692 | ret = fixup_bpf_calls(env); |
---|
6530 | 12693 | |
---|
| 12694 | + /* do 32-bit optimization after insn patching has done so those patched |
---|
| 12695 | + * insns could be handled correctly. |
---|
| 12696 | + */ |
---|
| 12697 | + if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) { |
---|
| 12698 | + ret = opt_subreg_zext_lo32_rnd_hi32(env, attr); |
---|
| 12699 | + env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret |
---|
| 12700 | + : false; |
---|
| 12701 | + } |
---|
| 12702 | + |
---|
6531 | 12703 | if (ret == 0) |
---|
6532 | 12704 | ret = fixup_call_args(env); |
---|
| 12705 | + |
---|
| 12706 | + env->verification_time = ktime_get_ns() - start_time; |
---|
| 12707 | + print_verification_stats(env); |
---|
6533 | 12708 | |
---|
6534 | 12709 | if (log->level && bpf_verifier_log_full(log)) |
---|
6535 | 12710 | ret = -ENOSPC; |
---|
.. | .. |
---|
6559 | 12734 | convert_pseudo_ld_imm64(env); |
---|
6560 | 12735 | } |
---|
6561 | 12736 | |
---|
| 12737 | + if (ret == 0) |
---|
| 12738 | + adjust_btf_func(env); |
---|
| 12739 | + |
---|
6562 | 12740 | err_release_maps: |
---|
6563 | 12741 | if (!env->prog->aux->used_maps) |
---|
6564 | 12742 | /* if we didn't copy map pointers into bpf_prog_info, release |
---|
6565 | 12743 | * them now. Otherwise free_used_maps() will release them. |
---|
6566 | 12744 | */ |
---|
6567 | 12745 | release_maps(env); |
---|
| 12746 | + |
---|
| 12747 | + /* extension progs temporarily inherit the attach_type of their targets |
---|
| 12748 | + for verification purposes, so set it back to zero before returning |
---|
| 12749 | + */ |
---|
| 12750 | + if (env->prog->type == BPF_PROG_TYPE_EXT) |
---|
| 12751 | + env->prog->expected_attach_type = 0; |
---|
| 12752 | + |
---|
6568 | 12753 | *prog = env->prog; |
---|
6569 | 12754 | err_unlock: |
---|
6570 | | - mutex_unlock(&bpf_verifier_lock); |
---|
| 12755 | + if (!is_priv) |
---|
| 12756 | + mutex_unlock(&bpf_verifier_lock); |
---|
6571 | 12757 | vfree(env->insn_aux_data); |
---|
6572 | 12758 | err_free_env: |
---|
6573 | 12759 | kfree(env); |
---|