forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/kernel/bpf/verifier.c
@@ -1,19 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
+ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
  */
+#include <uapi/linux/btf.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/bpf_verifier.h>
 #include <linux/filter.h>
 #include <net/netlink.h>
@@ -23,16 +18,22 @@
 #include <linux/bsearch.h>
 #include <linux/sort.h>
 #include <linux/perf_event.h>
+#include <linux/ctype.h>
+#include <linux/error-injection.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
 
 #include "disasm.h"
 
 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 	[_id] = & _name ## _verifier_ops,
 #define BPF_MAP_TYPE(_id, _ops)
+#define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
 #undef BPF_MAP_TYPE
+#undef BPF_LINK_TYPE
 };
 
 /* bpf_check() is a static code analyzer that walks eBPF program
@@ -80,8 +81,8 @@
  * (like pointer plus pointer becomes SCALAR_VALUE type)
  *
  * When verifier sees load or store instructions the type of base register
- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
- * types recognized by check_mem_access() function.
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
+ * four pointer types recognized by check_mem_access() function.
  *
  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  * and the range of [ptr, ptr + map's value_size) is accessible.
@@ -140,6 +141,24 @@
  *
  * After the call R0 is set to return type of the function and registers R1-R5
  * are set to NOT_INIT to indicate that they are no longer readable.
+ *
+ * The following reference types represent a potential reference to a kernel
+ * resource which, after first being allocated, must be checked and freed by
+ * the BPF program:
+ * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
+ *
+ * When the verifier sees a helper call return a reference type, it allocates a
+ * pointer id for the reference and stores it in the current function state.
+ * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
+ * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
+ * passes through a NULL-check conditional. For the branch wherein the state is
+ * changed to CONST_IMM, the verifier releases the reference.
+ *
+ * For each helper function that allocates a reference, such as
+ * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
+ * bpf_sk_release(). When a reference type passes into the release function,
+ * the verifier also releases the reference. If any unchecked or unreleased
+ * reference remains at the end of the program, the verifier rejects it.
  */
 
 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
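As a rough illustration of the acquire/NULL-check/release discipline described in the comment added above (a sketch only, not part of this diff; it assumes the in-tree bpf_sk_lookup_tcp()/bpf_sk_release() helpers and libbpf's bpf_helpers.h):

/* Illustrative sketch: a BPF program the verifier would accept under the new
 * reference tracking. The socket returned by bpf_sk_lookup_tcp() is
 * NULL-checked and released on every path; leaking it would be rejected.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int sk_ref_example(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = {};
	struct bpf_sock *sk;

	tuple.ipv4.dport = bpf_htons(80);
	/* acquire: sk is PTR_TO_SOCKET_OR_NULL with a fresh reference id */
	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
			       BPF_F_CURRENT_NETNS, 0);
	if (!sk)		/* NULL check: sk becomes PTR_TO_SOCKET */
		return 0;
	bpf_sk_release(sk);	/* release the reference before exiting */
	return 0;
}

char _license[] SEC("license") = "GPL";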
@@ -152,11 +171,15 @@
 	int insn_idx;
 	int prev_insn_idx;
 	struct bpf_verifier_stack_elem *next;
+	/* length of verifier log at the time this state was pushed on stack */
+	u32 log_pos;
 };
 
-#define BPF_COMPLEXITY_LIMIT_INSNS	131072
-#define BPF_COMPLEXITY_LIMIT_STACK	1024
+#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
 #define BPF_COMPLEXITY_LIMIT_STATES	64
+
+#define BPF_MAP_KEY_POISON	(1ULL << 63)
+#define BPF_MAP_KEY_SEEN	(1ULL << 62)
 
 #define BPF_MAP_PTR_UNPRIV	1UL
 #define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) + \
@@ -165,12 +188,12 @@
 
 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 {
-	return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
+	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
 }
 
 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
 {
-	return aux->map_state & BPF_MAP_PTR_UNPRIV;
+	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
 }
 
 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
....@@ -178,8 +201,31 @@
178201 {
179202 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
180203 unpriv |= bpf_map_ptr_unpriv(aux);
181
- aux->map_state = (unsigned long)map |
182
- (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
204
+ aux->map_ptr_state = (unsigned long)map |
205
+ (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
206
+}
207
+
208
+static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
209
+{
210
+ return aux->map_key_state & BPF_MAP_KEY_POISON;
211
+}
212
+
213
+static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
214
+{
215
+ return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
216
+}
217
+
218
+static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
219
+{
220
+ return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
221
+}
222
+
223
+static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
224
+{
225
+ bool poisoned = bpf_map_key_poisoned(aux);
226
+
227
+ aux->map_key_state = state | BPF_MAP_KEY_SEEN |
228
+ (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
183229 }
184230
185231 struct bpf_call_arg_meta {
....@@ -188,10 +234,38 @@
188234 bool pkt_access;
189235 int regno;
190236 int access_size;
237
+ int mem_size;
191238 u64 msize_max_value;
239
+ int ref_obj_id;
240
+ int func_id;
241
+ u32 btf_id;
242
+ u32 ret_btf_id;
192243 };
193244
245
+struct btf *btf_vmlinux;
246
+
194247 static DEFINE_MUTEX(bpf_verifier_lock);
248
+
249
+static const struct bpf_line_info *
250
+find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
251
+{
252
+ const struct bpf_line_info *linfo;
253
+ const struct bpf_prog *prog;
254
+ u32 i, nr_linfo;
255
+
256
+ prog = env->prog;
257
+ nr_linfo = prog->aux->nr_linfo;
258
+
259
+ if (!nr_linfo || insn_off >= prog->len)
260
+ return NULL;
261
+
262
+ linfo = prog->aux->linfo;
263
+ for (i = 1; i < nr_linfo; i++)
264
+ if (insn_off < linfo[i].insn_off)
265
+ break;
266
+
267
+ return &linfo[i - 1];
268
+}
195269
196270 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
197271 va_list args)
....@@ -206,9 +280,25 @@
206280 n = min(log->len_total - log->len_used - 1, n);
207281 log->kbuf[n] = '\0';
208282
283
+ if (log->level == BPF_LOG_KERNEL) {
284
+ pr_err("BPF:%s\n", log->kbuf);
285
+ return;
286
+ }
209287 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
210288 log->len_used += n;
211289 else
290
+ log->ubuf = NULL;
291
+}
292
+
293
+static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
294
+{
295
+ char zero = 0;
296
+
297
+ if (!bpf_verifier_log_needed(log))
298
+ return;
299
+
300
+ log->len_used = new_pos;
301
+ if (put_user(zero, log->ubuf + new_pos))
212302 log->ubuf = NULL;
213303 }
214304
....@@ -243,10 +333,167 @@
243333 va_end(args);
244334 }
245335
336
+__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
337
+ const char *fmt, ...)
338
+{
339
+ va_list args;
340
+
341
+ if (!bpf_verifier_log_needed(log))
342
+ return;
343
+
344
+ va_start(args, fmt);
345
+ bpf_verifier_vlog(log, fmt, args);
346
+ va_end(args);
347
+}
348
+
349
+static const char *ltrim(const char *s)
350
+{
351
+ while (isspace(*s))
352
+ s++;
353
+
354
+ return s;
355
+}
356
+
357
+__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
358
+ u32 insn_off,
359
+ const char *prefix_fmt, ...)
360
+{
361
+ const struct bpf_line_info *linfo;
362
+
363
+ if (!bpf_verifier_log_needed(&env->log))
364
+ return;
365
+
366
+ linfo = find_linfo(env, insn_off);
367
+ if (!linfo || linfo == env->prev_linfo)
368
+ return;
369
+
370
+ if (prefix_fmt) {
371
+ va_list args;
372
+
373
+ va_start(args, prefix_fmt);
374
+ bpf_verifier_vlog(&env->log, prefix_fmt, args);
375
+ va_end(args);
376
+ }
377
+
378
+ verbose(env, "%s\n",
379
+ ltrim(btf_name_by_offset(env->prog->aux->btf,
380
+ linfo->line_off)));
381
+
382
+ env->prev_linfo = linfo;
383
+}
384
+
246385 static bool type_is_pkt_pointer(enum bpf_reg_type type)
247386 {
248387 return type == PTR_TO_PACKET ||
249388 type == PTR_TO_PACKET_META;
389
+}
390
+
391
+static bool type_is_sk_pointer(enum bpf_reg_type type)
392
+{
393
+ return type == PTR_TO_SOCKET ||
394
+ type == PTR_TO_SOCK_COMMON ||
395
+ type == PTR_TO_TCP_SOCK ||
396
+ type == PTR_TO_XDP_SOCK;
397
+}
398
+
399
+static bool reg_type_not_null(enum bpf_reg_type type)
400
+{
401
+ return type == PTR_TO_SOCKET ||
402
+ type == PTR_TO_TCP_SOCK ||
403
+ type == PTR_TO_MAP_VALUE ||
404
+ type == PTR_TO_SOCK_COMMON;
405
+}
406
+
407
+static bool reg_type_may_be_null(enum bpf_reg_type type)
408
+{
409
+ return type == PTR_TO_MAP_VALUE_OR_NULL ||
410
+ type == PTR_TO_SOCKET_OR_NULL ||
411
+ type == PTR_TO_SOCK_COMMON_OR_NULL ||
412
+ type == PTR_TO_TCP_SOCK_OR_NULL ||
413
+ type == PTR_TO_BTF_ID_OR_NULL ||
414
+ type == PTR_TO_MEM_OR_NULL ||
415
+ type == PTR_TO_RDONLY_BUF_OR_NULL ||
416
+ type == PTR_TO_RDWR_BUF_OR_NULL;
417
+}
418
+
419
+static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
420
+{
421
+ return reg->type == PTR_TO_MAP_VALUE &&
422
+ map_value_has_spin_lock(reg->map_ptr);
423
+}
424
+
425
+static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
426
+{
427
+ return type == PTR_TO_SOCKET ||
428
+ type == PTR_TO_SOCKET_OR_NULL ||
429
+ type == PTR_TO_TCP_SOCK ||
430
+ type == PTR_TO_TCP_SOCK_OR_NULL ||
431
+ type == PTR_TO_MEM ||
432
+ type == PTR_TO_MEM_OR_NULL;
433
+}
434
+
435
+static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
436
+{
437
+ return type == ARG_PTR_TO_SOCK_COMMON;
438
+}
439
+
440
+static bool arg_type_may_be_null(enum bpf_arg_type type)
441
+{
442
+ return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
443
+ type == ARG_PTR_TO_MEM_OR_NULL ||
444
+ type == ARG_PTR_TO_CTX_OR_NULL ||
445
+ type == ARG_PTR_TO_SOCKET_OR_NULL ||
446
+ type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
447
+}
448
+
449
+/* Determine whether the function releases some resources allocated by another
450
+ * function call. The first reference type argument will be assumed to be
451
+ * released by release_reference().
452
+ */
453
+static bool is_release_function(enum bpf_func_id func_id)
454
+{
455
+ return func_id == BPF_FUNC_sk_release ||
456
+ func_id == BPF_FUNC_ringbuf_submit ||
457
+ func_id == BPF_FUNC_ringbuf_discard;
458
+}
459
+
460
+static bool may_be_acquire_function(enum bpf_func_id func_id)
461
+{
462
+ return func_id == BPF_FUNC_sk_lookup_tcp ||
463
+ func_id == BPF_FUNC_sk_lookup_udp ||
464
+ func_id == BPF_FUNC_skc_lookup_tcp ||
465
+ func_id == BPF_FUNC_map_lookup_elem ||
466
+ func_id == BPF_FUNC_ringbuf_reserve;
467
+}
468
+
469
+static bool is_acquire_function(enum bpf_func_id func_id,
470
+ const struct bpf_map *map)
471
+{
472
+ enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
473
+
474
+ if (func_id == BPF_FUNC_sk_lookup_tcp ||
475
+ func_id == BPF_FUNC_sk_lookup_udp ||
476
+ func_id == BPF_FUNC_skc_lookup_tcp ||
477
+ func_id == BPF_FUNC_ringbuf_reserve)
478
+ return true;
479
+
480
+ if (func_id == BPF_FUNC_map_lookup_elem &&
481
+ (map_type == BPF_MAP_TYPE_SOCKMAP ||
482
+ map_type == BPF_MAP_TYPE_SOCKHASH))
483
+ return true;
484
+
485
+ return false;
486
+}
487
+
488
+static bool is_ptr_cast_function(enum bpf_func_id func_id)
489
+{
490
+ return func_id == BPF_FUNC_tcp_sock ||
491
+ func_id == BPF_FUNC_sk_fullsock ||
492
+ func_id == BPF_FUNC_skc_to_tcp_sock ||
493
+ func_id == BPF_FUNC_skc_to_tcp6_sock ||
494
+ func_id == BPF_FUNC_skc_to_udp6_sock ||
495
+ func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
496
+ func_id == BPF_FUNC_skc_to_tcp_request_sock;
250497 }
251498
252499 /* string representation of 'enum bpf_reg_type' */
....@@ -261,17 +508,44 @@
261508 [PTR_TO_PACKET] = "pkt",
262509 [PTR_TO_PACKET_META] = "pkt_meta",
263510 [PTR_TO_PACKET_END] = "pkt_end",
511
+ [PTR_TO_FLOW_KEYS] = "flow_keys",
512
+ [PTR_TO_SOCKET] = "sock",
513
+ [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
514
+ [PTR_TO_SOCK_COMMON] = "sock_common",
515
+ [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
516
+ [PTR_TO_TCP_SOCK] = "tcp_sock",
517
+ [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
518
+ [PTR_TO_TP_BUFFER] = "tp_buffer",
519
+ [PTR_TO_XDP_SOCK] = "xdp_sock",
520
+ [PTR_TO_BTF_ID] = "ptr_",
521
+ [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
522
+ [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
523
+ [PTR_TO_MEM] = "mem",
524
+ [PTR_TO_MEM_OR_NULL] = "mem_or_null",
525
+ [PTR_TO_RDONLY_BUF] = "rdonly_buf",
526
+ [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
527
+ [PTR_TO_RDWR_BUF] = "rdwr_buf",
528
+ [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
529
+};
530
+
531
+static char slot_type_char[] = {
532
+ [STACK_INVALID] = '?',
533
+ [STACK_SPILL] = 'r',
534
+ [STACK_MISC] = 'm',
535
+ [STACK_ZERO] = '0',
264536 };
265537
266538 static void print_liveness(struct bpf_verifier_env *env,
267539 enum bpf_reg_liveness live)
268540 {
269
- if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
541
+ if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
270542 verbose(env, "_");
271543 if (live & REG_LIVE_READ)
272544 verbose(env, "r");
273545 if (live & REG_LIVE_WRITTEN)
274546 verbose(env, "w");
547
+ if (live & REG_LIVE_DONE)
548
+ verbose(env, "D");
275549 }
276550
277551 static struct bpf_func_state *func(struct bpf_verifier_env *env,
....@@ -280,6 +554,12 @@
280554 struct bpf_verifier_state *cur = env->cur_state;
281555
282556 return cur->frame[reg->frameno];
557
+}
558
+
559
+const char *kernel_type_name(u32 id)
560
+{
561
+ return btf_name_by_offset(btf_vmlinux,
562
+ btf_type_by_id(btf_vmlinux, id)->name_off);
283563 }
284564
285565 static void print_verifier_state(struct bpf_verifier_env *env,
....@@ -299,14 +579,20 @@
299579 verbose(env, " R%d", i);
300580 print_liveness(env, reg->live);
301581 verbose(env, "=%s", reg_type_str[t]);
582
+ if (t == SCALAR_VALUE && reg->precise)
583
+ verbose(env, "P");
302584 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
303585 tnum_is_const(reg->var_off)) {
304586 /* reg->off should be 0 for SCALAR_VALUE */
305587 verbose(env, "%lld", reg->var_off.value + reg->off);
306
- if (t == PTR_TO_STACK)
307
- verbose(env, ",call_%d", func(env, reg)->callsite);
308588 } else {
589
+ if (t == PTR_TO_BTF_ID ||
590
+ t == PTR_TO_BTF_ID_OR_NULL ||
591
+ t == PTR_TO_PERCPU_BTF_ID)
592
+ verbose(env, "%s", kernel_type_name(reg->btf_id));
309593 verbose(env, "(id=%d", reg->id);
594
+ if (reg_type_may_be_refcounted_or_null(t))
595
+ verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
310596 if (t != SCALAR_VALUE)
311597 verbose(env, ",off=%d", reg->off);
312598 if (type_is_pkt_pointer(t))
....@@ -344,77 +630,189 @@
344630 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
345631 verbose(env, ",var_off=%s", tn_buf);
346632 }
633
+ if (reg->s32_min_value != reg->smin_value &&
634
+ reg->s32_min_value != S32_MIN)
635
+ verbose(env, ",s32_min_value=%d",
636
+ (int)(reg->s32_min_value));
637
+ if (reg->s32_max_value != reg->smax_value &&
638
+ reg->s32_max_value != S32_MAX)
639
+ verbose(env, ",s32_max_value=%d",
640
+ (int)(reg->s32_max_value));
641
+ if (reg->u32_min_value != reg->umin_value &&
642
+ reg->u32_min_value != U32_MIN)
643
+ verbose(env, ",u32_min_value=%d",
644
+ (int)(reg->u32_min_value));
645
+ if (reg->u32_max_value != reg->umax_value &&
646
+ reg->u32_max_value != U32_MAX)
647
+ verbose(env, ",u32_max_value=%d",
648
+ (int)(reg->u32_max_value));
347649 }
348650 verbose(env, ")");
349651 }
350652 }
351653 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
352
- if (state->stack[i].slot_type[0] == STACK_SPILL) {
353
- verbose(env, " fp%d",
354
- (-i - 1) * BPF_REG_SIZE);
355
- print_liveness(env, state->stack[i].spilled_ptr.live);
356
- verbose(env, "=%s",
357
- reg_type_str[state->stack[i].spilled_ptr.type]);
654
+ char types_buf[BPF_REG_SIZE + 1];
655
+ bool valid = false;
656
+ int j;
657
+
658
+ for (j = 0; j < BPF_REG_SIZE; j++) {
659
+ if (state->stack[i].slot_type[j] != STACK_INVALID)
660
+ valid = true;
661
+ types_buf[j] = slot_type_char[
662
+ state->stack[i].slot_type[j]];
358663 }
359
- if (state->stack[i].slot_type[0] == STACK_ZERO)
360
- verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
664
+ types_buf[BPF_REG_SIZE] = 0;
665
+ if (!valid)
666
+ continue;
667
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
668
+ print_liveness(env, state->stack[i].spilled_ptr.live);
669
+ if (state->stack[i].slot_type[0] == STACK_SPILL) {
670
+ reg = &state->stack[i].spilled_ptr;
671
+ t = reg->type;
672
+ verbose(env, "=%s", reg_type_str[t]);
673
+ if (t == SCALAR_VALUE && reg->precise)
674
+ verbose(env, "P");
675
+ if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
676
+ verbose(env, "%lld", reg->var_off.value + reg->off);
677
+ } else {
678
+ verbose(env, "=%s", types_buf);
679
+ }
680
+ }
681
+ if (state->acquired_refs && state->refs[0].id) {
682
+ verbose(env, " refs=%d", state->refs[0].id);
683
+ for (i = 1; i < state->acquired_refs; i++)
684
+ if (state->refs[i].id)
685
+ verbose(env, ",%d", state->refs[i].id);
361686 }
362687 verbose(env, "\n");
363688 }
364689
365
-static int copy_stack_state(struct bpf_func_state *dst,
366
- const struct bpf_func_state *src)
367
-{
368
- if (!src->stack)
369
- return 0;
370
- if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
371
- /* internal bug, make state invalid to reject the program */
372
- memset(dst, 0, sizeof(*dst));
373
- return -EFAULT;
374
- }
375
- memcpy(dst->stack, src->stack,
376
- sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
377
- return 0;
690
+#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
691
+static int copy_##NAME##_state(struct bpf_func_state *dst, \
692
+ const struct bpf_func_state *src) \
693
+{ \
694
+ if (!src->FIELD) \
695
+ return 0; \
696
+ if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
697
+ /* internal bug, make state invalid to reject the program */ \
698
+ memset(dst, 0, sizeof(*dst)); \
699
+ return -EFAULT; \
700
+ } \
701
+ memcpy(dst->FIELD, src->FIELD, \
702
+ sizeof(*src->FIELD) * (src->COUNT / SIZE)); \
703
+ return 0; \
378704 }
705
+/* copy_reference_state() */
706
+COPY_STATE_FN(reference, acquired_refs, refs, 1)
707
+/* copy_stack_state() */
708
+COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
709
+#undef COPY_STATE_FN
710
+
711
+#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
712
+static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
713
+ bool copy_old) \
714
+{ \
715
+ u32 old_size = state->COUNT; \
716
+ struct bpf_##NAME##_state *new_##FIELD; \
717
+ int slot = size / SIZE; \
718
+ \
719
+ if (size <= old_size || !size) { \
720
+ if (copy_old) \
721
+ return 0; \
722
+ state->COUNT = slot * SIZE; \
723
+ if (!size && old_size) { \
724
+ kfree(state->FIELD); \
725
+ state->FIELD = NULL; \
726
+ } \
727
+ return 0; \
728
+ } \
729
+ new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
730
+ GFP_KERNEL); \
731
+ if (!new_##FIELD) \
732
+ return -ENOMEM; \
733
+ if (copy_old) { \
734
+ if (state->FIELD) \
735
+ memcpy(new_##FIELD, state->FIELD, \
736
+ sizeof(*new_##FIELD) * (old_size / SIZE)); \
737
+ memset(new_##FIELD + old_size / SIZE, 0, \
738
+ sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
739
+ } \
740
+ state->COUNT = slot * SIZE; \
741
+ kfree(state->FIELD); \
742
+ state->FIELD = new_##FIELD; \
743
+ return 0; \
744
+}
745
+/* realloc_reference_state() */
746
+REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
747
+/* realloc_stack_state() */
748
+REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
749
+#undef REALLOC_STATE_FN
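For readability, this is roughly what the first instantiation above, COPY_STATE_FN(reference, acquired_refs, refs, 1), expands to (an illustrative expansion only, not additional diff content):

/* Approximate expansion of COPY_STATE_FN(reference, acquired_refs, refs, 1) */
static int copy_reference_state(struct bpf_func_state *dst,
				const struct bpf_func_state *src)
{
	if (!src->refs)
		return 0;
	if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
		/* internal bug, make state invalid to reject the program */
		memset(dst, 0, sizeof(*dst));
		return -EFAULT;
	}
	memcpy(dst->refs, src->refs,
	       sizeof(*src->refs) * (src->acquired_refs / 1));
	return 0;
}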
379750
380751 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
381752 * make it consume minimal amount of memory. check_stack_write() access from
382753 * the program calls into realloc_func_state() to grow the stack size.
383
- * Note there is a non-zero parent pointer inside each reg of bpf_verifier_state
384
- * which this function copies over. It points to corresponding reg in previous
385
- * bpf_verifier_state which is never reallocated
754
+ * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
755
+ * which realloc_stack_state() copies over. It points to previous
756
+ * bpf_verifier_state which is never reallocated.
386757 */
387
-static int realloc_func_state(struct bpf_func_state *state, int size,
388
- bool copy_old)
758
+static int realloc_func_state(struct bpf_func_state *state, int stack_size,
759
+ int refs_size, bool copy_old)
389760 {
390
- u32 old_size = state->allocated_stack;
391
- struct bpf_stack_state *new_stack;
392
- int slot = size / BPF_REG_SIZE;
761
+ int err = realloc_reference_state(state, refs_size, copy_old);
762
+ if (err)
763
+ return err;
764
+ return realloc_stack_state(state, stack_size, copy_old);
765
+}
393766
394
- if (size <= old_size || !size) {
395
- if (copy_old)
767
+/* Acquire a pointer id from the env and update the state->refs to include
768
+ * this new pointer reference.
769
+ * On success, returns a valid pointer id to associate with the register
770
+ * On failure, returns a negative errno.
771
+ */
772
+static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
773
+{
774
+ struct bpf_func_state *state = cur_func(env);
775
+ int new_ofs = state->acquired_refs;
776
+ int id, err;
777
+
778
+ err = realloc_reference_state(state, state->acquired_refs + 1, true);
779
+ if (err)
780
+ return err;
781
+ id = ++env->id_gen;
782
+ state->refs[new_ofs].id = id;
783
+ state->refs[new_ofs].insn_idx = insn_idx;
784
+
785
+ return id;
786
+}
787
+
788
+/* release function corresponding to acquire_reference_state(). Idempotent. */
789
+static int release_reference_state(struct bpf_func_state *state, int ptr_id)
790
+{
791
+ int i, last_idx;
792
+
793
+ last_idx = state->acquired_refs - 1;
794
+ for (i = 0; i < state->acquired_refs; i++) {
795
+ if (state->refs[i].id == ptr_id) {
796
+ if (last_idx && i != last_idx)
797
+ memcpy(&state->refs[i], &state->refs[last_idx],
798
+ sizeof(*state->refs));
799
+ memset(&state->refs[last_idx], 0, sizeof(*state->refs));
800
+ state->acquired_refs--;
396801 return 0;
397
- state->allocated_stack = slot * BPF_REG_SIZE;
398
- if (!size && old_size) {
399
- kfree(state->stack);
400
- state->stack = NULL;
401802 }
402
- return 0;
403803 }
404
- new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
405
- GFP_KERNEL);
406
- if (!new_stack)
407
- return -ENOMEM;
408
- if (copy_old) {
409
- if (state->stack)
410
- memcpy(new_stack, state->stack,
411
- sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
412
- memset(new_stack + old_size / BPF_REG_SIZE, 0,
413
- sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
414
- }
415
- state->allocated_stack = slot * BPF_REG_SIZE;
416
- kfree(state->stack);
417
- state->stack = new_stack;
804
+ return -EINVAL;
805
+}
806
+
807
+static int transfer_reference_state(struct bpf_func_state *dst,
808
+ struct bpf_func_state *src)
809
+{
810
+ int err = realloc_reference_state(dst, src->acquired_refs, false);
811
+ if (err)
812
+ return err;
813
+ err = copy_reference_state(dst, src);
814
+ if (err)
815
+ return err;
418816 return 0;
419817 }
420818
....@@ -422,8 +820,16 @@
422820 {
423821 if (!state)
424822 return;
823
+ kfree(state->refs);
425824 kfree(state->stack);
426825 kfree(state);
826
+}
827
+
828
+static void clear_jmp_history(struct bpf_verifier_state *state)
829
+{
830
+ kfree(state->jmp_history);
831
+ state->jmp_history = NULL;
832
+ state->jmp_history_cnt = 0;
427833 }
428834
429835 static void free_verifier_state(struct bpf_verifier_state *state,
....@@ -435,6 +841,7 @@
435841 free_func_state(state->frame[i]);
436842 state->frame[i] = NULL;
437843 }
844
+ clear_jmp_history(state);
438845 if (free_self)
439846 kfree(state);
440847 }
....@@ -447,10 +854,14 @@
447854 {
448855 int err;
449856
450
- err = realloc_func_state(dst, src->allocated_stack, false);
857
+ err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
858
+ false);
451859 if (err)
452860 return err;
453
- memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
861
+ memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
862
+ err = copy_reference_state(dst, src);
863
+ if (err)
864
+ return err;
454865 return copy_stack_state(dst, src);
455866 }
456867
....@@ -458,7 +869,17 @@
458869 const struct bpf_verifier_state *src)
459870 {
460871 struct bpf_func_state *dst;
872
+ u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
461873 int i, err;
874
+
875
+ if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
876
+ kfree(dst_state->jmp_history);
877
+ dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
878
+ if (!dst_state->jmp_history)
879
+ return -ENOMEM;
880
+ }
881
+ memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
882
+ dst_state->jmp_history_cnt = src->jmp_history_cnt;
462883
463884 /* if dst has more stack frames then src frame, free them */
464885 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
....@@ -467,6 +888,11 @@
467888 }
468889 dst_state->speculative = src->speculative;
469890 dst_state->curframe = src->curframe;
891
+ dst_state->active_spin_lock = src->active_spin_lock;
892
+ dst_state->branches = src->branches;
893
+ dst_state->parent = src->parent;
894
+ dst_state->first_insn_idx = src->first_insn_idx;
895
+ dst_state->last_insn_idx = src->last_insn_idx;
470896 for (i = 0; i <= src->curframe; i++) {
471897 dst = dst_state->frame[i];
472898 if (!dst) {
....@@ -482,8 +908,25 @@
482908 return 0;
483909 }
484910
911
+static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
912
+{
913
+ while (st) {
914
+ u32 br = --st->branches;
915
+
916
+ /* WARN_ON(br > 1) technically makes sense here,
917
+ * but see comment in push_stack(), hence:
918
+ */
919
+ WARN_ONCE((int)br < 0,
920
+ "BUG update_branch_counts:branches_to_explore=%d\n",
921
+ br);
922
+ if (br)
923
+ break;
924
+ st = st->parent;
925
+ }
926
+}
927
+
485928 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
486
- int *insn_idx)
929
+ int *insn_idx, bool pop_log)
487930 {
488931 struct bpf_verifier_state *cur = env->cur_state;
489932 struct bpf_verifier_stack_elem *elem, *head = env->head;
....@@ -497,6 +940,8 @@
497940 if (err)
498941 return err;
499942 }
943
+ if (pop_log)
944
+ bpf_vlog_reset(&env->log, head->log_pos);
500945 if (insn_idx)
501946 *insn_idx = head->insn_idx;
502947 if (prev_insn_idx)
....@@ -524,22 +969,36 @@
524969 elem->insn_idx = insn_idx;
525970 elem->prev_insn_idx = prev_insn_idx;
526971 elem->next = env->head;
972
+ elem->log_pos = env->log.len_used;
527973 env->head = elem;
528974 env->stack_size++;
529975 err = copy_verifier_state(&elem->st, cur);
530976 if (err)
531977 goto err;
532978 elem->st.speculative |= speculative;
533
- if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
534
- verbose(env, "BPF program is too complex\n");
979
+ if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
980
+ verbose(env, "The sequence of %d jumps is too complex.\n",
981
+ env->stack_size);
535982 goto err;
983
+ }
984
+ if (elem->st.parent) {
985
+ ++elem->st.parent->branches;
986
+ /* WARN_ON(branches > 2) technically makes sense here,
987
+ * but
988
+ * 1. speculative states will bump 'branches' for non-branch
989
+ * instructions
990
+ * 2. is_state_visited() heuristics may decide not to create
991
+ * a new state for a sequence of branches and all such current
992
+ * and cloned states will be pointing to a single parent state
993
+ * which might have large 'branches' count.
994
+ */
536995 }
537996 return &elem->st;
538997 err:
539998 free_verifier_state(env->cur_state, true);
540999 env->cur_state = NULL;
5411000 /* pop all elements and return */
542
- while (!pop_stack(env, NULL, NULL));
1001
+ while (!pop_stack(env, NULL, NULL, false));
5431002 return NULL;
5441003 }
5451004
....@@ -548,7 +1007,23 @@
5481007 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
5491008 };
5501009
551
-static void __mark_reg_not_init(struct bpf_reg_state *reg);
1010
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1011
+ struct bpf_reg_state *reg);
1012
+
1013
+/* This helper doesn't clear reg->id */
1014
+static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1015
+{
1016
+ reg->var_off = tnum_const(imm);
1017
+ reg->smin_value = (s64)imm;
1018
+ reg->smax_value = (s64)imm;
1019
+ reg->umin_value = imm;
1020
+ reg->umax_value = imm;
1021
+
1022
+ reg->s32_min_value = (s32)imm;
1023
+ reg->s32_max_value = (s32)imm;
1024
+ reg->u32_min_value = (u32)imm;
1025
+ reg->u32_max_value = (u32)imm;
1026
+}
5521027
5531028 /* Mark the unknown part of a register (variable offset or scalar value) as
5541029 * known to have the value @imm.
....@@ -558,11 +1033,16 @@
5581033 /* Clear id, off, and union(map_ptr, range) */
5591034 memset(((u8 *)reg) + sizeof(reg->type), 0,
5601035 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
561
- reg->var_off = tnum_const(imm);
562
- reg->smin_value = (s64)imm;
563
- reg->smax_value = (s64)imm;
564
- reg->umin_value = imm;
565
- reg->umax_value = imm;
1036
+ ___mark_reg_known(reg, imm);
1037
+}
1038
+
1039
+static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1040
+{
1041
+ reg->var_off = tnum_const_subreg(reg->var_off, imm);
1042
+ reg->s32_min_value = (s32)imm;
1043
+ reg->s32_max_value = (s32)imm;
1044
+ reg->u32_min_value = (u32)imm;
1045
+ reg->u32_max_value = (u32)imm;
5661046 }
5671047
5681048 /* Mark the 'variable offset' part of a register as zero. This should be
....@@ -586,7 +1066,7 @@
5861066 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
5871067 /* Something bad happened, let's kill all regs */
5881068 for (regno = 0; regno < MAX_BPF_REG; regno++)
589
- __mark_reg_not_init(regs + regno);
1069
+ __mark_reg_not_init(env, regs + regno);
5901070 return;
5911071 }
5921072 __mark_reg_known_zero(regs + regno);
....@@ -617,8 +1097,52 @@
6171097 tnum_equals_const(reg->var_off, 0);
6181098 }
6191099
620
-/* Attempts to improve min/max values based on var_off information */
621
-static void __update_reg_bounds(struct bpf_reg_state *reg)
1100
+/* Reset the min/max bounds of a register */
1101
+static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1102
+{
1103
+ reg->smin_value = S64_MIN;
1104
+ reg->smax_value = S64_MAX;
1105
+ reg->umin_value = 0;
1106
+ reg->umax_value = U64_MAX;
1107
+
1108
+ reg->s32_min_value = S32_MIN;
1109
+ reg->s32_max_value = S32_MAX;
1110
+ reg->u32_min_value = 0;
1111
+ reg->u32_max_value = U32_MAX;
1112
+}
1113
+
1114
+static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1115
+{
1116
+ reg->smin_value = S64_MIN;
1117
+ reg->smax_value = S64_MAX;
1118
+ reg->umin_value = 0;
1119
+ reg->umax_value = U64_MAX;
1120
+}
1121
+
1122
+static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1123
+{
1124
+ reg->s32_min_value = S32_MIN;
1125
+ reg->s32_max_value = S32_MAX;
1126
+ reg->u32_min_value = 0;
1127
+ reg->u32_max_value = U32_MAX;
1128
+}
1129
+
1130
+static void __update_reg32_bounds(struct bpf_reg_state *reg)
1131
+{
1132
+ struct tnum var32_off = tnum_subreg(reg->var_off);
1133
+
1134
+ /* min signed is max(sign bit) | min(other bits) */
1135
+ reg->s32_min_value = max_t(s32, reg->s32_min_value,
1136
+ var32_off.value | (var32_off.mask & S32_MIN));
1137
+ /* max signed is min(sign bit) | max(other bits) */
1138
+ reg->s32_max_value = min_t(s32, reg->s32_max_value,
1139
+ var32_off.value | (var32_off.mask & S32_MAX));
1140
+ reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1141
+ reg->u32_max_value = min(reg->u32_max_value,
1142
+ (u32)(var32_off.value | var32_off.mask));
1143
+}
1144
+
1145
+static void __update_reg64_bounds(struct bpf_reg_state *reg)
6221146 {
6231147 /* min signed is max(sign bit) | min(other bits) */
6241148 reg->smin_value = max_t(s64, reg->smin_value,
....@@ -631,8 +1155,48 @@
6311155 reg->var_off.value | reg->var_off.mask);
6321156 }
6331157
1158
+static void __update_reg_bounds(struct bpf_reg_state *reg)
1159
+{
1160
+ __update_reg32_bounds(reg);
1161
+ __update_reg64_bounds(reg);
1162
+}
1163
+
6341164 /* Uses signed min/max values to inform unsigned, and vice-versa */
635
-static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1165
+static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1166
+{
1167
+ /* Learn sign from signed bounds.
1168
+ * If we cannot cross the sign boundary, then signed and unsigned bounds
1169
+ * are the same, so combine. This works even in the negative case, e.g.
1170
+ * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1171
+ */
1172
+ if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1173
+ reg->s32_min_value = reg->u32_min_value =
1174
+ max_t(u32, reg->s32_min_value, reg->u32_min_value);
1175
+ reg->s32_max_value = reg->u32_max_value =
1176
+ min_t(u32, reg->s32_max_value, reg->u32_max_value);
1177
+ return;
1178
+ }
1179
+ /* Learn sign from unsigned bounds. Signed bounds cross the sign
1180
+ * boundary, so we must be careful.
1181
+ */
1182
+ if ((s32)reg->u32_max_value >= 0) {
1183
+ /* Positive. We can't learn anything from the smin, but smax
1184
+ * is positive, hence safe.
1185
+ */
1186
+ reg->s32_min_value = reg->u32_min_value;
1187
+ reg->s32_max_value = reg->u32_max_value =
1188
+ min_t(u32, reg->s32_max_value, reg->u32_max_value);
1189
+ } else if ((s32)reg->u32_min_value < 0) {
1190
+ /* Negative. We can't learn anything from the smax, but smin
1191
+ * is negative, hence safe.
1192
+ */
1193
+ reg->s32_min_value = reg->u32_min_value =
1194
+ max_t(u32, reg->s32_min_value, reg->u32_min_value);
1195
+ reg->s32_max_value = reg->u32_max_value;
1196
+ }
1197
+}
1198
+
1199
+static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
6361200 {
6371201 /* Learn sign from signed bounds.
6381202 * If we cannot cross the sign boundary, then signed and unsigned bounds
....@@ -666,25 +1230,112 @@
6661230 }
6671231 }
6681232
1233
+static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1234
+{
1235
+ __reg32_deduce_bounds(reg);
1236
+ __reg64_deduce_bounds(reg);
1237
+}
1238
+
6691239 /* Attempts to improve var_off based on unsigned min/max information */
6701240 static void __reg_bound_offset(struct bpf_reg_state *reg)
6711241 {
672
- reg->var_off = tnum_intersect(reg->var_off,
673
- tnum_range(reg->umin_value,
674
- reg->umax_value));
1242
+ struct tnum var64_off = tnum_intersect(reg->var_off,
1243
+ tnum_range(reg->umin_value,
1244
+ reg->umax_value));
1245
+ struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1246
+ tnum_range(reg->u32_min_value,
1247
+ reg->u32_max_value));
1248
+
1249
+ reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
6751250 }
6761251
677
-/* Reset the min/max bounds of a register */
678
-static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1252
+static void reg_bounds_sync(struct bpf_reg_state *reg)
6791253 {
680
- reg->smin_value = S64_MIN;
681
- reg->smax_value = S64_MAX;
682
- reg->umin_value = 0;
683
- reg->umax_value = U64_MAX;
1254
+ /* We might have learned new bounds from the var_off. */
1255
+ __update_reg_bounds(reg);
1256
+ /* We might have learned something about the sign bit. */
1257
+ __reg_deduce_bounds(reg);
1258
+ /* We might have learned some bits from the bounds. */
1259
+ __reg_bound_offset(reg);
1260
+ /* Intersecting with the old var_off might have improved our bounds
1261
+ * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1262
+ * then new var_off is (0; 0x7f...fc) which improves our umax.
1263
+ */
1264
+ __update_reg_bounds(reg);
1265
+}
1266
+
1267
+static bool __reg32_bound_s64(s32 a)
1268
+{
1269
+ return a >= 0 && a <= S32_MAX;
1270
+}
1271
+
1272
+static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1273
+{
1274
+ reg->umin_value = reg->u32_min_value;
1275
+ reg->umax_value = reg->u32_max_value;
1276
+
1277
+ /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
1278
+ * be positive otherwise set to worse case bounds and refine later
1279
+ * from tnum.
1280
+ */
1281
+ if (__reg32_bound_s64(reg->s32_min_value) &&
1282
+ __reg32_bound_s64(reg->s32_max_value)) {
1283
+ reg->smin_value = reg->s32_min_value;
1284
+ reg->smax_value = reg->s32_max_value;
1285
+ } else {
1286
+ reg->smin_value = 0;
1287
+ reg->smax_value = U32_MAX;
1288
+ }
1289
+}
1290
+
1291
+static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1292
+{
1293
+ /* special case when 64-bit register has upper 32-bit register
1294
+ * zeroed. Typically happens after zext or <<32, >>32 sequence
1295
+ * allowing us to use 32-bit bounds directly,
1296
+ */
1297
+ if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1298
+ __reg_assign_32_into_64(reg);
1299
+ } else {
1300
+ /* Otherwise the best we can do is push lower 32bit known and
1301
+ * unknown bits into register (var_off set from jmp logic)
1302
+ * then learn as much as possible from the 64-bit tnum
1303
+ * known and unknown bits. The previous smin/smax bounds are
1304
+ * invalid here because of jmp32 compare so mark them unknown
1305
+ * so they do not impact tnum bounds calculation.
1306
+ */
1307
+ __mark_reg64_unbounded(reg);
1308
+ }
1309
+ reg_bounds_sync(reg);
1310
+}
1311
+
1312
+static bool __reg64_bound_s32(s64 a)
1313
+{
1314
+ return a >= S32_MIN && a <= S32_MAX;
1315
+}
1316
+
1317
+static bool __reg64_bound_u32(u64 a)
1318
+{
1319
+ return a >= U32_MIN && a <= U32_MAX;
1320
+}
1321
+
1322
+static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1323
+{
1324
+ __mark_reg32_unbounded(reg);
1325
+ if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1326
+ reg->s32_min_value = (s32)reg->smin_value;
1327
+ reg->s32_max_value = (s32)reg->smax_value;
1328
+ }
1329
+ if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1330
+ reg->u32_min_value = (u32)reg->umin_value;
1331
+ reg->u32_max_value = (u32)reg->umax_value;
1332
+ }
1333
+ reg_bounds_sync(reg);
6841334 }
6851335
6861336 /* Mark a register as having a completely unknown (scalar) value. */
687
-static void __mark_reg_unknown(struct bpf_reg_state *reg)
1337
+static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1338
+ struct bpf_reg_state *reg)
6881339 {
6891340 /*
6901341 * Clear type, id, off, and union(map_ptr, range) and
....@@ -694,6 +1345,7 @@
6941345 reg->type = SCALAR_VALUE;
6951346 reg->var_off = tnum_unknown;
6961347 reg->frameno = 0;
1348
+ reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
6971349 __mark_reg_unbounded(reg);
6981350 }
6991351
....@@ -704,15 +1356,16 @@
7041356 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
7051357 /* Something bad happened, let's kill all regs except FP */
7061358 for (regno = 0; regno < BPF_REG_FP; regno++)
707
- __mark_reg_not_init(regs + regno);
1359
+ __mark_reg_not_init(env, regs + regno);
7081360 return;
7091361 }
710
- __mark_reg_unknown(regs + regno);
1362
+ __mark_reg_unknown(env, regs + regno);
7111363 }
7121364
713
-static void __mark_reg_not_init(struct bpf_reg_state *reg)
1365
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1366
+ struct bpf_reg_state *reg)
7141367 {
715
- __mark_reg_unknown(reg);
1368
+ __mark_reg_unknown(env, reg);
7161369 reg->type = NOT_INIT;
7171370 }
7181371
....@@ -723,12 +1376,26 @@
7231376 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
7241377 /* Something bad happened, let's kill all regs except FP */
7251378 for (regno = 0; regno < BPF_REG_FP; regno++)
726
- __mark_reg_not_init(regs + regno);
1379
+ __mark_reg_not_init(env, regs + regno);
7271380 return;
7281381 }
729
- __mark_reg_not_init(regs + regno);
1382
+ __mark_reg_not_init(env, regs + regno);
7301383 }
7311384
1385
+static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1386
+ struct bpf_reg_state *regs, u32 regno,
1387
+ enum bpf_reg_type reg_type, u32 btf_id)
1388
+{
1389
+ if (reg_type == SCALAR_VALUE) {
1390
+ mark_reg_unknown(env, regs, regno);
1391
+ return;
1392
+ }
1393
+ mark_reg_known_zero(env, regs, regno);
1394
+ regs[regno].type = PTR_TO_BTF_ID;
1395
+ regs[regno].btf_id = btf_id;
1396
+}
1397
+
1398
+#define DEF_NOT_SUBREG (0)
7321399 static void init_reg_state(struct bpf_verifier_env *env,
7331400 struct bpf_func_state *state)
7341401 {
....@@ -739,16 +1406,13 @@
7391406 mark_reg_not_init(env, regs, i);
7401407 regs[i].live = REG_LIVE_NONE;
7411408 regs[i].parent = NULL;
1409
+ regs[i].subreg_def = DEF_NOT_SUBREG;
7421410 }
7431411
7441412 /* frame pointer */
7451413 regs[BPF_REG_FP].type = PTR_TO_STACK;
7461414 mark_reg_known_zero(env, regs, BPF_REG_FP);
7471415 regs[BPF_REG_FP].frameno = state->frameno;
748
-
749
- /* 1st arg to a function */
750
- regs[BPF_REG_1].type = PTR_TO_CTX;
751
- mark_reg_known_zero(env, regs, BPF_REG_1);
7521416 }
7531417
7541418 #define BPF_MAIN_FUNC (-1)
....@@ -826,13 +1490,10 @@
8261490 continue;
8271491 if (insn[i].src_reg != BPF_PSEUDO_CALL)
8281492 continue;
829
- if (!env->allow_ptr_leaks) {
830
- verbose(env, "function calls to other bpf functions are allowed for root only\n");
1493
+ if (!env->bpf_capable) {
1494
+ verbose(env,
1495
+ "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
8311496 return -EPERM;
832
- }
833
- if (bpf_prog_is_dev_bound(env->prog->aux)) {
834
- verbose(env, "function calls in offloaded programs are not supported yet\n");
835
- return -EINVAL;
8361497 }
8371498 ret = add_subprog(env, i + insn[i].imm + 1);
8381499 if (ret < 0)
....@@ -844,7 +1505,7 @@
8441505 */
8451506 subprog[env->subprog_cnt].start = insn_cnt;
8461507
847
- if (env->log.level > 1)
1508
+ if (env->log.level & BPF_LOG_LEVEL2)
8481509 for (i = 0; i < env->subprog_cnt; i++)
8491510 verbose(env, "func#%d @%d\n", i, subprog[i].start);
8501511
....@@ -854,7 +1515,14 @@
8541515 for (i = 0; i < insn_cnt; i++) {
8551516 u8 code = insn[i].code;
8561517
857
- if (BPF_CLASS(code) != BPF_JMP)
1518
+ if (code == (BPF_JMP | BPF_CALL) &&
1519
+ insn[i].imm == BPF_FUNC_tail_call &&
1520
+ insn[i].src_reg != BPF_PSEUDO_CALL)
1521
+ subprog[cur_subprog].has_tail_call = true;
1522
+ if (BPF_CLASS(code) == BPF_LD &&
1523
+ (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
1524
+ subprog[cur_subprog].has_ld_abs = true;
1525
+ if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
8581526 goto next;
8591527 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
8601528 goto next;
....@@ -888,21 +1556,165 @@
8881556 */
8891557 static int mark_reg_read(struct bpf_verifier_env *env,
8901558 const struct bpf_reg_state *state,
891
- struct bpf_reg_state *parent)
1559
+ struct bpf_reg_state *parent, u8 flag)
8921560 {
8931561 bool writes = parent == state->parent; /* Observe write marks */
1562
+ int cnt = 0;
8941563
8951564 while (parent) {
8961565 /* if read wasn't screened by an earlier write ... */
8971566 if (writes && state->live & REG_LIVE_WRITTEN)
8981567 break;
1568
+ if (parent->live & REG_LIVE_DONE) {
1569
+ verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1570
+ reg_type_str[parent->type],
1571
+ parent->var_off.value, parent->off);
1572
+ return -EFAULT;
1573
+ }
1574
+ /* The first condition is more likely to be true than the
1575
+ * second, checked it first.
1576
+ */
1577
+ if ((parent->live & REG_LIVE_READ) == flag ||
1578
+ parent->live & REG_LIVE_READ64)
1579
+ /* The parentage chain never changes and
1580
+ * this parent was already marked as LIVE_READ.
1581
+ * There is no need to keep walking the chain again and
1582
+ * keep re-marking all parents as LIVE_READ.
1583
+ * This case happens when the same register is read
1584
+ * multiple times without writes into it in-between.
1585
+ * Also, if parent has the stronger REG_LIVE_READ64 set,
1586
+ * then no need to set the weak REG_LIVE_READ32.
1587
+ */
1588
+ break;
8991589 /* ... then we depend on parent's value */
900
- parent->live |= REG_LIVE_READ;
1590
+ parent->live |= flag;
1591
+ /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1592
+ if (flag == REG_LIVE_READ64)
1593
+ parent->live &= ~REG_LIVE_READ32;
9011594 state = parent;
9021595 parent = state->parent;
9031596 writes = true;
1597
+ cnt++;
9041598 }
1599
+
1600
+ if (env->longest_mark_read_walk < cnt)
1601
+ env->longest_mark_read_walk = cnt;
9051602 return 0;
1603
+}
1604
+
1605
+/* This function is supposed to be used by the following 32-bit optimization
1606
+ * code only. It returns TRUE if the source or destination register operates
1607
+ * on 64-bit, otherwise return FALSE.
1608
+ */
1609
+static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
1610
+ u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
1611
+{
1612
+ u8 code, class, op;
1613
+
1614
+ code = insn->code;
1615
+ class = BPF_CLASS(code);
1616
+ op = BPF_OP(code);
1617
+ if (class == BPF_JMP) {
1618
+ /* BPF_EXIT for "main" will reach here. Return TRUE
1619
+ * conservatively.
1620
+ */
1621
+ if (op == BPF_EXIT)
1622
+ return true;
1623
+ if (op == BPF_CALL) {
1624
+ /* BPF to BPF call will reach here because of marking
1625
+ * caller saved clobber with DST_OP_NO_MARK for which we
1626
+ * don't care the register def because they are anyway
1627
+ * marked as NOT_INIT already.
1628
+ */
1629
+ if (insn->src_reg == BPF_PSEUDO_CALL)
1630
+ return false;
1631
+ /* Helper call will reach here because of arg type
1632
+ * check, conservatively return TRUE.
1633
+ */
1634
+ if (t == SRC_OP)
1635
+ return true;
1636
+
1637
+ return false;
1638
+ }
1639
+ }
1640
+
1641
+ if (class == BPF_ALU64 || class == BPF_JMP ||
1642
+ /* BPF_END always use BPF_ALU class. */
1643
+ (class == BPF_ALU && op == BPF_END && insn->imm == 64))
1644
+ return true;
1645
+
1646
+ if (class == BPF_ALU || class == BPF_JMP32)
1647
+ return false;
1648
+
1649
+ if (class == BPF_LDX) {
1650
+ if (t != SRC_OP)
1651
+ return BPF_SIZE(code) == BPF_DW;
1652
+ /* LDX source must be ptr. */
1653
+ return true;
1654
+ }
1655
+
1656
+ if (class == BPF_STX) {
1657
+ if (reg->type != SCALAR_VALUE)
1658
+ return true;
1659
+ return BPF_SIZE(code) == BPF_DW;
1660
+ }
1661
+
1662
+ if (class == BPF_LD) {
1663
+ u8 mode = BPF_MODE(code);
1664
+
1665
+ /* LD_IMM64 */
1666
+ if (mode == BPF_IMM)
1667
+ return true;
1668
+
1669
+ /* Both LD_IND and LD_ABS return 32-bit data. */
1670
+ if (t != SRC_OP)
1671
+ return false;
1672
+
1673
+ /* Implicit ctx ptr. */
1674
+ if (regno == BPF_REG_6)
1675
+ return true;
1676
+
1677
+ /* Explicit source could be any width. */
1678
+ return true;
1679
+ }
1680
+
1681
+ if (class == BPF_ST)
1682
+ /* The only source register for BPF_ST is a ptr. */
1683
+ return true;
1684
+
1685
+ /* Conservatively return true at default. */
1686
+ return true;
1687
+}
1688
+
1689
+/* Return TRUE if INSN doesn't have explicit value define. */
1690
+static bool insn_no_def(struct bpf_insn *insn)
1691
+{
1692
+ u8 class = BPF_CLASS(insn->code);
1693
+
1694
+ return (class == BPF_JMP || class == BPF_JMP32 ||
1695
+ class == BPF_STX || class == BPF_ST);
1696
+}
1697
+
1698
+/* Return TRUE if INSN has defined any 32-bit value explicitly. */
1699
+static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1700
+{
1701
+ if (insn_no_def(insn))
1702
+ return false;
1703
+
1704
+ return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1705
+}
1706
+
1707
+static void mark_insn_zext(struct bpf_verifier_env *env,
1708
+ struct bpf_reg_state *reg)
1709
+{
1710
+ s32 def_idx = reg->subreg_def;
1711
+
1712
+ if (def_idx == DEF_NOT_SUBREG)
1713
+ return;
1714
+
1715
+ env->insn_aux_data[def_idx - 1].zext_dst = true;
1716
+ /* The dst will be zero extended, so won't be sub-register anymore. */
1717
+ reg->subreg_def = DEF_NOT_SUBREG;
9061718 }
9071719
9081720 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
....@@ -910,34 +1722,473 @@
9101722 {
9111723 struct bpf_verifier_state *vstate = env->cur_state;
9121724 struct bpf_func_state *state = vstate->frame[vstate->curframe];
913
- struct bpf_reg_state *regs = state->regs;
1725
+ struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1726
+ struct bpf_reg_state *reg, *regs = state->regs;
1727
+ bool rw64;
9141728
9151729 if (regno >= MAX_BPF_REG) {
9161730 verbose(env, "R%d is invalid\n", regno);
9171731 return -EINVAL;
9181732 }
9191733
1734
+ reg = &regs[regno];
1735
+ rw64 = is_reg64(env, insn, regno, reg, t);
9201736 if (t == SRC_OP) {
9211737 /* check whether register used as source operand can be read */
922
- if (regs[regno].type == NOT_INIT) {
1738
+ if (reg->type == NOT_INIT) {
9231739 verbose(env, "R%d !read_ok\n", regno);
9241740 return -EACCES;
9251741 }
9261742 /* We don't need to worry about FP liveness because it's read-only */
927
- if (regno != BPF_REG_FP)
928
- return mark_reg_read(env, &regs[regno],
929
- regs[regno].parent);
1743
+ if (regno == BPF_REG_FP)
1744
+ return 0;
1745
+
1746
+ if (rw64)
1747
+ mark_insn_zext(env, reg);
1748
+
1749
+ return mark_reg_read(env, reg, reg->parent,
1750
+ rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
9301751 } else {
9311752 /* check whether register used as dest operand can be written to */
9321753 if (regno == BPF_REG_FP) {
9331754 verbose(env, "frame pointer is read only\n");
9341755 return -EACCES;
9351756 }
936
- regs[regno].live |= REG_LIVE_WRITTEN;
1757
+ reg->live |= REG_LIVE_WRITTEN;
1758
+ reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
9371759 if (t == DST_OP)
9381760 mark_reg_unknown(env, regs, regno);
9391761 }
9401762 return 0;
1763
+}
1764
+
1765
+/* for any branch, call, exit record the history of jmps in the given state */
1766
+static int push_jmp_history(struct bpf_verifier_env *env,
1767
+ struct bpf_verifier_state *cur)
1768
+{
1769
+ u32 cnt = cur->jmp_history_cnt;
1770
+ struct bpf_idx_pair *p;
1771
+
1772
+ cnt++;
1773
+ p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1774
+ if (!p)
1775
+ return -ENOMEM;
1776
+ p[cnt - 1].idx = env->insn_idx;
1777
+ p[cnt - 1].prev_idx = env->prev_insn_idx;
1778
+ cur->jmp_history = p;
1779
+ cur->jmp_history_cnt = cnt;
1780
+ return 0;
1781
+}
1782
+
1783
+/* Backtrack one insn at a time. If idx is not at the top of recorded
1784
+ * history then previous instruction came from straight line execution.
1785
+ */
1786
+static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
1787
+ u32 *history)
1788
+{
1789
+ u32 cnt = *history;
1790
+
1791
+ if (cnt && st->jmp_history[cnt - 1].idx == i) {
1792
+ i = st->jmp_history[cnt - 1].prev_idx;
1793
+ (*history)--;
1794
+ } else {
1795
+ i--;
1796
+ }
1797
+ return i;
1798
+}
1799
+
1800
+/* For given verifier state backtrack_insn() is called from the last insn to
1801
+ * the first insn. Its purpose is to compute a bitmask of registers and
1802
+ * stack slots that needs precision in the parent verifier state.
1803
+ */
1804
+static int backtrack_insn(struct bpf_verifier_env *env, int idx,
1805
+ u32 *reg_mask, u64 *stack_mask)
1806
+{
1807
+ const struct bpf_insn_cbs cbs = {
1808
+ .cb_print = verbose,
1809
+ .private_data = env,
1810
+ };
1811
+ struct bpf_insn *insn = env->prog->insnsi + idx;
1812
+ u8 class = BPF_CLASS(insn->code);
1813
+ u8 opcode = BPF_OP(insn->code);
1814
+ u8 mode = BPF_MODE(insn->code);
1815
+ u32 dreg = 1u << insn->dst_reg;
1816
+ u32 sreg = 1u << insn->src_reg;
1817
+ u32 spi;
1818
+
1819
+ if (insn->code == 0)
1820
+ return 0;
1821
+ if (env->log.level & BPF_LOG_LEVEL) {
1822
+ verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1823
+ verbose(env, "%d: ", idx);
1824
+ print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1825
+ }
1826
+
1827
+ if (class == BPF_ALU || class == BPF_ALU64) {
1828
+ if (!(*reg_mask & dreg))
1829
+ return 0;
1830
+ if (opcode == BPF_MOV) {
1831
+ if (BPF_SRC(insn->code) == BPF_X) {
1832
+ /* dreg = sreg
1833
+ * dreg needs precision after this insn
1834
+ * sreg needs precision before this insn
1835
+ */
1836
+ *reg_mask &= ~dreg;
1837
+ *reg_mask |= sreg;
1838
+ } else {
1839
+ /* dreg = K
1840
+ * dreg needs precision after this insn.
1841
+ * Corresponding register is already marked
1842
+ * as precise=true in this verifier state.
1843
+ * No further markings in parent are necessary
1844
+ */
1845
+ *reg_mask &= ~dreg;
1846
+ }
1847
+ } else {
1848
+ if (BPF_SRC(insn->code) == BPF_X) {
1849
+ /* dreg += sreg
1850
+ * both dreg and sreg need precision
1851
+ * before this insn
1852
+ */
1853
+ *reg_mask |= sreg;
1854
+ } /* else dreg += K
1855
+ * dreg still needs precision before this insn
1856
+ */
1857
+ }
1858
+ } else if (class == BPF_LDX) {
1859
+ if (!(*reg_mask & dreg))
1860
+ return 0;
1861
+ *reg_mask &= ~dreg;
1862
+
1863
+ /* scalars can only be spilled into stack w/o losing precision.
1864
+ * Load from any other memory can be zero extended.
1865
+ * The desire to keep that precision is already indicated
1866
+ * by 'precise' mark in corresponding register of this state.
1867
+ * No further tracking necessary.
1868
+ */
1869
+ if (insn->src_reg != BPF_REG_FP)
1870
+ return 0;
1871
+ if (BPF_SIZE(insn->code) != BPF_DW)
1872
+ return 0;
1873
+
1874
+ /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1875
+ * that [fp - off] slot contains scalar that needs to be
1876
+ * tracked with precision
1877
+ */
1878
+ spi = (-insn->off - 1) / BPF_REG_SIZE;
1879
+ if (spi >= 64) {
1880
+ verbose(env, "BUG spi %d\n", spi);
1881
+ WARN_ONCE(1, "verifier backtracking bug");
1882
+ return -EFAULT;
1883
+ }
1884
+ *stack_mask |= 1ull << spi;
1885
+ } else if (class == BPF_STX || class == BPF_ST) {
1886
+ if (*reg_mask & dreg)
1887
+ /* stx & st shouldn't be using _scalar_ dst_reg
1888
+ * to access memory. It means backtracking
1889
+ * encountered a case of pointer subtraction.
1890
+ */
1891
+ return -ENOTSUPP;
1892
+ /* scalars can only be spilled into stack */
1893
+ if (insn->dst_reg != BPF_REG_FP)
1894
+ return 0;
1895
+ if (BPF_SIZE(insn->code) != BPF_DW)
1896
+ return 0;
1897
+ spi = (-insn->off - 1) / BPF_REG_SIZE;
1898
+ if (spi >= 64) {
1899
+ verbose(env, "BUG spi %d\n", spi);
1900
+ WARN_ONCE(1, "verifier backtracking bug");
1901
+ return -EFAULT;
1902
+ }
1903
+ if (!(*stack_mask & (1ull << spi)))
1904
+ return 0;
1905
+ *stack_mask &= ~(1ull << spi);
1906
+ if (class == BPF_STX)
1907
+ *reg_mask |= sreg;
1908
+ } else if (class == BPF_JMP || class == BPF_JMP32) {
1909
+ if (opcode == BPF_CALL) {
1910
+ if (insn->src_reg == BPF_PSEUDO_CALL)
1911
+ return -ENOTSUPP;
1912
+ /* regular helper call sets R0 */
1913
+ *reg_mask &= ~1;
1914
+ if (*reg_mask & 0x3f) {
1915
+ /* if backtracing was looking for registers R1-R5
1916
+ * they should have been found already.
1917
+ */
1918
+ verbose(env, "BUG regs %x\n", *reg_mask);
1919
+ WARN_ONCE(1, "verifier backtracking bug");
1920
+ return -EFAULT;
1921
+ }
1922
+ } else if (opcode == BPF_EXIT) {
1923
+ return -ENOTSUPP;
1924
+ }
1925
+ } else if (class == BPF_LD) {
1926
+ if (!(*reg_mask & dreg))
1927
+ return 0;
1928
+ *reg_mask &= ~dreg;
1929
+ /* It's ld_imm64 or ld_abs or ld_ind.
1930
+ * For ld_imm64 no further tracking of precision
1931
+ * into parent is necessary
1932
+ */
1933
+ if (mode == BPF_IND || mode == BPF_ABS)
1934
+ /* to be analyzed */
1935
+ return -ENOTSUPP;
1936
+ }
1937
+ return 0;
1938
+}
1939
+
1940
+/* the scalar precision tracking algorithm:
1941
+ * . at the start all registers have precise=false.
1942
+ * . scalar ranges are tracked as normal through alu and jmp insns.
1943
+ * . once precise value of the scalar register is used in:
1944
+ * . ptr + scalar alu
1945
+ * . if (scalar cond K|scalar)
1946
+ * . helper_call(.., scalar, ...) where ARG_CONST is expected
1947
+ * backtrack through the verifier states and mark all registers and
1948
+ * stack slots with spilled constants that these scalar regisers
1949
+ * should be precise.
1950
+ * . during state pruning two registers (or spilled stack slots)
1951
+ * are equivalent if both are not precise.
1952
+ *
1953
+ * Note the verifier cannot simply walk register parentage chain,
1954
+ * since many different registers and stack slots could have been
1955
+ * used to compute single precise scalar.
1956
+ *
1957
+ * The approach of starting with precise=true for all registers and then
1958
+ * backtrack to mark a register as not precise when the verifier detects
1959
+ * that program doesn't care about specific value (e.g., when helper
1960
+ * takes register as ARG_ANYTHING parameter) is not safe.
1961
+ *
1962
+ * It's ok to walk single parentage chain of the verifier states.
1963
+ * It's possible that this backtracking will go all the way till 1st insn.
1964
+ * All other branches will be explored for needing precision later.
1965
+ *
1966
+ * The backtracking needs to deal with cases like:
1967
+ * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1968
+ * r9 -= r8
1969
+ * r5 = r9
1970
+ * if r5 > 0x79f goto pc+7
1971
+ * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1972
+ * r5 += 1
1973
+ * ...
1974
+ * call bpf_perf_event_output#25
1975
+ * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1976
+ *
1977
+ * and this case:
1978
+ * r6 = 1
1979
+ * call foo // uses callee's r6 inside to compute r0
1980
+ * r0 += r6
1981
+ * if r0 == 0 goto
1982
+ *
1983
+ * to track above reg_mask/stack_mask needs to be independent for each frame.
1984
+ *
1985
+ * Also if parent's curframe > frame where backtracking started,
1986
+ * the verifier needs to mark registers in both frames, otherwise callees
1987
+ * may incorrectly prune callers. This is similar to
1988
+ * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1989
+ *
1990
+ * For now backtracking falls back into conservative marking.
1991
+ */
1992
+static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1993
+ struct bpf_verifier_state *st)
1994
+{
1995
+ struct bpf_func_state *func;
1996
+ struct bpf_reg_state *reg;
1997
+ int i, j;
1998
+
1999
+ /* big hammer: mark all scalars precise in this path.
2000
+ * pop_stack may still get !precise scalars.
2001
+ */
2002
+ for (; st; st = st->parent)
2003
+ for (i = 0; i <= st->curframe; i++) {
2004
+ func = st->frame[i];
2005
+ for (j = 0; j < BPF_REG_FP; j++) {
2006
+ reg = &func->regs[j];
2007
+ if (reg->type != SCALAR_VALUE)
2008
+ continue;
2009
+ reg->precise = true;
2010
+ }
2011
+ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2012
+ if (func->stack[j].slot_type[0] != STACK_SPILL)
2013
+ continue;
2014
+ reg = &func->stack[j].spilled_ptr;
2015
+ if (reg->type != SCALAR_VALUE)
2016
+ continue;
2017
+ reg->precise = true;
2018
+ }
2019
+ }
2020
+}
2021
+
2022
+static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2023
+ int spi)
2024
+{
2025
+ struct bpf_verifier_state *st = env->cur_state;
2026
+ int first_idx = st->first_insn_idx;
2027
+ int last_idx = env->insn_idx;
2028
+ struct bpf_func_state *func;
2029
+ struct bpf_reg_state *reg;
2030
+ u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2031
+ u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2032
+ bool skip_first = true;
2033
+ bool new_marks = false;
2034
+ int i, err;
2035
+
2036
+ if (!env->bpf_capable)
2037
+ return 0;
2038
+
2039
+ func = st->frame[st->curframe];
2040
+ if (regno >= 0) {
2041
+ reg = &func->regs[regno];
2042
+ if (reg->type != SCALAR_VALUE) {
2043
+ WARN_ONCE(1, "backtracing misuse");
2044
+ return -EFAULT;
2045
+ }
2046
+ if (!reg->precise)
2047
+ new_marks = true;
2048
+ else
2049
+ reg_mask = 0;
2050
+ reg->precise = true;
2051
+ }
2052
+
2053
+ while (spi >= 0) {
2054
+ if (func->stack[spi].slot_type[0] != STACK_SPILL) {
2055
+ stack_mask = 0;
2056
+ break;
2057
+ }
2058
+ reg = &func->stack[spi].spilled_ptr;
2059
+ if (reg->type != SCALAR_VALUE) {
2060
+ stack_mask = 0;
2061
+ break;
2062
+ }
2063
+ if (!reg->precise)
2064
+ new_marks = true;
2065
+ else
2066
+ stack_mask = 0;
2067
+ reg->precise = true;
2068
+ break;
2069
+ }
2070
+
2071
+ if (!new_marks)
2072
+ return 0;
2073
+ if (!reg_mask && !stack_mask)
2074
+ return 0;
2075
+ for (;;) {
2076
+ DECLARE_BITMAP(mask, 64);
2077
+ u32 history = st->jmp_history_cnt;
2078
+
2079
+ if (env->log.level & BPF_LOG_LEVEL)
2080
+ verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2081
+ for (i = last_idx;;) {
2082
+ if (skip_first) {
2083
+ err = 0;
2084
+ skip_first = false;
2085
+ } else {
2086
+ err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2087
+ }
2088
+ if (err == -ENOTSUPP) {
2089
+ mark_all_scalars_precise(env, st);
2090
+ return 0;
2091
+ } else if (err) {
2092
+ return err;
2093
+ }
2094
+ if (!reg_mask && !stack_mask)
2095
+ /* Found assignment(s) into tracked register in this state.
2096
+ * Since this state is already marked, just return.
2097
+ * Nothing to be tracked further in the parent state.
2098
+ */
2099
+ return 0;
2100
+ if (i == first_idx)
2101
+ break;
2102
+ i = get_prev_insn_idx(st, i, &history);
2103
+ if (i >= env->prog->len) {
2104
+ /* This can happen if backtracking reached insn 0
2105
+ * and there are still reg_mask or stack_mask
2106
+ * to backtrack.
2107
+ * It means the backtracking missed the spot where
2108
+ * particular register was initialized with a constant.
2109
+ */
2110
+ verbose(env, "BUG backtracking idx %d\n", i);
2111
+ WARN_ONCE(1, "verifier backtracking bug");
2112
+ return -EFAULT;
2113
+ }
2114
+ }
2115
+ st = st->parent;
2116
+ if (!st)
2117
+ break;
2118
+
2119
+ new_marks = false;
2120
+ func = st->frame[st->curframe];
2121
+ bitmap_from_u64(mask, reg_mask);
2122
+ for_each_set_bit(i, mask, 32) {
2123
+ reg = &func->regs[i];
2124
+ if (reg->type != SCALAR_VALUE) {
2125
+ reg_mask &= ~(1u << i);
2126
+ continue;
2127
+ }
2128
+ if (!reg->precise)
2129
+ new_marks = true;
2130
+ reg->precise = true;
2131
+ }
2132
+
2133
+ bitmap_from_u64(mask, stack_mask);
2134
+ for_each_set_bit(i, mask, 64) {
2135
+ if (i >= func->allocated_stack / BPF_REG_SIZE) {
2136
+ /* the sequence of instructions:
2137
+ * 2: (bf) r3 = r10
2138
+ * 3: (7b) *(u64 *)(r3 -8) = r0
2139
+ * 4: (79) r4 = *(u64 *)(r10 -8)
2140
+ * doesn't contain jmps. It's backtracked
2141
+ * as a single block.
2142
+ * During backtracking insn 3 is not recognized as
2143
+ * stack access, so at the end of backtracking
2144
+ * stack slot fp-8 is still marked in stack_mask.
2145
+ * However the parent state may not have accessed
2146
+ * fp-8 and it's "unallocated" stack space.
2147
+ * In such case fallback to conservative.
2148
+ */
2149
+ mark_all_scalars_precise(env, st);
2150
+ return 0;
2151
+ }
2152
+
2153
+ if (func->stack[i].slot_type[0] != STACK_SPILL) {
2154
+ stack_mask &= ~(1ull << i);
2155
+ continue;
2156
+ }
2157
+ reg = &func->stack[i].spilled_ptr;
2158
+ if (reg->type != SCALAR_VALUE) {
2159
+ stack_mask &= ~(1ull << i);
2160
+ continue;
2161
+ }
2162
+ if (!reg->precise)
2163
+ new_marks = true;
2164
+ reg->precise = true;
2165
+ }
2166
+ if (env->log.level & BPF_LOG_LEVEL) {
2167
+ print_verifier_state(env, func);
2168
+ verbose(env, "parent %s regs=%x stack=%llx marks\n",
2169
+ new_marks ? "didn't have" : "already had",
2170
+ reg_mask, stack_mask);
2171
+ }
2172
+
2173
+ if (!reg_mask && !stack_mask)
2174
+ break;
2175
+ if (!new_marks)
2176
+ break;
2177
+
2178
+ last_idx = st->last_insn_idx;
2179
+ first_idx = st->first_insn_idx;
2180
+ }
2181
+ return 0;
2182
+}
2183
+
2184
+static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2185
+{
2186
+ return __mark_chain_precision(env, regno, -1);
2187
+}
2188
+
2189
+static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2190
+{
2191
+ return __mark_chain_precision(env, -1, spi);
9412192 }
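
As a rough standalone model of the conservative fallback described above
(mark_all_scalars_precise walking every parent state and marking each scalar
register precise), here is a simplified sketch; the toy_* names are invented
for illustration and are not kernel structures:

#include <stdbool.h>
#include <stddef.h>

#define NR_REGS 10	/* R0..R9; R10 is the read-only frame pointer */

struct toy_reg {
	bool is_scalar;
	bool precise;
};

struct toy_state {
	struct toy_reg regs[NR_REGS];
	struct toy_state *parent;	/* older state in the parentage chain */
};

/* Walk the whole chain and mark every scalar register precise: the
 * "big hammer" used when backtracking gives up (-ENOTSUPP above).
 */
static void toy_mark_all_precise(struct toy_state *st)
{
	for (; st; st = st->parent) {
		int i;

		for (i = 0; i < NR_REGS; i++)
			if (st->regs[i].is_scalar)
				st->regs[i].precise = true;
	}
}

int main(void)
{
	struct toy_state parent = { .regs = { [1] = { .is_scalar = true } } };
	struct toy_state cur = { .regs = { [5] = { .is_scalar = true } },
				 .parent = &parent };

	toy_mark_all_precise(&cur);
	return !(cur.regs[5].precise && parent.regs[1].precise);
}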
9422193
9432194 static bool is_spillable_regtype(enum bpf_reg_type type)
....@@ -950,7 +2201,24 @@
9502201 case PTR_TO_PACKET:
9512202 case PTR_TO_PACKET_META:
9522203 case PTR_TO_PACKET_END:
2204
+ case PTR_TO_FLOW_KEYS:
9532205 case CONST_PTR_TO_MAP:
2206
+ case PTR_TO_SOCKET:
2207
+ case PTR_TO_SOCKET_OR_NULL:
2208
+ case PTR_TO_SOCK_COMMON:
2209
+ case PTR_TO_SOCK_COMMON_OR_NULL:
2210
+ case PTR_TO_TCP_SOCK:
2211
+ case PTR_TO_TCP_SOCK_OR_NULL:
2212
+ case PTR_TO_XDP_SOCK:
2213
+ case PTR_TO_BTF_ID:
2214
+ case PTR_TO_BTF_ID_OR_NULL:
2215
+ case PTR_TO_RDONLY_BUF:
2216
+ case PTR_TO_RDONLY_BUF_OR_NULL:
2217
+ case PTR_TO_RDWR_BUF:
2218
+ case PTR_TO_RDWR_BUF_OR_NULL:
2219
+ case PTR_TO_PERCPU_BTF_ID:
2220
+ case PTR_TO_MEM:
2221
+ case PTR_TO_MEM_OR_NULL:
9542222 return true;
9552223 default:
9562224 return false;
....@@ -968,6 +2236,29 @@
9682236 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
9692237 }
9702238
2239
+static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2240
+{
2241
+ return tnum_is_unknown(reg->var_off) &&
2242
+ reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2243
+ reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2244
+ reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2245
+ reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2246
+}
2247
+
2248
+static bool register_is_bounded(struct bpf_reg_state *reg)
2249
+{
2250
+ return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2251
+}
2252
+
2253
+static bool __is_pointer_value(bool allow_ptr_leaks,
2254
+ const struct bpf_reg_state *reg)
2255
+{
2256
+ if (allow_ptr_leaks)
2257
+ return false;
2258
+
2259
+ return reg->type != SCALAR_VALUE;
2260
+}
2261
+
9712262 static void save_register_state(struct bpf_func_state *state,
9722263 int spi, struct bpf_reg_state *reg)
9732264 {
....@@ -980,19 +2271,22 @@
9802271 state->stack[spi].slot_type[i] = STACK_SPILL;
9812272 }
9822273
983
-/* check_stack_read/write functions track spill/fill of registers,
2274
+/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
9842275 * stack boundary and alignment are checked in check_mem_access()
9852276 */
986
-static int check_stack_write(struct bpf_verifier_env *env,
987
- struct bpf_func_state *state, /* func where register points to */
988
- int off, int size, int value_regno, int insn_idx)
2277
+static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2278
+ /* stack frame we're writing to */
2279
+ struct bpf_func_state *state,
2280
+ int off, int size, int value_regno,
2281
+ int insn_idx)
9892282 {
9902283 struct bpf_func_state *cur; /* state of the current function */
9912284 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2285
+ u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
9922286 struct bpf_reg_state *reg = NULL;
9932287
9942288 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
995
- true);
2289
+ state->acquired_refs, true);
9962290 if (err)
9972291 return err;
9982292 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
....@@ -1008,7 +2302,7 @@
10082302 cur = env->cur_state->frame[env->cur_state->curframe];
10092303 if (value_regno >= 0)
10102304 reg = &cur->regs[value_regno];
1011
- if (!env->allow_ptr_leaks) {
2305
+ if (!env->bypass_spec_v4) {
10122306 bool sanitize = reg && is_spillable_regtype(reg->type);
10132307
10142308 for (i = 0; i < size; i++) {
....@@ -1022,12 +2316,24 @@
10222316 env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
10232317 }
10242318
1025
- if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
1026
- !register_is_null(reg) && env->allow_ptr_leaks) {
2319
+ if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
2320
+ !register_is_null(reg) && env->bpf_capable) {
2321
+ if (dst_reg != BPF_REG_FP) {
2322
+ /* The backtracking logic can only recognize explicit
2323
+ * stack slot address like [fp - 8]. Other spill of
2324
+ * scalar via a different register has to be conservative.
2325
+ * Backtrack from here and mark all registers as precise
2326
+ * that contributed into 'reg' being a constant.
2327
+ */
2328
+ err = mark_chain_precision(env, value_regno);
2329
+ if (err)
2330
+ return err;
2331
+ }
10272332 save_register_state(state, spi, reg);
10282333 } else if (reg && is_spillable_regtype(reg->type)) {
10292334 /* register containing pointer is being spilled into stack */
10302335 if (size != BPF_REG_SIZE) {
2336
+ verbose_linfo(env, insn_idx, "; ");
10312337 verbose(env, "invalid size of register spill\n");
10322338 return -EACCES;
10332339 }
....@@ -1058,8 +2364,13 @@
10582364 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
10592365
10602366 /* when we zero initialize stack slots mark them as such */
1061
- if (reg && register_is_null(reg))
2367
+ if (reg && register_is_null(reg)) {
2368
+ /* backtracking doesn't work for STACK_ZERO yet. */
2369
+ err = mark_chain_precision(env, value_regno);
2370
+ if (err)
2371
+ return err;
10622372 type = STACK_ZERO;
2373
+ }
10632374
10642375 /* Mark slots affected by this stack write. */
10652376 for (i = 0; i < size; i++)
....@@ -1069,9 +2380,175 @@
10692380 return 0;
10702381 }
10712382
1072
-static int check_stack_read(struct bpf_verifier_env *env,
1073
- struct bpf_func_state *reg_state /* func where register points to */,
1074
- int off, int size, int value_regno)
2383
+/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2384
+ * known to contain a variable offset.
2385
+ * This function checks whether the write is permitted and conservatively
2386
+ * tracks the effects of the write, considering that each stack slot in the
2387
+ * dynamic range is potentially written to.
2388
+ *
2389
+ * 'off' includes 'regno->off'.
2390
+ * 'value_regno' can be -1, meaning that an unknown value is being written to
2391
+ * the stack.
2392
+ *
2393
+ * Spilled pointers in range are not marked as written because we don't know
2394
+ * what's going to be actually written. This means that read propagation for
2395
+ * future reads cannot be terminated by this write.
2396
+ *
2397
+ * For privileged programs, uninitialized stack slots are considered
2398
+ * initialized by this write (even though we don't know exactly what offsets
2399
+ * are going to be written to). The idea is that we don't want the verifier to
2400
+ * reject future reads that access slots written to through variable offsets.
2401
+ */
2402
+static int check_stack_write_var_off(struct bpf_verifier_env *env,
2403
+ /* func where register points to */
2404
+ struct bpf_func_state *state,
2405
+ int ptr_regno, int off, int size,
2406
+ int value_regno, int insn_idx)
2407
+{
2408
+ struct bpf_func_state *cur; /* state of the current function */
2409
+ int min_off, max_off;
2410
+ int i, err;
2411
+ struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2412
+ bool writing_zero = false;
2413
+ /* set if the fact that we're writing a zero is used to let any
2414
+ * stack slots remain STACK_ZERO
2415
+ */
2416
+ bool zero_used = false;
2417
+
2418
+ cur = env->cur_state->frame[env->cur_state->curframe];
2419
+ ptr_reg = &cur->regs[ptr_regno];
2420
+ min_off = ptr_reg->smin_value + off;
2421
+ max_off = ptr_reg->smax_value + off + size;
2422
+ if (value_regno >= 0)
2423
+ value_reg = &cur->regs[value_regno];
2424
+ if (value_reg && register_is_null(value_reg))
2425
+ writing_zero = true;
2426
+
2427
+ err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE),
2428
+ state->acquired_refs, true);
2429
+ if (err)
2430
+ return err;
2431
+
2432
+
2433
+ /* Variable offset writes destroy any spilled pointers in range. */
2434
+ for (i = min_off; i < max_off; i++) {
2435
+ u8 new_type, *stype;
2436
+ int slot, spi;
2437
+
2438
+ slot = -i - 1;
2439
+ spi = slot / BPF_REG_SIZE;
2440
+ stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2441
+
2442
+ if (!env->allow_ptr_leaks
2443
+ && *stype != NOT_INIT
2444
+ && *stype != SCALAR_VALUE) {
2445
+ /* Reject the write if there's are spilled pointers in
2446
+ * range. If we didn't reject here, the ptr status
2447
+ * would be erased below (even though not all slots are
2448
+ * actually overwritten), possibly opening the door to
2449
+ * leaks.
2450
+ */
2451
+ verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
2452
+ insn_idx, i);
2453
+ return -EINVAL;
2454
+ }
2455
+
2456
+ /* Erase all spilled pointers. */
2457
+ state->stack[spi].spilled_ptr.type = NOT_INIT;
2458
+
2459
+ /* Update the slot type. */
2460
+ new_type = STACK_MISC;
2461
+ if (writing_zero && *stype == STACK_ZERO) {
2462
+ new_type = STACK_ZERO;
2463
+ zero_used = true;
2464
+ }
2465
+ /* If the slot is STACK_INVALID, we check whether it's OK to
2466
+ * pretend that it will be initialized by this write. The slot
2467
+ * might not actually be written to, and so if we mark it as
2468
+ * initialized future reads might leak uninitialized memory.
2469
+ * For privileged programs, we will accept such reads to slots
2470
+ * that may or may not be written because, if we're reject
2471
+ * them, the error would be too confusing.
2472
+ */
2473
+ if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2474
+ verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
2475
+ insn_idx, i);
2476
+ return -EINVAL;
2477
+ }
2478
+ *stype = new_type;
2479
+ }
2480
+ if (zero_used) {
2481
+ /* backtracking doesn't work for STACK_ZERO yet. */
2482
+ err = mark_chain_precision(env, value_regno);
2483
+ if (err)
2484
+ return err;
2485
+ }
2486
+ return 0;
2487
+}
2488
+
2489
+/* When register 'dst_regno' is assigned some values from stack[min_off,
2490
+ * max_off), we set the register's type according to the types of the
2491
+ * respective stack slots. If all the stack values are known to be zeros, then
2492
+ * so is the destination reg. Otherwise, the register is considered to be
2493
+ * SCALAR. This function does not deal with register filling; the caller must
2494
+ * ensure that all spilled registers in the stack range have been marked as
2495
+ * read.
2496
+ */
2497
+static void mark_reg_stack_read(struct bpf_verifier_env *env,
2498
+ /* func where src register points to */
2499
+ struct bpf_func_state *ptr_state,
2500
+ int min_off, int max_off, int dst_regno)
2501
+{
2502
+ struct bpf_verifier_state *vstate = env->cur_state;
2503
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
2504
+ int i, slot, spi;
2505
+ u8 *stype;
2506
+ int zeros = 0;
2507
+
2508
+ for (i = min_off; i < max_off; i++) {
2509
+ slot = -i - 1;
2510
+ spi = slot / BPF_REG_SIZE;
2511
+ stype = ptr_state->stack[spi].slot_type;
2512
+ if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
2513
+ break;
2514
+ zeros++;
2515
+ }
2516
+ if (zeros == max_off - min_off) {
2517
+ /* any access_size read into register is zero extended,
2518
+ * so the whole register == const_zero
2519
+ */
2520
+ __mark_reg_const_zero(&state->regs[dst_regno]);
2521
+ /* backtracking doesn't support STACK_ZERO yet,
2522
+ * so mark it precise here, so that later
2523
+ * backtracking can stop here.
2524
+ * Backtracking may not need this if this register
2525
+ * doesn't participate in pointer adjustment.
2526
+ * Forward propagation of precise flag is not
2527
+ * necessary either. This mark is only to stop
2528
+ * backtracking. Any register that contributed
2529
+ * to const 0 was marked precise before spill.
2530
+ */
2531
+ state->regs[dst_regno].precise = true;
2532
+ } else {
2533
+ /* have read misc data from the stack */
2534
+ mark_reg_unknown(env, state->regs, dst_regno);
2535
+ }
2536
+ state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2537
+}
2538
+
2539
+/* Read the stack at 'off' and put the results into the register indicated by
2540
+ * 'dst_regno'. It handles reg filling if the addressed stack slot is a
2541
+ * spilled reg.
2542
+ *
2543
+ * 'dst_regno' can be -1, meaning that the read value is not going to a
2544
+ * register.
2545
+ *
2546
+ * The access is assumed to be within the current stack bounds.
2547
+ */
2548
+static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
2549
+ /* func where src register points to */
2550
+ struct bpf_func_state *reg_state,
2551
+ int off, int size, int dst_regno)
10752552 {
10762553 struct bpf_verifier_state *vstate = env->cur_state;
10772554 struct bpf_func_state *state = vstate->frame[vstate->curframe];
....@@ -1079,25 +2556,21 @@
10792556 struct bpf_reg_state *reg;
10802557 u8 *stype;
10812558
1082
- if (reg_state->allocated_stack <= slot) {
1083
- verbose(env, "invalid read from stack off %d+0 size %d\n",
1084
- off, size);
1085
- return -EACCES;
1086
- }
10872559 stype = reg_state->stack[spi].slot_type;
10882560 reg = &reg_state->stack[spi].spilled_ptr;
10892561
10902562 if (stype[0] == STACK_SPILL) {
10912563 if (size != BPF_REG_SIZE) {
10922564 if (reg->type != SCALAR_VALUE) {
2565
+ verbose_linfo(env, env->insn_idx, "; ");
10932566 verbose(env, "invalid size of register fill\n");
10942567 return -EACCES;
10952568 }
1096
- if (value_regno >= 0) {
1097
- mark_reg_unknown(env, state->regs, value_regno);
1098
- state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2569
+ if (dst_regno >= 0) {
2570
+ mark_reg_unknown(env, state->regs, dst_regno);
2571
+ state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
10992572 }
1100
- mark_reg_read(env, reg, reg->parent);
2573
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
11012574 return 0;
11022575 }
11032576 for (i = 1; i < BPF_REG_SIZE; i++) {
....@@ -1107,102 +2580,259 @@
11072580 }
11082581 }
11092582
1110
- if (value_regno >= 0) {
2583
+ if (dst_regno >= 0) {
11112584 /* restore register state from stack */
1112
- state->regs[value_regno] = *reg;
2585
+ state->regs[dst_regno] = *reg;
11132586 /* mark reg as written since spilled pointer state likely
11142587 * has its liveness marks cleared by is_state_visited()
11152588 * which resets stack/reg liveness for state transitions
11162589 */
1117
- state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2590
+ state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2591
+ } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2592
+ /* If dst_regno==-1, the caller is asking us whether
2593
+ * it is acceptable to use this value as a SCALAR_VALUE
2594
+ * (e.g. for XADD).
2595
+ * We must not allow unprivileged callers to do that
2596
+ * with spilled pointers.
2597
+ */
2598
+ verbose(env, "leaking pointer from stack off %d\n",
2599
+ off);
2600
+ return -EACCES;
11182601 }
1119
- mark_reg_read(env, reg, reg->parent);
2602
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
11202603 } else {
1121
- int zeros = 0;
2604
+ u8 type;
11222605
11232606 for (i = 0; i < size; i++) {
1124
- if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
2607
+ type = stype[(slot - i) % BPF_REG_SIZE];
2608
+ if (type == STACK_MISC)
11252609 continue;
1126
- if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1127
- zeros++;
2610
+ if (type == STACK_ZERO)
11282611 continue;
1129
- }
11302612 verbose(env, "invalid read from stack off %d+%d size %d\n",
11312613 off, i, size);
11322614 return -EACCES;
11332615 }
1134
- mark_reg_read(env, reg, reg->parent);
1135
- if (value_regno >= 0) {
1136
- if (zeros == size) {
1137
- /* any size read into register is zero extended,
1138
- * so the whole register == const_zero
1139
- */
1140
- __mark_reg_const_zero(&state->regs[value_regno]);
1141
- } else {
1142
- /* have read misc data from the stack */
1143
- mark_reg_unknown(env, state->regs, value_regno);
1144
- }
1145
- state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1146
- }
2616
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2617
+ if (dst_regno >= 0)
2618
+ mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
11472619 }
11482620 return 0;
11492621 }
11502622
1151
-static int check_stack_access(struct bpf_verifier_env *env,
1152
- const struct bpf_reg_state *reg,
1153
- int off, int size)
2623
+enum stack_access_src {
2624
+ ACCESS_DIRECT = 1, /* the access is performed by an instruction */
2625
+ ACCESS_HELPER = 2, /* the access is performed by a helper */
2626
+};
2627
+
2628
+static int check_stack_range_initialized(struct bpf_verifier_env *env,
2629
+ int regno, int off, int access_size,
2630
+ bool zero_size_allowed,
2631
+ enum stack_access_src type,
2632
+ struct bpf_call_arg_meta *meta);
2633
+
2634
+static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
11542635 {
1155
- /* Stack accesses must be at a fixed offset, so that we
1156
- * can determine what type of data were returned. See
1157
- * check_stack_read().
2636
+ return cur_regs(env) + regno;
2637
+}
2638
+
2639
+/* Read the stack at 'ptr_regno + off' and put the result into the register
2640
+ * 'dst_regno'.
2641
+ * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
2642
+ * but not its variable offset.
2643
+ * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
2644
+ *
2645
+ * As opposed to check_stack_read_fixed_off, this function doesn't deal with
2646
+ * filling registers (i.e. reads of spilled register cannot be detected when
2647
+ * the offset is not fixed). We conservatively mark 'dst_regno' as containing
2648
+ * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
2649
+ * offset; for a fixed offset check_stack_read_fixed_off should be used
2650
+ * instead.
2651
+ */
2652
+static int check_stack_read_var_off(struct bpf_verifier_env *env,
2653
+ int ptr_regno, int off, int size, int dst_regno)
2654
+{
2655
+ /* The state of the source register. */
2656
+ struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2657
+ struct bpf_func_state *ptr_state = func(env, reg);
2658
+ int err;
2659
+ int min_off, max_off;
2660
+
2661
+ /* Note that we pass a NULL meta, so raw access will not be permitted.
11582662 */
1159
- if (!tnum_is_const(reg->var_off)) {
2663
+ err = check_stack_range_initialized(env, ptr_regno, off, size,
2664
+ false, ACCESS_DIRECT, NULL);
2665
+ if (err)
2666
+ return err;
2667
+
2668
+ min_off = reg->smin_value + off;
2669
+ max_off = reg->smax_value + off;
2670
+ mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
2671
+ return 0;
2672
+}
2673
+
2674
+/* check_stack_read dispatches to check_stack_read_fixed_off or
2675
+ * check_stack_read_var_off.
2676
+ *
2677
+ * The caller must ensure that the offset falls within the allocated stack
2678
+ * bounds.
2679
+ *
2680
+ * 'dst_regno' is a register which will receive the value from the stack. It
2681
+ * can be -1, meaning that the read value is not going to a register.
2682
+ */
2683
+static int check_stack_read(struct bpf_verifier_env *env,
2684
+ int ptr_regno, int off, int size,
2685
+ int dst_regno)
2686
+{
2687
+ struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2688
+ struct bpf_func_state *state = func(env, reg);
2689
+ int err;
2690
+ /* Some accesses are only permitted with a static offset. */
2691
+ bool var_off = !tnum_is_const(reg->var_off);
2692
+
2693
+ /* The offset is required to be static when reads don't go to a
2694
+ * register, in order to not leak pointers (see
2695
+ * check_stack_read_fixed_off).
2696
+ */
2697
+ if (dst_regno < 0 && var_off) {
11602698 char tn_buf[48];
11612699
11622700 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1163
- verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
2701
+ verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
11642702 tn_buf, off, size);
11652703 return -EACCES;
11662704 }
2705
+ /* Variable offset is prohibited for unprivileged mode for simplicity
2706
+ * since it requires corresponding support in Spectre masking for stack
2707
+ * ALU. See also retrieve_ptr_limit().
2708
+ */
2709
+ if (!env->bypass_spec_v1 && var_off) {
2710
+ char tn_buf[48];
11672711
1168
- if (off >= 0 || off < -MAX_BPF_STACK) {
1169
- verbose(env, "invalid stack off=%d size=%d\n", off, size);
2712
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2713
+ verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
2714
+ ptr_regno, tn_buf);
11702715 return -EACCES;
11712716 }
11722717
1173
- return 0;
2718
+ if (!var_off) {
2719
+ off += reg->var_off.value;
2720
+ err = check_stack_read_fixed_off(env, state, off, size,
2721
+ dst_regno);
2722
+ } else {
2723
+ /* Variable offset stack reads need more conservative handling
2724
+ * than fixed offset ones. Note that dst_regno >= 0 on this
2725
+ * branch.
2726
+ */
2727
+ err = check_stack_read_var_off(env, ptr_regno, off, size,
2728
+ dst_regno);
2729
+ }
2730
+ return err;
11742731 }
11752732
1176
-/* check read/write into map element returned by bpf_map_lookup_elem() */
1177
-static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
1178
- int size, bool zero_size_allowed)
2733
+
2734
+/* check_stack_write dispatches to check_stack_write_fixed_off or
2735
+ * check_stack_write_var_off.
2736
+ *
2737
+ * 'ptr_regno' is the register used as a pointer into the stack.
2738
+ * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
2739
+ * 'value_regno' is the register whose value we're writing to the stack. It can
2740
+ * be -1, meaning that we're not writing from a register.
2741
+ *
2742
+ * The caller must ensure that the offset falls within the maximum stack size.
2743
+ */
2744
+static int check_stack_write(struct bpf_verifier_env *env,
2745
+ int ptr_regno, int off, int size,
2746
+ int value_regno, int insn_idx)
2747
+{
2748
+ struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2749
+ struct bpf_func_state *state = func(env, reg);
2750
+ int err;
2751
+
2752
+ if (tnum_is_const(reg->var_off)) {
2753
+ off += reg->var_off.value;
2754
+ err = check_stack_write_fixed_off(env, state, off, size,
2755
+ value_regno, insn_idx);
2756
+ } else {
2757
+ /* Variable offset stack writes need more conservative handling
2758
+ * than fixed offset ones.
2759
+ */
2760
+ err = check_stack_write_var_off(env, state,
2761
+ ptr_regno, off, size,
2762
+ value_regno, insn_idx);
2763
+ }
2764
+ return err;
2765
+}
2766
+
2767
+static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
2768
+ int off, int size, enum bpf_access_type type)
11792769 {
11802770 struct bpf_reg_state *regs = cur_regs(env);
11812771 struct bpf_map *map = regs[regno].map_ptr;
2772
+ u32 cap = bpf_map_flags_to_cap(map);
11822773
1183
- if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1184
- off + size > map->value_size) {
1185
- verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
2774
+ if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2775
+ verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
11862776 map->value_size, off, size);
11872777 return -EACCES;
11882778 }
2779
+
2780
+ if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2781
+ verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
2782
+ map->value_size, off, size);
2783
+ return -EACCES;
2784
+ }
2785
+
11892786 return 0;
11902787 }
11912788
1192
-/* check read/write into a map element with possible variable offset */
1193
-static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1194
- int off, int size, bool zero_size_allowed)
2789
+/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
2790
+static int __check_mem_access(struct bpf_verifier_env *env, int regno,
2791
+ int off, int size, u32 mem_size,
2792
+ bool zero_size_allowed)
2793
+{
2794
+ bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
2795
+ struct bpf_reg_state *reg;
2796
+
2797
+ if (off >= 0 && size_ok && (u64)off + size <= mem_size)
2798
+ return 0;
2799
+
2800
+ reg = &cur_regs(env)[regno];
2801
+ switch (reg->type) {
2802
+ case PTR_TO_MAP_VALUE:
2803
+ verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
2804
+ mem_size, off, size);
2805
+ break;
2806
+ case PTR_TO_PACKET:
2807
+ case PTR_TO_PACKET_META:
2808
+ case PTR_TO_PACKET_END:
2809
+ verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
2810
+ off, size, regno, reg->id, off, mem_size);
2811
+ break;
2812
+ case PTR_TO_MEM:
2813
+ default:
2814
+ verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
2815
+ mem_size, off, size);
2816
+ }
2817
+
2818
+ return -EACCES;
2819
+}
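
__check_mem_access above relies on the (u64)off + size widening so that a
large offset cannot wrap the sum before it is compared against mem_size. A
minimal standalone demonstration of that bounds check (not kernel code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Overflow-safe variant: do the addition in 64 bits, as the verifier does. */
static bool access_ok(int off, int size, uint32_t mem_size)
{
	return off >= 0 && size > 0 && (uint64_t)off + size <= mem_size;
}

int main(void)
{
	/* A huge offset is rejected even though a 32-bit sum would wrap. */
	printf("off=0x7fffffff size=8 mem=64 -> %d\n",
	       access_ok(0x7fffffff, 8, 64));
	printf("off=16 size=8 mem=64 -> %d\n", access_ok(16, 8, 64));
	return 0;
}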
2820
+
2821
+/* check read/write into a memory region with possible variable offset */
2822
+static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
2823
+ int off, int size, u32 mem_size,
2824
+ bool zero_size_allowed)
11952825 {
11962826 struct bpf_verifier_state *vstate = env->cur_state;
11972827 struct bpf_func_state *state = vstate->frame[vstate->curframe];
11982828 struct bpf_reg_state *reg = &state->regs[regno];
11992829 int err;
12002830
1201
- /* We may have adjusted the register to this map value, so we
2831
+ /* We may have adjusted the register pointing to memory region, so we
12022832 * need to try adding each of min_value and max_value to off
12032833 * to make sure our theoretical access will be safe.
12042834 */
1205
- if (env->log.level)
2835
+ if (env->log.level & BPF_LOG_LEVEL)
12062836 print_verifier_state(env, state);
12072837
12082838 /* The minimum value is only important with signed
....@@ -1219,10 +2849,10 @@
12192849 regno);
12202850 return -EACCES;
12212851 }
1222
- err = __check_map_access(env, regno, reg->smin_value + off, size,
1223
- zero_size_allowed);
2852
+ err = __check_mem_access(env, regno, reg->smin_value + off, size,
2853
+ mem_size, zero_size_allowed);
12242854 if (err) {
1225
- verbose(env, "R%d min value is outside of the array range\n",
2855
+ verbose(env, "R%d min value is outside of the allowed memory range\n",
12262856 regno);
12272857 return err;
12282858 }
....@@ -1232,33 +2862,79 @@
12322862 * If reg->umax_value + off could overflow, treat that as unbounded too.
12332863 */
12342864 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
1235
- verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
2865
+ verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
12362866 regno);
12372867 return -EACCES;
12382868 }
1239
- err = __check_map_access(env, regno, reg->umax_value + off, size,
1240
- zero_size_allowed);
1241
- if (err)
1242
- verbose(env, "R%d max value is outside of the array range\n",
2869
+ err = __check_mem_access(env, regno, reg->umax_value + off, size,
2870
+ mem_size, zero_size_allowed);
2871
+ if (err) {
2872
+ verbose(env, "R%d max value is outside of the allowed memory range\n",
12432873 regno);
2874
+ return err;
2875
+ }
2876
+
2877
+ return 0;
2878
+}
2879
+
2880
+/* check read/write into a map element with possible variable offset */
2881
+static int check_map_access(struct bpf_verifier_env *env, u32 regno,
2882
+ int off, int size, bool zero_size_allowed)
2883
+{
2884
+ struct bpf_verifier_state *vstate = env->cur_state;
2885
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
2886
+ struct bpf_reg_state *reg = &state->regs[regno];
2887
+ struct bpf_map *map = reg->map_ptr;
2888
+ int err;
2889
+
2890
+ err = check_mem_region_access(env, regno, off, size, map->value_size,
2891
+ zero_size_allowed);
2892
+ if (err)
2893
+ return err;
2894
+
2895
+ if (map_value_has_spin_lock(map)) {
2896
+ u32 lock = map->spin_lock_off;
2897
+
2898
+ /* if any part of struct bpf_spin_lock can be touched by
2899
+ * load/store reject this program.
2900
+ * To check that [x1, x2) overlaps with [y1, y2)
2901
+ * it is sufficient to check x1 < y2 && y1 < x2.
2902
+ */
2903
+ if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
2904
+ lock < reg->umax_value + off + size) {
2905
+ verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2906
+ return -EACCES;
2907
+ }
2908
+ }
12442909 return err;
12452910 }
12462911
12472912 #define MAX_PACKET_OFF 0xffff
12482913
2914
+static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
2915
+{
2916
+ return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
2917
+}
2918
+
12492919 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
12502920 const struct bpf_call_arg_meta *meta,
12512921 enum bpf_access_type t)
12522922 {
1253
- switch (env->prog->type) {
2923
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
2924
+
2925
+ switch (prog_type) {
2926
+ /* Program types only with direct read access go here! */
12542927 case BPF_PROG_TYPE_LWT_IN:
12552928 case BPF_PROG_TYPE_LWT_OUT:
12562929 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
12572930 case BPF_PROG_TYPE_SK_REUSEPORT:
1258
- /* dst_input() and dst_output() can't write for now */
2931
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
2932
+ case BPF_PROG_TYPE_CGROUP_SKB:
12592933 if (t == BPF_WRITE)
12602934 return false;
1261
- /* fallthrough */
2935
+ fallthrough;
2936
+
2937
+ /* Program types with direct read + write access go here! */
12622938 case BPF_PROG_TYPE_SCHED_CLS:
12632939 case BPF_PROG_TYPE_SCHED_ACT:
12642940 case BPF_PROG_TYPE_XDP:
....@@ -1270,24 +2946,16 @@
12702946
12712947 env->seen_direct_write = true;
12722948 return true;
2949
+
2950
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2951
+ if (t == BPF_WRITE)
2952
+ env->seen_direct_write = true;
2953
+
2954
+ return true;
2955
+
12732956 default:
12742957 return false;
12752958 }
1276
-}
1277
-
1278
-static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
1279
- int off, int size, bool zero_size_allowed)
1280
-{
1281
- struct bpf_reg_state *regs = cur_regs(env);
1282
- struct bpf_reg_state *reg = &regs[regno];
1283
-
1284
- if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1285
- (u64)off + size > reg->range) {
1286
- verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
1287
- off, size, regno, reg->id, reg->off, reg->range);
1288
- return -EACCES;
1289
- }
1290
- return 0;
12912959 }
12922960
12932961 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
....@@ -1310,20 +2978,36 @@
13102978 regno);
13112979 return -EACCES;
13122980 }
1313
- err = __check_packet_access(env, regno, off, size, zero_size_allowed);
2981
+
2982
+ err = reg->range < 0 ? -EINVAL :
2983
+ __check_mem_access(env, regno, off, size, reg->range,
2984
+ zero_size_allowed);
13142985 if (err) {
13152986 verbose(env, "R%d offset is outside of the packet\n", regno);
13162987 return err;
13172988 }
2989
+
2990
+ /* __check_mem_access has made sure "off + size - 1" is within u16.
2991
+ * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
2992
+ * otherwise find_good_pkt_pointers would have refused to set range info
2993
+ * and __check_mem_access would have rejected this pkt access.
2994
+ * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2995
+ */
2996
+ env->prog->aux->max_pkt_offset =
2997
+ max_t(u32, env->prog->aux->max_pkt_offset,
2998
+ off + reg->umax_value + size - 1);
2999
+
13183000 return err;
13193001 }
13203002
13213003 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
13223004 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
1323
- enum bpf_access_type t, enum bpf_reg_type *reg_type)
3005
+ enum bpf_access_type t, enum bpf_reg_type *reg_type,
3006
+ u32 *btf_id)
13243007 {
13253008 struct bpf_insn_access_aux info = {
13263009 .reg_type = *reg_type,
3010
+ .log = &env->log,
13273011 };
13283012
13293013 if (env->ops->is_valid_access &&
....@@ -1337,7 +3021,10 @@
13373021 */
13383022 *reg_type = info.reg_type;
13393023
1340
- env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3024
+ if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
3025
+ *btf_id = info.btf_id;
3026
+ else
3027
+ env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
13413028 /* remember the offset of last byte accessed in ctx */
13423029 if (env->prog->aux->max_ctx_offset < off + size)
13433030 env->prog->aux->max_ctx_offset = off + size;
....@@ -1348,32 +3035,95 @@
13483035 return -EACCES;
13493036 }
13503037
1351
-static bool __is_pointer_value(bool allow_ptr_leaks,
1352
- const struct bpf_reg_state *reg)
3038
+static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3039
+ int size)
13533040 {
1354
- if (allow_ptr_leaks)
1355
- return false;
3041
+ if (size < 0 || off < 0 ||
3042
+ (u64)off + size > sizeof(struct bpf_flow_keys)) {
3043
+ verbose(env, "invalid access to flow keys off=%d size=%d\n",
3044
+ off, size);
3045
+ return -EACCES;
3046
+ }
3047
+ return 0;
3048
+}
13563049
1357
- return reg->type != SCALAR_VALUE;
3050
+static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3051
+ u32 regno, int off, int size,
3052
+ enum bpf_access_type t)
3053
+{
3054
+ struct bpf_reg_state *regs = cur_regs(env);
3055
+ struct bpf_reg_state *reg = &regs[regno];
3056
+ struct bpf_insn_access_aux info = {};
3057
+ bool valid;
3058
+
3059
+ if (reg->smin_value < 0) {
3060
+ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3061
+ regno);
3062
+ return -EACCES;
3063
+ }
3064
+
3065
+ switch (reg->type) {
3066
+ case PTR_TO_SOCK_COMMON:
3067
+ valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3068
+ break;
3069
+ case PTR_TO_SOCKET:
3070
+ valid = bpf_sock_is_valid_access(off, size, t, &info);
3071
+ break;
3072
+ case PTR_TO_TCP_SOCK:
3073
+ valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3074
+ break;
3075
+ case PTR_TO_XDP_SOCK:
3076
+ valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3077
+ break;
3078
+ default:
3079
+ valid = false;
3080
+ }
3081
+
3082
+
3083
+ if (valid) {
3084
+ env->insn_aux_data[insn_idx].ctx_field_size =
3085
+ info.ctx_field_size;
3086
+ return 0;
3087
+ }
3088
+
3089
+ verbose(env, "R%d invalid %s access off=%d size=%d\n",
3090
+ regno, reg_type_str[reg->type], off, size);
3091
+
3092
+ return -EACCES;
13583093 }
13593094
13603095 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
13613096 {
1362
- return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
3097
+ return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
13633098 }
13643099
13653100 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
13663101 {
1367
- const struct bpf_reg_state *reg = cur_regs(env) + regno;
3102
+ const struct bpf_reg_state *reg = reg_state(env, regno);
13683103
13693104 return reg->type == PTR_TO_CTX;
13703105 }
13713106
3107
+static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3108
+{
3109
+ const struct bpf_reg_state *reg = reg_state(env, regno);
3110
+
3111
+ return type_is_sk_pointer(reg->type);
3112
+}
3113
+
13723114 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
13733115 {
1374
- const struct bpf_reg_state *reg = cur_regs(env) + regno;
3116
+ const struct bpf_reg_state *reg = reg_state(env, regno);
13753117
13763118 return type_is_pkt_pointer(reg->type);
3119
+}
3120
+
3121
+static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3122
+{
3123
+ const struct bpf_reg_state *reg = reg_state(env, regno);
3124
+
3125
+ /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3126
+ return reg->type == PTR_TO_FLOW_KEYS;
13773127 }
13783128
13793129 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
....@@ -1449,6 +3199,9 @@
14493199 * right in front, treat it the very same way.
14503200 */
14513201 return check_pkt_ptr_alignment(env, reg, off, size, strict);
3202
+ case PTR_TO_FLOW_KEYS:
3203
+ pointer_desc = "flow keys ";
3204
+ break;
14523205 case PTR_TO_MAP_VALUE:
14533206 pointer_desc = "value ";
14543207 break;
....@@ -1457,11 +3210,23 @@
14573210 break;
14583211 case PTR_TO_STACK:
14593212 pointer_desc = "stack ";
1460
- /* The stack spill tracking logic in check_stack_write()
1461
- * and check_stack_read() relies on stack accesses being
3213
+ /* The stack spill tracking logic in check_stack_write_fixed_off()
3214
+ * and check_stack_read_fixed_off() relies on stack accesses being
14623215 * aligned.
14633216 */
14643217 strict = true;
3218
+ break;
3219
+ case PTR_TO_SOCKET:
3220
+ pointer_desc = "sock ";
3221
+ break;
3222
+ case PTR_TO_SOCK_COMMON:
3223
+ pointer_desc = "sock_common ";
3224
+ break;
3225
+ case PTR_TO_TCP_SOCK:
3226
+ pointer_desc = "tcp_sock ";
3227
+ break;
3228
+ case PTR_TO_XDP_SOCK:
3229
+ pointer_desc = "xdp_sock ";
14653230 break;
14663231 default:
14673232 break;
....@@ -1495,10 +3260,37 @@
14953260 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
14963261 struct bpf_subprog_info *subprog = env->subprog_info;
14973262 struct bpf_insn *insn = env->prog->insnsi;
3263
+ bool tail_call_reachable = false;
14983264 int ret_insn[MAX_CALL_FRAMES];
14993265 int ret_prog[MAX_CALL_FRAMES];
3266
+ int j;
15003267
15013268 process_func:
3269
+ /* protect against potential stack overflow that might happen when
3270
+ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3271
+ * depth for such a case down to 256 so that the worst case scenario
3272
+ * would result in 8k stack size (32, which is the tailcall limit, * 256 =
3273
+ * 8k).
3274
+ *
3275
+ * To get the idea what might happen, see an example:
3276
+ * func1 -> sub rsp, 128
3277
+ * subfunc1 -> sub rsp, 256
3278
+ * tailcall1 -> add rsp, 256
3279
+ * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3280
+ * subfunc2 -> sub rsp, 64
3281
+ * subfunc22 -> sub rsp, 128
3282
+ * tailcall2 -> add rsp, 128
3283
+ * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3284
+ *
3285
+ * tailcall will unwind the current stack frame but it will not get rid
3286
+ * of caller's stack as shown on the example above.
3287
+ */
3288
+ if (idx && subprog[idx].has_tail_call && depth >= 256) {
3289
+ verbose(env,
3290
+ "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3291
+ depth);
3292
+ return -EACCES;
3293
+ }
15023294 /* round up to 32-bytes, since this is granularity
15033295 * of interpreter stack size
15043296 */
....@@ -1527,13 +3319,29 @@
15273319 i);
15283320 return -EFAULT;
15293321 }
3322
+
3323
+ if (subprog[idx].has_tail_call)
3324
+ tail_call_reachable = true;
3325
+
15303326 frame++;
15313327 if (frame >= MAX_CALL_FRAMES) {
1532
- WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1533
- return -EFAULT;
3328
+ verbose(env, "the call stack of %d frames is too deep !\n",
3329
+ frame);
3330
+ return -E2BIG;
15343331 }
15353332 goto process_func;
15363333 }
3334
+ /* if tail call got detected across bpf2bpf calls then mark each of the
3335
+ * currently present subprog frames as tail call reachable subprogs;
3336
+ * this info will be utilized by JIT so that we will be preserving the
3337
+ * tail call counter throughout bpf2bpf calls combined with tailcalls
3338
+ */
3339
+ if (tail_call_reachable)
3340
+ for (j = 0; j < frame; j++)
3341
+ subprog[ret_prog[j]].tail_call_reachable = true;
3342
+ if (subprog[0].tail_call_reachable)
3343
+ env->prog->aux->tail_call_reachable = true;
3344
+
15373345 /* end of for() loop means the last insn of the 'subprog'
15383346 * was reached. Doesn't matter whether it was JA or EXIT
15393347 */
....@@ -1562,8 +3370,8 @@
15623370 }
15633371 #endif
15643372
1565
-static int check_ctx_reg(struct bpf_verifier_env *env,
1566
- const struct bpf_reg_state *reg, int regno)
3373
+int check_ctx_reg(struct bpf_verifier_env *env,
3374
+ const struct bpf_reg_state *reg, int regno)
15673375 {
15683376 /* Access to ctx or passing it to a helper is only allowed in
15693377 * its original, unmodified form.
....@@ -1584,6 +3392,72 @@
15843392 }
15853393
15863394 return 0;
3395
+}
3396
+
3397
+static int __check_buffer_access(struct bpf_verifier_env *env,
3398
+ const char *buf_info,
3399
+ const struct bpf_reg_state *reg,
3400
+ int regno, int off, int size)
3401
+{
3402
+ if (off < 0) {
3403
+ verbose(env,
3404
+ "R%d invalid %s buffer access: off=%d, size=%d\n",
3405
+ regno, buf_info, off, size);
3406
+ return -EACCES;
3407
+ }
3408
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3409
+ char tn_buf[48];
3410
+
3411
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3412
+ verbose(env,
3413
+ "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
3414
+ regno, off, tn_buf);
3415
+ return -EACCES;
3416
+ }
3417
+
3418
+ return 0;
3419
+}
3420
+
3421
+static int check_tp_buffer_access(struct bpf_verifier_env *env,
3422
+ const struct bpf_reg_state *reg,
3423
+ int regno, int off, int size)
3424
+{
3425
+ int err;
3426
+
3427
+ err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3428
+ if (err)
3429
+ return err;
3430
+
3431
+ if (off + size > env->prog->aux->max_tp_access)
3432
+ env->prog->aux->max_tp_access = off + size;
3433
+
3434
+ return 0;
3435
+}
3436
+
3437
+static int check_buffer_access(struct bpf_verifier_env *env,
3438
+ const struct bpf_reg_state *reg,
3439
+ int regno, int off, int size,
3440
+ bool zero_size_allowed,
3441
+ const char *buf_info,
3442
+ u32 *max_access)
3443
+{
3444
+ int err;
3445
+
3446
+ err = __check_buffer_access(env, buf_info, reg, regno, off, size);
3447
+ if (err)
3448
+ return err;
3449
+
3450
+ if (off + size > *max_access)
3451
+ *max_access = off + size;
3452
+
3453
+ return 0;
3454
+}
3455
+
3456
+/* BPF architecture zero extends alu32 ops into 64-bit registers */
3457
+static void zext_32_to_64(struct bpf_reg_state *reg)
3458
+{
3459
+ reg->var_off = tnum_subreg(reg->var_off);
3460
+ __reg_assign_32_into_64(reg);
15873461 }
15883462
15893463 /* truncate register to smaller size (in bytes)
....@@ -1607,6 +3481,255 @@
16073481 }
16083482 reg->smin_value = reg->umin_value;
16093483 reg->smax_value = reg->umax_value;
3484
+
3485
+ /* If size is smaller than 32bit register the 32bit register
3486
+ * values are also truncated so we push 64-bit bounds into
3487
+ * 32-bit bounds. Above were truncated < 32-bits already.
3488
+ */
3489
+ if (size >= 4)
3490
+ return;
3491
+ __reg_combine_64_into_32(reg);
3492
+}
3493
+
3494
+static bool bpf_map_is_rdonly(const struct bpf_map *map)
3495
+{
3496
+ /* A map is considered read-only if the following condition are true:
3497
+ *
3498
+ * 1) BPF program side cannot change any of the map content. The
3499
+ * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
3500
+ * and was set at map creation time.
3501
+ * 2) The map value(s) have been initialized from user space by a
3502
+ * loader and then "frozen", such that no new map update/delete
3503
+ * operations from syscall side are possible for the rest of
3504
+ * the map's lifetime from that point onwards.
3505
+ * 3) Any parallel/pending map update/delete operations from syscall
3506
+ * side have been completed. Only after that point, it's safe to
3507
+ * assume that map value(s) are immutable.
3508
+ */
3509
+ return (map->map_flags & BPF_F_RDONLY_PROG) &&
3510
+ READ_ONCE(map->frozen) &&
3511
+ !bpf_map_write_active(map);
3512
+}
3513
+
3514
+static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
3515
+{
3516
+ void *ptr;
3517
+ u64 addr;
3518
+ int err;
3519
+
3520
+ err = map->ops->map_direct_value_addr(map, &addr, off);
3521
+ if (err)
3522
+ return err;
3523
+ ptr = (void *)(long)addr + off;
3524
+
3525
+ switch (size) {
3526
+ case sizeof(u8):
3527
+ *val = (u64)*(u8 *)ptr;
3528
+ break;
3529
+ case sizeof(u16):
3530
+ *val = (u64)*(u16 *)ptr;
3531
+ break;
3532
+ case sizeof(u32):
3533
+ *val = (u64)*(u32 *)ptr;
3534
+ break;
3535
+ case sizeof(u64):
3536
+ *val = *(u64 *)ptr;
3537
+ break;
3538
+ default:
3539
+ return -EINVAL;
3540
+ }
3541
+ return 0;
3542
+}
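
bpf_map_direct_read above zero-extends a 1/2/4/8-byte value into a u64. The
same pattern in a standalone form, reading from a local buffer instead of a
frozen map value (purely illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Read 'size' bytes at 'ptr' and zero-extend the value into *val. */
static int direct_read(const void *ptr, int size, uint64_t *val)
{
	switch (size) {
	case sizeof(uint8_t):  { uint8_t v;  memcpy(&v, ptr, size); *val = v; break; }
	case sizeof(uint16_t): { uint16_t v; memcpy(&v, ptr, size); *val = v; break; }
	case sizeof(uint32_t): { uint32_t v; memcpy(&v, ptr, size); *val = v; break; }
	case sizeof(uint64_t): { uint64_t v; memcpy(&v, ptr, size); *val = v; break; }
	default:
		return -1;
	}
	return 0;
}

int main(void)
{
	uint8_t buf[8] = { 0xef, 0xbe, 0xad, 0xde, 0, 0, 0, 0 };
	uint64_t val;

	if (!direct_read(buf, 4, &val))
		printf("val=%#llx\n", (unsigned long long)val);	/* 0xdeadbeef on LE */
	return 0;
}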
3543
+
3544
+static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
3545
+ struct bpf_reg_state *regs,
3546
+ int regno, int off, int size,
3547
+ enum bpf_access_type atype,
3548
+ int value_regno)
3549
+{
3550
+ struct bpf_reg_state *reg = regs + regno;
3551
+ const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
3552
+ const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3553
+ u32 btf_id;
3554
+ int ret;
3555
+
3556
+ if (off < 0) {
3557
+ verbose(env,
3558
+ "R%d is ptr_%s invalid negative access: off=%d\n",
3559
+ regno, tname, off);
3560
+ return -EACCES;
3561
+ }
3562
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3563
+ char tn_buf[48];
3564
+
3565
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3566
+ verbose(env,
3567
+ "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
3568
+ regno, tname, off, tn_buf);
3569
+ return -EACCES;
3570
+ }
3571
+
3572
+ if (env->ops->btf_struct_access) {
3573
+ ret = env->ops->btf_struct_access(&env->log, t, off, size,
3574
+ atype, &btf_id);
3575
+ } else {
3576
+ if (atype != BPF_READ) {
3577
+ verbose(env, "only read is supported\n");
3578
+ return -EACCES;
3579
+ }
3580
+
3581
+ ret = btf_struct_access(&env->log, t, off, size, atype,
3582
+ &btf_id);
3583
+ }
3584
+
3585
+ if (ret < 0)
3586
+ return ret;
3587
+
3588
+ if (atype == BPF_READ && value_regno >= 0)
3589
+ mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3590
+
3591
+ return 0;
3592
+}
3593
+
3594
+static int check_ptr_to_map_access(struct bpf_verifier_env *env,
3595
+ struct bpf_reg_state *regs,
3596
+ int regno, int off, int size,
3597
+ enum bpf_access_type atype,
3598
+ int value_regno)
3599
+{
3600
+ struct bpf_reg_state *reg = regs + regno;
3601
+ struct bpf_map *map = reg->map_ptr;
3602
+ const struct btf_type *t;
3603
+ const char *tname;
3604
+ u32 btf_id;
3605
+ int ret;
3606
+
3607
+ if (!btf_vmlinux) {
3608
+ verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
3609
+ return -ENOTSUPP;
3610
+ }
3611
+
3612
+ if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
3613
+ verbose(env, "map_ptr access not supported for map type %d\n",
3614
+ map->map_type);
3615
+ return -ENOTSUPP;
3616
+ }
3617
+
3618
+ t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
3619
+ tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3620
+
3621
+ if (!env->allow_ptr_to_map_access) {
3622
+ verbose(env,
3623
+ "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
3624
+ tname);
3625
+ return -EPERM;
3626
+ }
3627
+
3628
+ if (off < 0) {
3629
+ verbose(env, "R%d is %s invalid negative access: off=%d\n",
3630
+ regno, tname, off);
3631
+ return -EACCES;
3632
+ }
3633
+
3634
+ if (atype != BPF_READ) {
3635
+ verbose(env, "only read from %s is supported\n", tname);
3636
+ return -EACCES;
3637
+ }
3638
+
3639
+ ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3640
+ if (ret < 0)
3641
+ return ret;
3642
+
3643
+ if (value_regno >= 0)
3644
+ mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3645
+
3646
+ return 0;
3647
+}
3648
+
3649
+/* Check that the stack access at the given offset is within bounds. The
3650
+ * maximum valid offset is -1.
3651
+ *
3652
+ * The minimum valid offset is -MAX_BPF_STACK for writes, and
3653
+ * -state->allocated_stack for reads.
3654
+ */
3655
+static int check_stack_slot_within_bounds(int off,
3656
+ struct bpf_func_state *state,
3657
+ enum bpf_access_type t)
3658
+{
3659
+ int min_valid_off;
3660
+
3661
+ if (t == BPF_WRITE)
3662
+ min_valid_off = -MAX_BPF_STACK;
3663
+ else
3664
+ min_valid_off = -state->allocated_stack;
3665
+
3666
+ if (off < min_valid_off || off > -1)
3667
+ return -EACCES;
3668
+ return 0;
3669
+}
3670
+
3671
+/* Check that the stack access at 'regno + off' falls within the maximum stack
3672
+ * bounds.
3673
+ *
3674
+ * 'off' includes `regno->offset`, but not its dynamic part (if any).
3675
+ */
3676
+static int check_stack_access_within_bounds(
3677
+ struct bpf_verifier_env *env,
3678
+ int regno, int off, int access_size,
3679
+ enum stack_access_src src, enum bpf_access_type type)
3680
+{
3681
+ struct bpf_reg_state *regs = cur_regs(env);
3682
+ struct bpf_reg_state *reg = regs + regno;
3683
+ struct bpf_func_state *state = func(env, reg);
3684
+ int min_off, max_off;
3685
+ int err;
3686
+ char *err_extra;
3687
+
3688
+ if (src == ACCESS_HELPER)
3689
+ /* We don't know if helpers are reading or writing (or both). */
3690
+ err_extra = " indirect access to";
3691
+ else if (type == BPF_READ)
3692
+ err_extra = " read from";
3693
+ else
3694
+ err_extra = " write to";
3695
+
3696
+ if (tnum_is_const(reg->var_off)) {
3697
+ min_off = reg->var_off.value + off;
3698
+ if (access_size > 0)
3699
+ max_off = min_off + access_size - 1;
3700
+ else
3701
+ max_off = min_off;
3702
+ } else {
3703
+ if (reg->smax_value >= BPF_MAX_VAR_OFF ||
3704
+ reg->smin_value <= -BPF_MAX_VAR_OFF) {
3705
+ verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
3706
+ err_extra, regno);
3707
+ return -EACCES;
3708
+ }
3709
+ min_off = reg->smin_value + off;
3710
+ if (access_size > 0)
3711
+ max_off = reg->smax_value + off + access_size - 1;
3712
+ else
3713
+ max_off = min_off;
3714
+ }
3715
+
3716
+ err = check_stack_slot_within_bounds(min_off, state, type);
3717
+ if (!err)
3718
+ err = check_stack_slot_within_bounds(max_off, state, type);
3719
+
3720
+ if (err) {
3721
+ if (tnum_is_const(reg->var_off)) {
3722
+ verbose(env, "invalid%s stack R%d off=%d size=%d\n",
3723
+ err_extra, regno, off, access_size);
3724
+ } else {
3725
+ char tn_buf[48];
3726
+
3727
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3728
+ verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
3729
+ err_extra, regno, tn_buf, access_size);
3730
+ }
3731
+ }
3732
+ return err;
16103733 }
16113734
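/*
 * Illustrative sketch, not from this diff: the kind of direct stack access
 * the bounds checks above police.  MAX_BPF_STACK is 512 bytes, so constant
 * offsets must fall in [-512, -1].  Program and buffer names are made up;
 * assumes clang -target bpf and libbpf's bpf_helpers.h for SEC().
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("socket")
int stack_bounds_sketch(struct __sk_buff *skb)
{
	char buf[256];			/* lives within the MAX_BPF_STACK frame */

	buf[0] = 1;			/* lowest stack offset used by this program */
	buf[255] = 2;			/* still at most -1 relative to r10 */
	return buf[0] + buf[255];	/* reads stay within allocated_stack */
}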
16123735 /* check whether memory at (regno + off) is accessible for t = (read | write)
....@@ -1642,13 +3765,44 @@
16423765 verbose(env, "R%d leaks addr into map\n", value_regno);
16433766 return -EACCES;
16443767 }
1645
-
3768
+ err = check_map_access_type(env, regno, off, size, t);
3769
+ if (err)
3770
+ return err;
16463771 err = check_map_access(env, regno, off, size, false);
3772
+ if (!err && t == BPF_READ && value_regno >= 0) {
3773
+ struct bpf_map *map = reg->map_ptr;
3774
+
3775
+ /* if map is read-only, track its contents as scalars */
3776
+ if (tnum_is_const(reg->var_off) &&
3777
+ bpf_map_is_rdonly(map) &&
3778
+ map->ops->map_direct_value_addr) {
3779
+ int map_off = off + reg->var_off.value;
3780
+ u64 val = 0;
3781
+
3782
+ err = bpf_map_direct_read(map, map_off, size,
3783
+ &val);
3784
+ if (err)
3785
+ return err;
3786
+
3787
+ regs[value_regno].type = SCALAR_VALUE;
3788
+ __mark_reg_known(&regs[value_regno], val);
3789
+ } else {
3790
+ mark_reg_unknown(env, regs, value_regno);
3791
+ }
3792
+ }
3793
+ } else if (reg->type == PTR_TO_MEM) {
3794
+ if (t == BPF_WRITE && value_regno >= 0 &&
3795
+ is_pointer_value(env, value_regno)) {
3796
+ verbose(env, "R%d leaks addr into mem\n", value_regno);
3797
+ return -EACCES;
3798
+ }
3799
+ err = check_mem_region_access(env, regno, off, size,
3800
+ reg->mem_size, false);
16473801 if (!err && t == BPF_READ && value_regno >= 0)
16483802 mark_reg_unknown(env, regs, value_regno);
1649
-
16503803 } else if (reg->type == PTR_TO_CTX) {
16513804 enum bpf_reg_type reg_type = SCALAR_VALUE;
3805
+ u32 btf_id = 0;
16523806
16533807 if (t == BPF_WRITE && value_regno >= 0 &&
16543808 is_pointer_value(env, value_regno)) {
....@@ -1660,23 +3814,37 @@
16603814 if (err < 0)
16613815 return err;
16623816
1663
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
3817
+ err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
3818
+ if (err)
3819
+ verbose_linfo(env, insn_idx, "; ");
16643820 if (!err && t == BPF_READ && value_regno >= 0) {
16653821 /* ctx access returns either a scalar, or a
16663822 * PTR_TO_PACKET[_META,_END]. In the latter
16673823 * case, we know the offset is zero.
16683824 */
1669
- if (reg_type == SCALAR_VALUE)
3825
+ if (reg_type == SCALAR_VALUE) {
16703826 mark_reg_unknown(env, regs, value_regno);
1671
- else
3827
+ } else {
16723828 mark_reg_known_zero(env, regs,
16733829 value_regno);
3830
+ if (reg_type_may_be_null(reg_type))
3831
+ regs[value_regno].id = ++env->id_gen;
3832
+ /* A load of ctx field could have different
3833
+ * actual load size with the one encoded in the
3834
+ * insn. When the dst is PTR, it is for sure not
3835
+ * a sub-register.
3836
+ */
3837
+ regs[value_regno].subreg_def = DEF_NOT_SUBREG;
3838
+ if (reg_type == PTR_TO_BTF_ID ||
3839
+ reg_type == PTR_TO_BTF_ID_OR_NULL)
3840
+ regs[value_regno].btf_id = btf_id;
3841
+ }
16743842 regs[value_regno].type = reg_type;
16753843 }
16763844
16773845 } else if (reg->type == PTR_TO_STACK) {
1678
- off += reg->var_off.value;
1679
- err = check_stack_access(env, reg, off, size);
3846
+ /* Basic bounds checks. */
3847
+ err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
16803848 if (err)
16813849 return err;
16823850
....@@ -1685,12 +3853,12 @@
16853853 if (err)
16863854 return err;
16873855
1688
- if (t == BPF_WRITE)
1689
- err = check_stack_write(env, state, off, size,
1690
- value_regno, insn_idx);
1691
- else
1692
- err = check_stack_read(env, state, off, size,
3856
+ if (t == BPF_READ)
3857
+ err = check_stack_read(env, regno, off, size,
16933858 value_regno);
3859
+ else
3860
+ err = check_stack_write(env, regno, off, size,
3861
+ value_regno, insn_idx);
16943862 } else if (reg_is_pkt_pointer(reg)) {
16953863 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
16963864 verbose(env, "cannot write into packet\n");
....@@ -1703,6 +3871,53 @@
17033871 return -EACCES;
17043872 }
17053873 err = check_packet_access(env, regno, off, size, false);
3874
+ if (!err && t == BPF_READ && value_regno >= 0)
3875
+ mark_reg_unknown(env, regs, value_regno);
3876
+ } else if (reg->type == PTR_TO_FLOW_KEYS) {
3877
+ if (t == BPF_WRITE && value_regno >= 0 &&
3878
+ is_pointer_value(env, value_regno)) {
3879
+ verbose(env, "R%d leaks addr into flow keys\n",
3880
+ value_regno);
3881
+ return -EACCES;
3882
+ }
3883
+
3884
+ err = check_flow_keys_access(env, off, size);
3885
+ if (!err && t == BPF_READ && value_regno >= 0)
3886
+ mark_reg_unknown(env, regs, value_regno);
3887
+ } else if (type_is_sk_pointer(reg->type)) {
3888
+ if (t == BPF_WRITE) {
3889
+ verbose(env, "R%d cannot write into %s\n",
3890
+ regno, reg_type_str[reg->type]);
3891
+ return -EACCES;
3892
+ }
3893
+ err = check_sock_access(env, insn_idx, regno, off, size, t);
3894
+ if (!err && value_regno >= 0)
3895
+ mark_reg_unknown(env, regs, value_regno);
3896
+ } else if (reg->type == PTR_TO_TP_BUFFER) {
3897
+ err = check_tp_buffer_access(env, reg, regno, off, size);
3898
+ if (!err && t == BPF_READ && value_regno >= 0)
3899
+ mark_reg_unknown(env, regs, value_regno);
3900
+ } else if (reg->type == PTR_TO_BTF_ID) {
3901
+ err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
3902
+ value_regno);
3903
+ } else if (reg->type == CONST_PTR_TO_MAP) {
3904
+ err = check_ptr_to_map_access(env, regs, regno, off, size, t,
3905
+ value_regno);
3906
+ } else if (reg->type == PTR_TO_RDONLY_BUF) {
3907
+ if (t == BPF_WRITE) {
3908
+ verbose(env, "R%d cannot write into %s\n",
3909
+ regno, reg_type_str[reg->type]);
3910
+ return -EACCES;
3911
+ }
3912
+ err = check_buffer_access(env, reg, regno, off, size, false,
3913
+ "rdonly",
3914
+ &env->prog->aux->max_rdonly_access);
3915
+ if (!err && value_regno >= 0)
3916
+ mark_reg_unknown(env, regs, value_regno);
3917
+ } else if (reg->type == PTR_TO_RDWR_BUF) {
3918
+ err = check_buffer_access(env, reg, regno, off, size, false,
3919
+ "rdwr",
3920
+ &env->prog->aux->max_rdwr_access);
17063921 if (!err && t == BPF_READ && value_regno >= 0)
17073922 mark_reg_unknown(env, regs, value_regno);
17083923 } else {
....@@ -1745,10 +3960,12 @@
17453960 }
17463961
17473962 if (is_ctx_reg(env, insn->dst_reg) ||
1748
- is_pkt_reg(env, insn->dst_reg)) {
3963
+ is_pkt_reg(env, insn->dst_reg) ||
3964
+ is_flow_key_reg(env, insn->dst_reg) ||
3965
+ is_sk_reg(env, insn->dst_reg)) {
17493966 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
1750
- insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
1751
- "context" : "packet");
3967
+ insn->dst_reg,
3968
+ reg_type_str[reg_state(env, insn->dst_reg)->type]);
17523969 return -EACCES;
17533970 }
17543971
....@@ -1763,73 +3980,65 @@
17633980 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
17643981 }
17653982
1766
-static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
1767
- int off, int access_size,
1768
- bool zero_size_allowed)
1769
-{
1770
- struct bpf_reg_state *reg = cur_regs(env) + regno;
1771
-
1772
- if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
1773
- access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
1774
- if (tnum_is_const(reg->var_off)) {
1775
- verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
1776
- regno, off, access_size);
1777
- } else {
1778
- char tn_buf[48];
1779
-
1780
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1781
- verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
1782
- regno, tn_buf, access_size);
1783
- }
1784
- return -EACCES;
1785
- }
1786
- return 0;
1787
-}
1788
-
1789
-/* when register 'regno' is passed into function that will read 'access_size'
1790
- * bytes from that pointer, make sure that it's within stack boundary
1791
- * and all elements of stack are initialized.
1792
- * Unlike most pointer bounds-checking functions, this one doesn't take an
1793
- * 'off' argument, so it has to add in reg->off itself.
3983
+/* When register 'regno' is used to read the stack (either directly or through
3984
+ * a helper function) make sure that it's within stack boundary and, depending
3985
+ * on the access type, that all elements of the stack are initialized.
3986
+ *
3987
+ * 'off' includes 'regno->off', but not its dynamic part (if any).
3988
+ *
3989
+ * All registers that have been spilled on the stack in the slots within the
3990
+ * read offsets are marked as read.
17943991 */
1795
-static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1796
- int access_size, bool zero_size_allowed,
1797
- struct bpf_call_arg_meta *meta)
3992
+static int check_stack_range_initialized(
3993
+ struct bpf_verifier_env *env, int regno, int off,
3994
+ int access_size, bool zero_size_allowed,
3995
+ enum stack_access_src type, struct bpf_call_arg_meta *meta)
17983996 {
1799
- struct bpf_reg_state *reg = cur_regs(env) + regno;
3997
+ struct bpf_reg_state *reg = reg_state(env, regno);
18003998 struct bpf_func_state *state = func(env, reg);
18013999 int err, min_off, max_off, i, j, slot, spi;
4000
+ char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
4001
+ enum bpf_access_type bounds_check_type;
4002
+ /* Some accesses can write anything into the stack, others are
4003
+ * read-only.
4004
+ */
4005
+ bool clobber = false;
18024006
1803
- if (reg->type != PTR_TO_STACK) {
1804
- /* Allow zero-byte read from NULL, regardless of pointer type */
1805
- if (zero_size_allowed && access_size == 0 &&
1806
- register_is_null(reg))
1807
- return 0;
1808
-
1809
- verbose(env, "R%d type=%s expected=%s\n", regno,
1810
- reg_type_str[reg->type],
1811
- reg_type_str[PTR_TO_STACK]);
4007
+ if (access_size == 0 && !zero_size_allowed) {
4008
+ verbose(env, "invalid zero-sized read\n");
18124009 return -EACCES;
18134010 }
18144011
4012
+ if (type == ACCESS_HELPER) {
4013
+ /* The bounds checks for writes are more permissive than for
4014
+ * reads. However, if raw_mode is not set, we'll do extra
4015
+ * checks below.
4016
+ */
4017
+ bounds_check_type = BPF_WRITE;
4018
+ clobber = true;
4019
+ } else {
4020
+ bounds_check_type = BPF_READ;
4021
+ }
4022
+ err = check_stack_access_within_bounds(env, regno, off, access_size,
4023
+ type, bounds_check_type);
4024
+ if (err)
4025
+ return err;
4026
+
4027
+
18154028 if (tnum_is_const(reg->var_off)) {
1816
- min_off = max_off = reg->var_off.value + reg->off;
1817
- err = __check_stack_boundary(env, regno, min_off, access_size,
1818
- zero_size_allowed);
1819
- if (err)
1820
- return err;
4029
+ min_off = max_off = reg->var_off.value + off;
18214030 } else {
18224031 /* Variable offset is prohibited for unprivileged mode for
18234032 * simplicity since it requires corresponding support in
18244033 * Spectre masking for stack ALU.
18254034 * See also retrieve_ptr_limit().
18264035 */
1827
- if (!env->allow_ptr_leaks) {
4036
+ if (!env->bypass_spec_v1) {
18284037 char tn_buf[48];
18294038
18304039 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1831
- verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
1832
- regno, tn_buf);
4040
+ verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4041
+ regno, err_extra, tn_buf);
18334042 return -EACCES;
18344043 }
18354044 /* Only initialized buffer on stack is allowed to be accessed
....@@ -1841,28 +4050,8 @@
18414050 if (meta && meta->raw_mode)
18424051 meta = NULL;
18434052
1844
- if (reg->smax_value >= BPF_MAX_VAR_OFF ||
1845
- reg->smax_value <= -BPF_MAX_VAR_OFF) {
1846
- verbose(env, "R%d unbounded indirect variable offset stack access\n",
1847
- regno);
1848
- return -EACCES;
1849
- }
1850
- min_off = reg->smin_value + reg->off;
1851
- max_off = reg->smax_value + reg->off;
1852
- err = __check_stack_boundary(env, regno, min_off, access_size,
1853
- zero_size_allowed);
1854
- if (err) {
1855
- verbose(env, "R%d min value is outside of stack bound\n",
1856
- regno);
1857
- return err;
1858
- }
1859
- err = __check_stack_boundary(env, regno, max_off, access_size,
1860
- zero_size_allowed);
1861
- if (err) {
1862
- verbose(env, "R%d max value is outside of stack bound\n",
1863
- regno);
1864
- return err;
1865
- }
4053
+ min_off = reg->smin_value + off;
4054
+ max_off = reg->smax_value + off;
18664055 }
18674056
18684057 if (meta && meta->raw_mode) {
....@@ -1882,28 +4071,38 @@
18824071 if (*stype == STACK_MISC)
18834072 goto mark;
18844073 if (*stype == STACK_ZERO) {
1885
- /* helper can write anything into the stack */
1886
- *stype = STACK_MISC;
4074
+ if (clobber) {
4075
+ /* helper can write anything into the stack */
4076
+ *stype = STACK_MISC;
4077
+ }
18874078 goto mark;
18884079 }
4080
+
18894081 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
1890
- state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
1891
- __mark_reg_unknown(&state->stack[spi].spilled_ptr);
1892
- for (j = 0; j < BPF_REG_SIZE; j++)
1893
- state->stack[spi].slot_type[j] = STACK_MISC;
4082
+ state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
4083
+ goto mark;
4084
+
4085
+ if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4086
+ (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4087
+ env->allow_ptr_leaks)) {
4088
+ if (clobber) {
4089
+ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4090
+ for (j = 0; j < BPF_REG_SIZE; j++)
4091
+ state->stack[spi].slot_type[j] = STACK_MISC;
4092
+ }
18944093 goto mark;
18954094 }
18964095
18974096 err:
18984097 if (tnum_is_const(reg->var_off)) {
1899
- verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
1900
- min_off, i - min_off, access_size);
4098
+ verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4099
+ err_extra, regno, min_off, i - min_off, access_size);
19014100 } else {
19024101 char tn_buf[48];
19034102
19044103 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1905
- verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
1906
- tn_buf, i - min_off, access_size);
4104
+ verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
4105
+ err_extra, regno, tn_buf, i - min_off, access_size);
19074106 }
19084107 return -EACCES;
19094108 mark:
....@@ -1911,7 +4110,8 @@
19114110 * the whole slot to be marked as 'read'
19124111 */
19134112 mark_reg_read(env, &state->stack[spi].spilled_ptr,
1914
- state->stack[spi].spilled_ptr.parent);
4113
+ state->stack[spi].spilled_ptr.parent,
4114
+ REG_LIVE_READ64);
19154115 }
19164116 return update_stack_depth(env, state, min_off);
19174117 }
....@@ -1928,12 +4128,125 @@
19284128 return check_packet_access(env, regno, reg->off, access_size,
19294129 zero_size_allowed);
19304130 case PTR_TO_MAP_VALUE:
4131
+ if (check_map_access_type(env, regno, reg->off, access_size,
4132
+ meta && meta->raw_mode ? BPF_WRITE :
4133
+ BPF_READ))
4134
+ return -EACCES;
19314135 return check_map_access(env, regno, reg->off, access_size,
19324136 zero_size_allowed);
1933
- default: /* scalar_value|ptr_to_stack or invalid ptr */
1934
- return check_stack_boundary(env, regno, access_size,
1935
- zero_size_allowed, meta);
4137
+ case PTR_TO_MEM:
4138
+ return check_mem_region_access(env, regno, reg->off,
4139
+ access_size, reg->mem_size,
4140
+ zero_size_allowed);
4141
+ case PTR_TO_RDONLY_BUF:
4142
+ if (meta && meta->raw_mode)
4143
+ return -EACCES;
4144
+ return check_buffer_access(env, reg, regno, reg->off,
4145
+ access_size, zero_size_allowed,
4146
+ "rdonly",
4147
+ &env->prog->aux->max_rdonly_access);
4148
+ case PTR_TO_RDWR_BUF:
4149
+ return check_buffer_access(env, reg, regno, reg->off,
4150
+ access_size, zero_size_allowed,
4151
+ "rdwr",
4152
+ &env->prog->aux->max_rdwr_access);
4153
+ case PTR_TO_STACK:
4154
+ return check_stack_range_initialized(
4155
+ env,
4156
+ regno, reg->off, access_size,
4157
+ zero_size_allowed, ACCESS_HELPER, meta);
4158
+ default: /* scalar_value or invalid ptr */
4159
+ /* Allow zero-byte read from NULL, regardless of pointer type */
4160
+ if (zero_size_allowed && access_size == 0 &&
4161
+ register_is_null(reg))
4162
+ return 0;
4163
+
4164
+ verbose(env, "R%d type=%s expected=%s\n", regno,
4165
+ reg_type_str[reg->type],
4166
+ reg_type_str[PTR_TO_STACK]);
4167
+ return -EACCES;
19364168 }
4169
+}
4170
+
4171
+/* Implementation details:
4172
+ * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4173
+ * Two bpf_map_lookups (even with the same key) will have different reg->id.
4174
+ * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4175
+ * value_or_null->value transition, since the verifier only cares about
4176
+ * the range of access to valid map value pointer and doesn't care about actual
4177
+ * address of the map element.
4178
+ * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4179
+ * reg->id > 0 after value_or_null->value transition. By doing so
4180
+ * two bpf_map_lookups will be considered two different pointers that
4181
+ * point to different bpf_spin_locks.
4182
+ * The verifier allows taking only one bpf_spin_lock at a time to avoid
4183
+ * dead-locks.
4184
+ * Since only one bpf_spin_lock is allowed the checks are simpler than
4185
+ * reg_is_refcounted() logic. The verifier needs to remember only
4186
+ * one spin_lock instead of array of acquired_refs.
4187
+ * cur_state->active_spin_lock remembers which map value element got locked
4188
+ * and clears it after bpf_spin_unlock.
4189
+ */
4190
+static int process_spin_lock(struct bpf_verifier_env *env, int regno,
4191
+ bool is_lock)
4192
+{
4193
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4194
+ struct bpf_verifier_state *cur = env->cur_state;
4195
+ bool is_const = tnum_is_const(reg->var_off);
4196
+ struct bpf_map *map = reg->map_ptr;
4197
+ u64 val = reg->var_off.value;
4198
+
4199
+ if (!is_const) {
4200
+ verbose(env,
4201
+ "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
4202
+ regno);
4203
+ return -EINVAL;
4204
+ }
4205
+ if (!map->btf) {
4206
+ verbose(env,
4207
+ "map '%s' has to have BTF in order to use bpf_spin_lock\n",
4208
+ map->name);
4209
+ return -EINVAL;
4210
+ }
4211
+ if (!map_value_has_spin_lock(map)) {
4212
+ if (map->spin_lock_off == -E2BIG)
4213
+ verbose(env,
4214
+ "map '%s' has more than one 'struct bpf_spin_lock'\n",
4215
+ map->name);
4216
+ else if (map->spin_lock_off == -ENOENT)
4217
+ verbose(env,
4218
+ "map '%s' doesn't have 'struct bpf_spin_lock'\n",
4219
+ map->name);
4220
+ else
4221
+ verbose(env,
4222
+ "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
4223
+ map->name);
4224
+ return -EINVAL;
4225
+ }
4226
+ if (map->spin_lock_off != val + reg->off) {
4227
+ verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
4228
+ val + reg->off);
4229
+ return -EINVAL;
4230
+ }
4231
+ if (is_lock) {
4232
+ if (cur->active_spin_lock) {
4233
+ verbose(env,
4234
+ "Locking two bpf_spin_locks are not allowed\n");
4235
+ return -EINVAL;
4236
+ }
4237
+ cur->active_spin_lock = reg->id;
4238
+ } else {
4239
+ if (!cur->active_spin_lock) {
4240
+ verbose(env, "bpf_spin_unlock without taking a lock\n");
4241
+ return -EINVAL;
4242
+ }
4243
+ if (cur->active_spin_lock != reg->id) {
4244
+ verbose(env, "bpf_spin_unlock of different lock\n");
4245
+ return -EINVAL;
4246
+ }
4247
+ cur->active_spin_lock = 0;
4248
+ }
4249
+ return 0;
19374250 }
19384251
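/*
 * Illustrative sketch, not from this diff: the program shape that
 * process_spin_lock() above accepts -- a bpf_spin_lock embedded at a
 * constant offset in a BTF-described map value, with exactly one lock held
 * between lock and unlock.  Map and function names are made up; assumes
 * libbpf's bpf_helpers.h map definition macros.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct counter {
	struct bpf_spin_lock lock;
	long value;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, struct counter);
} counters SEC(".maps");

SEC("tc")
int spin_lock_sketch(struct __sk_buff *skb)
{
	__u32 key = 0;
	struct counter *c = bpf_map_lookup_elem(&counters, &key);

	if (!c)
		return 0;
	bpf_spin_lock(&c->lock);	/* sets cur->active_spin_lock */
	c->value++;
	bpf_spin_unlock(&c->lock);	/* clears it again */
	return 0;
}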
19394252 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
....@@ -1949,12 +4262,215 @@
19494262 type == ARG_CONST_SIZE_OR_ZERO;
19504263 }
19514264
1952
-static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
4265
+static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4266
+{
4267
+ return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4268
+}
4269
+
4270
+static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4271
+{
4272
+ return type == ARG_PTR_TO_INT ||
4273
+ type == ARG_PTR_TO_LONG;
4274
+}
4275
+
4276
+static int int_ptr_type_to_size(enum bpf_arg_type type)
4277
+{
4278
+ if (type == ARG_PTR_TO_INT)
4279
+ return sizeof(u32);
4280
+ else if (type == ARG_PTR_TO_LONG)
4281
+ return sizeof(u64);
4282
+
4283
+ return -EINVAL;
4284
+}
4285
+
4286
+static int resolve_map_arg_type(struct bpf_verifier_env *env,
4287
+ const struct bpf_call_arg_meta *meta,
4288
+ enum bpf_arg_type *arg_type)
4289
+{
4290
+ if (!meta->map_ptr) {
4291
+ /* kernel subsystem misconfigured verifier */
4292
+ verbose(env, "invalid map_ptr to access map->type\n");
4293
+ return -EACCES;
4294
+ }
4295
+
4296
+ switch (meta->map_ptr->map_type) {
4297
+ case BPF_MAP_TYPE_SOCKMAP:
4298
+ case BPF_MAP_TYPE_SOCKHASH:
4299
+ if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
4300
+ *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
4301
+ } else {
4302
+ verbose(env, "invalid arg_type for sockmap/sockhash\n");
4303
+ return -EINVAL;
4304
+ }
4305
+ break;
4306
+
4307
+ default:
4308
+ break;
4309
+ }
4310
+ return 0;
4311
+}
4312
+
4313
+struct bpf_reg_types {
4314
+ const enum bpf_reg_type types[10];
4315
+ u32 *btf_id;
4316
+};
4317
+
4318
+static const struct bpf_reg_types map_key_value_types = {
4319
+ .types = {
4320
+ PTR_TO_STACK,
4321
+ PTR_TO_PACKET,
4322
+ PTR_TO_PACKET_META,
4323
+ PTR_TO_MAP_VALUE,
4324
+ },
4325
+};
4326
+
4327
+static const struct bpf_reg_types sock_types = {
4328
+ .types = {
4329
+ PTR_TO_SOCK_COMMON,
4330
+ PTR_TO_SOCKET,
4331
+ PTR_TO_TCP_SOCK,
4332
+ PTR_TO_XDP_SOCK,
4333
+ },
4334
+};
4335
+
4336
+#ifdef CONFIG_NET
4337
+static const struct bpf_reg_types btf_id_sock_common_types = {
4338
+ .types = {
4339
+ PTR_TO_SOCK_COMMON,
4340
+ PTR_TO_SOCKET,
4341
+ PTR_TO_TCP_SOCK,
4342
+ PTR_TO_XDP_SOCK,
4343
+ PTR_TO_BTF_ID,
4344
+ },
4345
+ .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
4346
+};
4347
+#endif
4348
+
4349
+static const struct bpf_reg_types mem_types = {
4350
+ .types = {
4351
+ PTR_TO_STACK,
4352
+ PTR_TO_PACKET,
4353
+ PTR_TO_PACKET_META,
4354
+ PTR_TO_MAP_VALUE,
4355
+ PTR_TO_MEM,
4356
+ PTR_TO_RDONLY_BUF,
4357
+ PTR_TO_RDWR_BUF,
4358
+ },
4359
+};
4360
+
4361
+static const struct bpf_reg_types int_ptr_types = {
4362
+ .types = {
4363
+ PTR_TO_STACK,
4364
+ PTR_TO_PACKET,
4365
+ PTR_TO_PACKET_META,
4366
+ PTR_TO_MAP_VALUE,
4367
+ },
4368
+};
4369
+
4370
+static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
4371
+static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
4372
+static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
4373
+static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
4374
+static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
4375
+static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
4376
+static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
4377
+static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
4378
+
4379
+static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
4380
+ [ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
4381
+ [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
4382
+ [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
4383
+ [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types,
4384
+ [ARG_CONST_SIZE] = &scalar_types,
4385
+ [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
4386
+ [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
4387
+ [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
4388
+ [ARG_PTR_TO_CTX] = &context_types,
4389
+ [ARG_PTR_TO_CTX_OR_NULL] = &context_types,
4390
+ [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
4391
+#ifdef CONFIG_NET
4392
+ [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
4393
+#endif
4394
+ [ARG_PTR_TO_SOCKET] = &fullsock_types,
4395
+ [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
4396
+ [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
4397
+ [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
4398
+ [ARG_PTR_TO_MEM] = &mem_types,
4399
+ [ARG_PTR_TO_MEM_OR_NULL] = &mem_types,
4400
+ [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
4401
+ [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
4402
+ [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
4403
+ [ARG_PTR_TO_INT] = &int_ptr_types,
4404
+ [ARG_PTR_TO_LONG] = &int_ptr_types,
4405
+ [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
4406
+};
4407
+
4408
+static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
19534409 enum bpf_arg_type arg_type,
1954
- struct bpf_call_arg_meta *meta)
4410
+ const u32 *arg_btf_id)
19554411 {
19564412 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1957
- enum bpf_reg_type expected_type, type = reg->type;
4413
+ enum bpf_reg_type expected, type = reg->type;
4414
+ const struct bpf_reg_types *compatible;
4415
+ int i, j;
4416
+
4417
+ compatible = compatible_reg_types[arg_type];
4418
+ if (!compatible) {
4419
+ verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
4420
+ return -EFAULT;
4421
+ }
4422
+
4423
+ for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
4424
+ expected = compatible->types[i];
4425
+ if (expected == NOT_INIT)
4426
+ break;
4427
+
4428
+ if (type == expected)
4429
+ goto found;
4430
+ }
4431
+
4432
+ verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
4433
+ for (j = 0; j + 1 < i; j++)
4434
+ verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
4435
+ verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
4436
+ return -EACCES;
4437
+
4438
+found:
4439
+ if (type == PTR_TO_BTF_ID) {
4440
+ if (!arg_btf_id) {
4441
+ if (!compatible->btf_id) {
4442
+ verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
4443
+ return -EFAULT;
4444
+ }
4445
+ arg_btf_id = compatible->btf_id;
4446
+ }
4447
+
4448
+ if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
4449
+ *arg_btf_id)) {
4450
+ verbose(env, "R%d is of type %s but %s is expected\n",
4451
+ regno, kernel_type_name(reg->btf_id),
4452
+ kernel_type_name(*arg_btf_id));
4453
+ return -EACCES;
4454
+ }
4455
+
4456
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4457
+ verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
4458
+ regno);
4459
+ return -EACCES;
4460
+ }
4461
+ }
4462
+
4463
+ return 0;
4464
+}
4465
+
4466
+static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
4467
+ struct bpf_call_arg_meta *meta,
4468
+ const struct bpf_func_proto *fn)
4469
+{
4470
+ u32 regno = BPF_REG_1 + arg;
4471
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4472
+ enum bpf_arg_type arg_type = fn->arg_type[arg];
4473
+ enum bpf_reg_type type = reg->type;
19584474 int err = 0;
19594475
19604476 if (arg_type == ARG_DONTCARE)
....@@ -1979,45 +4495,39 @@
19794495 return -EACCES;
19804496 }
19814497
1982
- if (arg_type == ARG_PTR_TO_MAP_KEY ||
1983
- arg_type == ARG_PTR_TO_MAP_VALUE) {
1984
- expected_type = PTR_TO_STACK;
1985
- if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
1986
- type != expected_type)
1987
- goto err_type;
1988
- } else if (arg_type == ARG_CONST_SIZE ||
1989
- arg_type == ARG_CONST_SIZE_OR_ZERO) {
1990
- expected_type = SCALAR_VALUE;
1991
- if (type != expected_type)
1992
- goto err_type;
1993
- } else if (arg_type == ARG_CONST_MAP_PTR) {
1994
- expected_type = CONST_PTR_TO_MAP;
1995
- if (type != expected_type)
1996
- goto err_type;
1997
- } else if (arg_type == ARG_PTR_TO_CTX) {
1998
- expected_type = PTR_TO_CTX;
1999
- if (type != expected_type)
2000
- goto err_type;
4498
+ if (arg_type == ARG_PTR_TO_MAP_VALUE ||
4499
+ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
4500
+ arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
4501
+ err = resolve_map_arg_type(env, meta, &arg_type);
4502
+ if (err)
4503
+ return err;
4504
+ }
4505
+
4506
+ if (register_is_null(reg) && arg_type_may_be_null(arg_type))
4507
+ /* A NULL register has a SCALAR_VALUE type, so skip
4508
+ * type checking.
4509
+ */
4510
+ goto skip_type_check;
4511
+
4512
+ err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
4513
+ if (err)
4514
+ return err;
4515
+
4516
+ if (type == PTR_TO_CTX) {
20014517 err = check_ctx_reg(env, reg, regno);
20024518 if (err < 0)
20034519 return err;
2004
- } else if (arg_type_is_mem_ptr(arg_type)) {
2005
- expected_type = PTR_TO_STACK;
2006
- /* One exception here. In case function allows for NULL to be
2007
- * passed in as argument, it's a SCALAR_VALUE type. Final test
2008
- * happens during stack boundary checking.
2009
- */
2010
- if (register_is_null(reg) &&
2011
- arg_type == ARG_PTR_TO_MEM_OR_NULL)
2012
- /* final test in check_stack_boundary() */;
2013
- else if (!type_is_pkt_pointer(type) &&
2014
- type != PTR_TO_MAP_VALUE &&
2015
- type != expected_type)
2016
- goto err_type;
2017
- meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
2018
- } else {
2019
- verbose(env, "unsupported arg_type %d\n", arg_type);
2020
- return -EFAULT;
4520
+ }
4521
+
4522
+skip_type_check:
4523
+ if (reg->ref_obj_id) {
4524
+ if (meta->ref_obj_id) {
4525
+ verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
4526
+ regno, reg->ref_obj_id,
4527
+ meta->ref_obj_id);
4528
+ return -EFAULT;
4529
+ }
4530
+ meta->ref_obj_id = reg->ref_obj_id;
20214531 }
20224532
20234533 if (arg_type == ARG_CONST_MAP_PTR) {
....@@ -2040,7 +4550,10 @@
20404550 err = check_helper_mem_access(env, regno,
20414551 meta->map_ptr->key_size, false,
20424552 NULL);
2043
- } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
4553
+ } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
4554
+ (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
4555
+ !register_is_null(reg)) ||
4556
+ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
20444557 /* bpf_map_xxx(..., map_ptr, ..., value) call:
20454558 * check [value, value + map->value_size) validity
20464559 */
....@@ -2049,14 +4562,42 @@
20494562 verbose(env, "invalid map_ptr to access map->value\n");
20504563 return -EACCES;
20514564 }
4565
+ meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
20524566 err = check_helper_mem_access(env, regno,
20534567 meta->map_ptr->value_size, false,
2054
- NULL);
4568
+ meta);
4569
+ } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
4570
+ if (!reg->btf_id) {
4571
+ verbose(env, "Helper has invalid btf_id in R%d\n", regno);
4572
+ return -EACCES;
4573
+ }
4574
+ meta->ret_btf_id = reg->btf_id;
4575
+ } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
4576
+ if (meta->func_id == BPF_FUNC_spin_lock) {
4577
+ if (process_spin_lock(env, regno, true))
4578
+ return -EACCES;
4579
+ } else if (meta->func_id == BPF_FUNC_spin_unlock) {
4580
+ if (process_spin_lock(env, regno, false))
4581
+ return -EACCES;
4582
+ } else {
4583
+ verbose(env, "verifier internal error\n");
4584
+ return -EFAULT;
4585
+ }
4586
+ } else if (arg_type_is_mem_ptr(arg_type)) {
4587
+ /* The access to this pointer is only checked when we hit the
4588
+ * next is_mem_size argument below.
4589
+ */
4590
+ meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
20554591 } else if (arg_type_is_mem_size(arg_type)) {
20564592 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
20574593
2058
- /* remember the mem_size which may be used later
2059
- * to refine return values.
4594
+ /* This is used to refine r0 return value bounds for helpers
4595
+ * that enforce this value as an upper bound on return values.
4596
+ * See do_refine_retval_range() for helpers that can refine
4597
+ * the return value. C type of helper is u32 so we pull register
4598
+ * bound from umax_value however, if negative verifier errors
4599
+ * out. Only upper bounds can be learned because retval is an
4600
+ * int type and negative retvals are allowed.
20604601 */
20614602 meta->msize_max_value = reg->umax_value;
20624603
....@@ -2093,13 +4634,62 @@
20934634 err = check_helper_mem_access(env, regno - 1,
20944635 reg->umax_value,
20954636 zero_size_allowed, meta);
4637
+ if (!err)
4638
+ err = mark_chain_precision(env, regno);
4639
+ } else if (arg_type_is_alloc_size(arg_type)) {
4640
+ if (!tnum_is_const(reg->var_off)) {
4641
+ verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
4642
+ regno);
4643
+ return -EACCES;
4644
+ }
4645
+ meta->mem_size = reg->var_off.value;
4646
+ } else if (arg_type_is_int_ptr(arg_type)) {
4647
+ int size = int_ptr_type_to_size(arg_type);
4648
+
4649
+ err = check_helper_mem_access(env, regno, size, false, meta);
4650
+ if (err)
4651
+ return err;
4652
+ err = check_ptr_alignment(env, reg, 0, size, true);
20964653 }
20974654
20984655 return err;
2099
-err_type:
2100
- verbose(env, "R%d type=%s expected=%s\n", regno,
2101
- reg_type_str[type], reg_type_str[expected_type]);
2102
- return -EACCES;
4656
+}
4657
+
4658
+static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
4659
+{
4660
+ enum bpf_attach_type eatype = env->prog->expected_attach_type;
4661
+ enum bpf_prog_type type = resolve_prog_type(env->prog);
4662
+
4663
+ if (func_id != BPF_FUNC_map_update_elem)
4664
+ return false;
4665
+
4666
+ /* It's not possible to get access to a locked struct sock in these
4667
+ * contexts, so updating is safe.
4668
+ */
4669
+ switch (type) {
4670
+ case BPF_PROG_TYPE_TRACING:
4671
+ if (eatype == BPF_TRACE_ITER)
4672
+ return true;
4673
+ break;
4674
+ case BPF_PROG_TYPE_SOCKET_FILTER:
4675
+ case BPF_PROG_TYPE_SCHED_CLS:
4676
+ case BPF_PROG_TYPE_SCHED_ACT:
4677
+ case BPF_PROG_TYPE_XDP:
4678
+ case BPF_PROG_TYPE_SK_REUSEPORT:
4679
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
4680
+ case BPF_PROG_TYPE_SK_LOOKUP:
4681
+ return true;
4682
+ default:
4683
+ break;
4684
+ }
4685
+
4686
+ verbose(env, "cannot update sockmap in this context\n");
4687
+ return false;
4688
+}
4689
+
4690
+static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
4691
+{
4692
+ return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
21034693 }
21044694
21054695 static int check_map_func_compatibility(struct bpf_verifier_env *env,
....@@ -2117,7 +4707,15 @@
21174707 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
21184708 if (func_id != BPF_FUNC_perf_event_read &&
21194709 func_id != BPF_FUNC_perf_event_output &&
2120
- func_id != BPF_FUNC_perf_event_read_value)
4710
+ func_id != BPF_FUNC_skb_output &&
4711
+ func_id != BPF_FUNC_perf_event_read_value &&
4712
+ func_id != BPF_FUNC_xdp_output)
4713
+ goto error;
4714
+ break;
4715
+ case BPF_MAP_TYPE_RINGBUF:
4716
+ if (func_id != BPF_FUNC_ringbuf_output &&
4717
+ func_id != BPF_FUNC_ringbuf_reserve &&
4718
+ func_id != BPF_FUNC_ringbuf_query)
21214719 goto error;
21224720 break;
21234721 case BPF_MAP_TYPE_STACK_TRACE:
....@@ -2130,23 +4728,26 @@
21304728 goto error;
21314729 break;
21324730 case BPF_MAP_TYPE_CGROUP_STORAGE:
4731
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
21334732 if (func_id != BPF_FUNC_get_local_storage)
21344733 goto error;
21354734 break;
2136
- /* devmap returns a pointer to a live net_device ifindex that we cannot
2137
- * allow to be modified from bpf side. So do not allow lookup elements
2138
- * for now.
2139
- */
21404735 case BPF_MAP_TYPE_DEVMAP:
2141
- if (func_id != BPF_FUNC_redirect_map)
4736
+ case BPF_MAP_TYPE_DEVMAP_HASH:
4737
+ if (func_id != BPF_FUNC_redirect_map &&
4738
+ func_id != BPF_FUNC_map_lookup_elem)
21424739 goto error;
21434740 break;
21444741 /* Restrict bpf side of cpumap and xskmap, open when use-cases
21454742 * appear.
21464743 */
21474744 case BPF_MAP_TYPE_CPUMAP:
2148
- case BPF_MAP_TYPE_XSKMAP:
21494745 if (func_id != BPF_FUNC_redirect_map)
4746
+ goto error;
4747
+ break;
4748
+ case BPF_MAP_TYPE_XSKMAP:
4749
+ if (func_id != BPF_FUNC_redirect_map &&
4750
+ func_id != BPF_FUNC_map_lookup_elem)
21504751 goto error;
21514752 break;
21524753 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
....@@ -2158,18 +4759,41 @@
21584759 if (func_id != BPF_FUNC_sk_redirect_map &&
21594760 func_id != BPF_FUNC_sock_map_update &&
21604761 func_id != BPF_FUNC_map_delete_elem &&
2161
- func_id != BPF_FUNC_msg_redirect_map)
4762
+ func_id != BPF_FUNC_msg_redirect_map &&
4763
+ func_id != BPF_FUNC_sk_select_reuseport &&
4764
+ func_id != BPF_FUNC_map_lookup_elem &&
4765
+ !may_update_sockmap(env, func_id))
21624766 goto error;
21634767 break;
21644768 case BPF_MAP_TYPE_SOCKHASH:
21654769 if (func_id != BPF_FUNC_sk_redirect_hash &&
21664770 func_id != BPF_FUNC_sock_hash_update &&
21674771 func_id != BPF_FUNC_map_delete_elem &&
2168
- func_id != BPF_FUNC_msg_redirect_hash)
4772
+ func_id != BPF_FUNC_msg_redirect_hash &&
4773
+ func_id != BPF_FUNC_sk_select_reuseport &&
4774
+ func_id != BPF_FUNC_map_lookup_elem &&
4775
+ !may_update_sockmap(env, func_id))
21694776 goto error;
21704777 break;
21714778 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
21724779 if (func_id != BPF_FUNC_sk_select_reuseport)
4780
+ goto error;
4781
+ break;
4782
+ case BPF_MAP_TYPE_QUEUE:
4783
+ case BPF_MAP_TYPE_STACK:
4784
+ if (func_id != BPF_FUNC_map_peek_elem &&
4785
+ func_id != BPF_FUNC_map_pop_elem &&
4786
+ func_id != BPF_FUNC_map_push_elem)
4787
+ goto error;
4788
+ break;
4789
+ case BPF_MAP_TYPE_SK_STORAGE:
4790
+ if (func_id != BPF_FUNC_sk_storage_get &&
4791
+ func_id != BPF_FUNC_sk_storage_delete)
4792
+ goto error;
4793
+ break;
4794
+ case BPF_MAP_TYPE_INODE_STORAGE:
4795
+ if (func_id != BPF_FUNC_inode_storage_get &&
4796
+ func_id != BPF_FUNC_inode_storage_delete)
21734797 goto error;
21744798 break;
21754799 default:
....@@ -2181,15 +4805,23 @@
21814805 case BPF_FUNC_tail_call:
21824806 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
21834807 goto error;
2184
- if (env->subprog_cnt > 1) {
2185
- verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
4808
+ if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
4809
+ verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
21864810 return -EINVAL;
21874811 }
21884812 break;
21894813 case BPF_FUNC_perf_event_read:
21904814 case BPF_FUNC_perf_event_output:
21914815 case BPF_FUNC_perf_event_read_value:
4816
+ case BPF_FUNC_skb_output:
4817
+ case BPF_FUNC_xdp_output:
21924818 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
4819
+ goto error;
4820
+ break;
4821
+ case BPF_FUNC_ringbuf_output:
4822
+ case BPF_FUNC_ringbuf_reserve:
4823
+ case BPF_FUNC_ringbuf_query:
4824
+ if (map->map_type != BPF_MAP_TYPE_RINGBUF)
21934825 goto error;
21944826 break;
21954827 case BPF_FUNC_get_stackid:
....@@ -2203,6 +4835,7 @@
22034835 break;
22044836 case BPF_FUNC_redirect_map:
22054837 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
4838
+ map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
22064839 map->map_type != BPF_MAP_TYPE_CPUMAP &&
22074840 map->map_type != BPF_MAP_TYPE_XSKMAP)
22084841 goto error;
....@@ -2220,11 +4853,31 @@
22204853 goto error;
22214854 break;
22224855 case BPF_FUNC_get_local_storage:
2223
- if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
4856
+ if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
4857
+ map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
22244858 goto error;
22254859 break;
22264860 case BPF_FUNC_sk_select_reuseport:
2227
- if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
4861
+ if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
4862
+ map->map_type != BPF_MAP_TYPE_SOCKMAP &&
4863
+ map->map_type != BPF_MAP_TYPE_SOCKHASH)
4864
+ goto error;
4865
+ break;
4866
+ case BPF_FUNC_map_peek_elem:
4867
+ case BPF_FUNC_map_pop_elem:
4868
+ case BPF_FUNC_map_push_elem:
4869
+ if (map->map_type != BPF_MAP_TYPE_QUEUE &&
4870
+ map->map_type != BPF_MAP_TYPE_STACK)
4871
+ goto error;
4872
+ break;
4873
+ case BPF_FUNC_sk_storage_get:
4874
+ case BPF_FUNC_sk_storage_delete:
4875
+ if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
4876
+ goto error;
4877
+ break;
4878
+ case BPF_FUNC_inode_storage_get:
4879
+ case BPF_FUNC_inode_storage_delete:
4880
+ if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
22284881 goto error;
22294882 break;
22304883 default:
....@@ -2287,49 +4940,142 @@
22874940 return true;
22884941 }
22894942
2290
-static int check_func_proto(const struct bpf_func_proto *fn)
4943
+static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
4944
+{
4945
+ int count = 0;
4946
+
4947
+ if (arg_type_may_be_refcounted(fn->arg1_type))
4948
+ count++;
4949
+ if (arg_type_may_be_refcounted(fn->arg2_type))
4950
+ count++;
4951
+ if (arg_type_may_be_refcounted(fn->arg3_type))
4952
+ count++;
4953
+ if (arg_type_may_be_refcounted(fn->arg4_type))
4954
+ count++;
4955
+ if (arg_type_may_be_refcounted(fn->arg5_type))
4956
+ count++;
4957
+
4958
+ /* A reference acquiring function cannot acquire
4959
+ * another refcounted ptr.
4960
+ */
4961
+ if (may_be_acquire_function(func_id) && count)
4962
+ return false;
4963
+
4964
+ /* We only support one arg being unreferenced at the moment,
4965
+ * which is sufficient for the helper functions we have right now.
4966
+ */
4967
+ return count <= 1;
4968
+}
4969
+
4970
+static bool check_btf_id_ok(const struct bpf_func_proto *fn)
4971
+{
4972
+ int i;
4973
+
4974
+ for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
4975
+ if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
4976
+ return false;
4977
+
4978
+ if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
4979
+ return false;
4980
+ }
4981
+
4982
+ return true;
4983
+}
4984
+
4985
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
22914986 {
22924987 return check_raw_mode_ok(fn) &&
2293
- check_arg_pair_ok(fn) ? 0 : -EINVAL;
4988
+ check_arg_pair_ok(fn) &&
4989
+ check_btf_id_ok(fn) &&
4990
+ check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
22944991 }
22954992
22964993 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
22974994 * are now invalid, so turn them into unknown SCALAR_VALUE.
22984995 */
2299
-static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2300
- struct bpf_func_state *state)
2301
-{
2302
- struct bpf_reg_state *regs = state->regs, *reg;
2303
- int i;
2304
-
2305
- for (i = 0; i < MAX_BPF_REG; i++)
2306
- if (reg_is_pkt_pointer_any(&regs[i]))
2307
- mark_reg_unknown(env, regs, i);
2308
-
2309
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
2310
- if (state->stack[i].slot_type[0] != STACK_SPILL)
2311
- continue;
2312
- reg = &state->stack[i].spilled_ptr;
2313
- if (reg_is_pkt_pointer_any(reg))
2314
- __mark_reg_unknown(reg);
2315
- }
2316
-}
2317
-
23184996 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
23194997 {
2320
- struct bpf_verifier_state *vstate = env->cur_state;
4998
+ struct bpf_func_state *state;
4999
+ struct bpf_reg_state *reg;
5000
+
5001
+ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
5002
+ if (reg_is_pkt_pointer_any(reg))
5003
+ __mark_reg_unknown(env, reg);
5004
+ }));
5005
+}
5006
+
5007
+enum {
5008
+ AT_PKT_END = -1,
5009
+ BEYOND_PKT_END = -2,
5010
+};
5011
+
5012
+static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
5013
+{
5014
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
5015
+ struct bpf_reg_state *reg = &state->regs[regn];
5016
+
5017
+ if (reg->type != PTR_TO_PACKET)
5018
+ /* PTR_TO_PACKET_META is not supported yet */
5019
+ return;
5020
+
5021
+ /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
5022
+ * How far beyond pkt_end it goes is unknown.
5023
+ * if (!range_open) it's the case of pkt >= pkt_end
5024
+ * if (range_open) it's the case of pkt > pkt_end
5025
+ * hence this pointer is at least 1 byte bigger than pkt_end
5026
+ */
5027
+ if (range_open)
5028
+ reg->range = BEYOND_PKT_END;
5029
+ else
5030
+ reg->range = AT_PKT_END;
5031
+}
5032
+
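/*
 * Illustrative sketch, not from this diff: the 'eth + 1 > data_end'
 * comparison below is what feeds mark_pkt_end() above; the taken branch
 * sees a pointer at/beyond pkt_end, the fallthrough may access the packet
 * directly.  Names are made up; assumes libbpf's bpf_helpers.h.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int pkt_end_sketch(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;

	if ((void *)(eth + 1) > data_end)		/* 'pkt > pkt_end' branch */
		return XDP_DROP;
	return eth->h_proto ? XDP_PASS : XDP_DROP;	/* in-bounds access */
}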
5033
+/* The pointer with the specified id has released its reference to kernel
5034
+ * resources. Identify all copies of the same pointer and clear the reference.
5035
+ */
5036
+static int release_reference(struct bpf_verifier_env *env,
5037
+ int ref_obj_id)
5038
+{
5039
+ struct bpf_func_state *state;
5040
+ struct bpf_reg_state *reg;
5041
+ int err;
5042
+
5043
+ err = release_reference_state(cur_func(env), ref_obj_id);
5044
+ if (err)
5045
+ return err;
5046
+
5047
+ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
5048
+ if (reg->ref_obj_id == ref_obj_id) {
5049
+ if (!env->allow_ptr_leaks)
5050
+ __mark_reg_not_init(env, reg);
5051
+ else
5052
+ __mark_reg_unknown(env, reg);
5053
+ }
5054
+ }));
5055
+
5056
+ return 0;
5057
+}
5058
+
5059
+static void clear_caller_saved_regs(struct bpf_verifier_env *env,
5060
+ struct bpf_reg_state *regs)
5061
+{
23215062 int i;
23225063
2323
- for (i = 0; i <= vstate->curframe; i++)
2324
- __clear_all_pkt_pointers(env, vstate->frame[i]);
5064
+ /* after the call registers r0 - r5 were scratched */
5065
+ for (i = 0; i < CALLER_SAVED_REGS; i++) {
5066
+ mark_reg_not_init(env, regs, caller_saved[i]);
5067
+ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5068
+ }
23255069 }
23265070
23275071 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
23285072 int *insn_idx)
23295073 {
23305074 struct bpf_verifier_state *state = env->cur_state;
5075
+ struct bpf_func_info_aux *func_info_aux;
23315076 struct bpf_func_state *caller, *callee;
2332
- int i, subprog, target_insn;
5077
+ int i, err, subprog, target_insn;
5078
+ bool is_global = false;
23335079
23345080 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
23355081 verbose(env, "the call stack of %d frames is too deep\n",
....@@ -2352,6 +5098,33 @@
23525098 return -EFAULT;
23535099 }
23545100
5101
+ func_info_aux = env->prog->aux->func_info_aux;
5102
+ if (func_info_aux)
5103
+ is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5104
+ err = btf_check_func_arg_match(env, subprog, caller->regs);
5105
+ if (err == -EFAULT)
5106
+ return err;
5107
+ if (is_global) {
5108
+ if (err) {
5109
+ verbose(env, "Caller passes invalid args into func#%d\n",
5110
+ subprog);
5111
+ return err;
5112
+ } else {
5113
+ if (env->log.level & BPF_LOG_LEVEL)
5114
+ verbose(env,
5115
+ "Func#%d is global and valid. Skipping.\n",
5116
+ subprog);
5117
+ clear_caller_saved_regs(env, caller->regs);
5118
+
5119
+ /* All global functions return a 64-bit SCALAR_VALUE */
5120
+ mark_reg_unknown(env, caller->regs, BPF_REG_0);
5121
+ caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5122
+
5123
+ /* continue with next insn after call */
5124
+ return 0;
5125
+ }
5126
+ }
5127
+
23555128 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
23565129 if (!callee)
23575130 return -ENOMEM;
....@@ -2367,17 +5140,18 @@
23675140 state->curframe + 1 /* frameno within this callchain */,
23685141 subprog /* subprog number within this prog */);
23695142
5143
+ /* Transfer references to the callee */
5144
+ err = transfer_reference_state(callee, caller);
5145
+ if (err)
5146
+ return err;
5147
+
23705148 /* copy r1 - r5 args that callee can access. The copy includes parent
23715149 * pointers, which connects us up to the liveness chain
23725150 */
23735151 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
23745152 callee->regs[i] = caller->regs[i];
23755153
2376
- /* after the call registers r0 - r5 were scratched */
2377
- for (i = 0; i < CALLER_SAVED_REGS; i++) {
2378
- mark_reg_not_init(env, caller->regs, caller_saved[i]);
2379
- check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2380
- }
5154
+ clear_caller_saved_regs(env, caller->regs);
23815155
23825156 /* only increment it after check_reg_arg() finished */
23835157 state->curframe++;
....@@ -2385,7 +5159,7 @@
23855159 /* and go analyze first insn of the callee */
23865160 *insn_idx = target_insn;
23875161
2388
- if (env->log.level) {
5162
+ if (env->log.level & BPF_LOG_LEVEL) {
23895163 verbose(env, "caller:\n");
23905164 print_verifier_state(env, caller);
23915165 verbose(env, "callee:\n");
....@@ -2399,6 +5173,7 @@
23995173 struct bpf_verifier_state *state = env->cur_state;
24005174 struct bpf_func_state *caller, *callee;
24015175 struct bpf_reg_state *r0;
5176
+ int err;
24025177
24035178 callee = state->frame[state->curframe];
24045179 r0 = &callee->regs[BPF_REG_0];
....@@ -2418,8 +5193,13 @@
24185193 /* return to the caller whatever r0 had in the callee */
24195194 caller->regs[BPF_REG_0] = *r0;
24205195
5196
+ /* Transfer references to the caller */
5197
+ err = transfer_reference_state(caller, callee);
5198
+ if (err)
5199
+ return err;
5200
+
24215201 *insn_idx = callee->callsite + 1;
2422
- if (env->log.level) {
5202
+ if (env->log.level & BPF_LOG_LEVEL) {
24235203 verbose(env, "returning from callee:\n");
24245204 print_verifier_state(env, callee);
24255205 verbose(env, "to caller at %d:\n", *insn_idx);
....@@ -2431,44 +5211,24 @@
24315211 return 0;
24325212 }
24335213
2434
-static int do_refine_retval_range(struct bpf_verifier_env *env,
2435
- struct bpf_reg_state *regs, int ret_type,
2436
- int func_id, struct bpf_call_arg_meta *meta)
5214
+static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
5215
+ int func_id,
5216
+ struct bpf_call_arg_meta *meta)
24375217 {
24385218 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
2439
- struct bpf_reg_state tmp_reg = *ret_reg;
2440
- bool ret;
24415219
24425220 if (ret_type != RET_INTEGER ||
24435221 (func_id != BPF_FUNC_get_stack &&
2444
- func_id != BPF_FUNC_probe_read_str))
2445
- return 0;
5222
+ func_id != BPF_FUNC_probe_read_str &&
5223
+ func_id != BPF_FUNC_probe_read_kernel_str &&
5224
+ func_id != BPF_FUNC_probe_read_user_str))
5225
+ return;
24465226
2447
- /* Error case where ret is in interval [S32MIN, -1]. */
2448
- ret_reg->smin_value = S32_MIN;
2449
- ret_reg->smax_value = -1;
2450
-
2451
- __reg_deduce_bounds(ret_reg);
2452
- __reg_bound_offset(ret_reg);
2453
- __update_reg_bounds(ret_reg);
2454
-
2455
- ret = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
2456
- if (!ret)
2457
- return -EFAULT;
2458
-
2459
- *ret_reg = tmp_reg;
2460
-
2461
- /* Success case where ret is in range [0, msize_max_value]. */
2462
- ret_reg->smin_value = 0;
24635227 ret_reg->smax_value = meta->msize_max_value;
2464
- ret_reg->umin_value = ret_reg->smin_value;
2465
- ret_reg->umax_value = ret_reg->smax_value;
2466
-
2467
- __reg_deduce_bounds(ret_reg);
2468
- __reg_bound_offset(ret_reg);
2469
- __update_reg_bounds(ret_reg);
2470
-
2471
- return 0;
5228
+ ret_reg->s32_max_value = meta->msize_max_value;
5229
+ ret_reg->smin_value = -MAX_ERRNO;
5230
+ ret_reg->s32_min_value = -MAX_ERRNO;
5231
+ reg_bounds_sync(ret_reg);
24725232 }
24735233
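/*
 * Illustrative sketch, not from this diff: what the refinement above buys a
 * program.  The return value of bpf_probe_read_kernel_str() is known to lie
 * in [-MAX_ERRNO, buffer size], so after a positive check it can bound an
 * access into the buffer.  The kprobe target and names are made up; assumes
 * libbpf's bpf_helpers.h/bpf_tracing.h and a -D__TARGET_ARCH_* define.
 */
#include <linux/bpf.h>
#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("kprobe/do_sys_openat2")
int retval_range_sketch(struct pt_regs *ctx)
{
	const char *name = (const char *)PT_REGS_PARM2(ctx);
	char buf[64];
	long n;

	n = bpf_probe_read_kernel_str(buf, sizeof(buf), name);
	if (n <= 0 || n >= (long)sizeof(buf))
		return 0;
	return buf[n - 1];	/* bounded thanks to the refined retval range */
}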
24745234 static int
....@@ -2476,25 +5236,91 @@
24765236 int func_id, int insn_idx)
24775237 {
24785238 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5239
+ struct bpf_map *map = meta->map_ptr;
24795240
24805241 if (func_id != BPF_FUNC_tail_call &&
24815242 func_id != BPF_FUNC_map_lookup_elem &&
24825243 func_id != BPF_FUNC_map_update_elem &&
2483
- func_id != BPF_FUNC_map_delete_elem)
5244
+ func_id != BPF_FUNC_map_delete_elem &&
5245
+ func_id != BPF_FUNC_map_push_elem &&
5246
+ func_id != BPF_FUNC_map_pop_elem &&
5247
+ func_id != BPF_FUNC_map_peek_elem)
24845248 return 0;
24855249
2486
- if (meta->map_ptr == NULL) {
5250
+ if (map == NULL) {
24875251 verbose(env, "kernel subsystem misconfigured verifier\n");
24885252 return -EINVAL;
24895253 }
24905254
2491
- if (!BPF_MAP_PTR(aux->map_state))
5255
+ /* In case of read-only, some additional restrictions
5256
+ * need to be applied in order to prevent altering the
5257
+ * state of the map from program side.
5258
+ */
5259
+ if ((map->map_flags & BPF_F_RDONLY_PROG) &&
5260
+ (func_id == BPF_FUNC_map_delete_elem ||
5261
+ func_id == BPF_FUNC_map_update_elem ||
5262
+ func_id == BPF_FUNC_map_push_elem ||
5263
+ func_id == BPF_FUNC_map_pop_elem)) {
5264
+ verbose(env, "write into map forbidden\n");
5265
+ return -EACCES;
5266
+ }
5267
+
5268
+ if (!BPF_MAP_PTR(aux->map_ptr_state))
24925269 bpf_map_ptr_store(aux, meta->map_ptr,
2493
- meta->map_ptr->unpriv_array);
2494
- else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
5270
+ !meta->map_ptr->bypass_spec_v1);
5271
+ else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
24955272 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
2496
- meta->map_ptr->unpriv_array);
5273
+ !meta->map_ptr->bypass_spec_v1);
24975274 return 0;
5275
+}
5276
+
5277
+static int
5278
+record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
5279
+ int func_id, int insn_idx)
5280
+{
5281
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5282
+ struct bpf_reg_state *regs = cur_regs(env), *reg;
5283
+ struct bpf_map *map = meta->map_ptr;
5284
+ u64 val, max;
5285
+ int err;
5286
+
5287
+ if (func_id != BPF_FUNC_tail_call)
5288
+ return 0;
5289
+ if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
5290
+ verbose(env, "kernel subsystem misconfigured verifier\n");
5291
+ return -EINVAL;
5292
+ }
5293
+
5294
+ reg = &regs[BPF_REG_3];
5295
+ val = reg->var_off.value;
5296
+ max = map->max_entries;
5297
+
5298
+ if (!(register_is_const(reg) && val < max)) {
5299
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5300
+ return 0;
5301
+ }
5302
+
5303
+ err = mark_chain_precision(env, BPF_REG_3);
5304
+ if (err)
5305
+ return err;
5306
+ if (bpf_map_key_unseen(aux))
5307
+ bpf_map_key_store(aux, val);
5308
+ else if (!bpf_map_key_poisoned(aux) &&
5309
+ bpf_map_key_immediate(aux) != val)
5310
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5311
+ return 0;
5312
+}
5313
+
5314
+static int check_reference_leak(struct bpf_verifier_env *env)
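/*
 * Illustrative sketch, not from this diff: record_func_key() above notices a
 * constant tail-call index like the one below, which later allows the JIT to
 * emit a direct jump instead of the generic bpf_tail_call() path.  Map and
 * names are made up; assumes libbpf's bpf_helpers.h map macros.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 4);
	__type(key, __u32);
	__type(value, __u32);
} jmp_table SEC(".maps");

SEC("xdp")
int tail_call_sketch(struct xdp_md *ctx)
{
	bpf_tail_call(ctx, &jmp_table, 2);	/* constant key: recorded, not poisoned */
	return XDP_PASS;			/* reached only if slot 2 is empty */
}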
5315
+{
5316
+ struct bpf_func_state *state = cur_func(env);
5317
+ int i;
5318
+
5319
+ for (i = 0; i < state->acquired_refs; i++) {
5320
+ verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
5321
+ state->refs[i].id, state->refs[i].insn_idx);
5322
+ }
5323
+ return state->acquired_refs ? -EINVAL : 0;
24985324 }
24995325
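/*
 * Illustrative sketch, not from this diff: the acquire/release pairing that
 * release_reference() and check_reference_leak() above enforce.  Dropping
 * the bpf_sk_release() call makes the verifier report "Unreleased reference
 * id=... alloc_insn=...".  Names are made up; assumes libbpf's bpf_helpers.h
 * and <linux/pkt_cls.h> for TC_ACT_OK.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int sk_ref_sketch(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = {};
	struct bpf_sock *sk;

	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
			       BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return TC_ACT_OK;
	bpf_sk_release(sk);		/* release the acquired reference */
	return TC_ACT_OK;
}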
25005326 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
....@@ -2526,6 +5352,11 @@
25265352 return -EINVAL;
25275353 }
25285354
5355
+ if (fn->allowed && !fn->allowed(env->prog)) {
5356
+ verbose(env, "helper call is not allowed in probe\n");
5357
+ return -EINVAL;
5358
+ }
5359
+
25295360 /* With LD_ABS/IND some JITs save/restore skb from r1. */
25305361 changes_data = bpf_helper_changes_pkt_data(fn->func);
25315362 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
....@@ -2537,31 +5368,26 @@
25375368 memset(&meta, 0, sizeof(meta));
25385369 meta.pkt_access = fn->pkt_access;
25395370
2540
- err = check_func_proto(fn);
5371
+ err = check_func_proto(fn, func_id);
25415372 if (err) {
25425373 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
25435374 func_id_name(func_id), func_id);
25445375 return err;
25455376 }
25465377
5378
+ meta.func_id = func_id;
25475379 /* check args */
2548
- err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
2549
- if (err)
2550
- return err;
2551
- err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
2552
- if (err)
2553
- return err;
2554
- err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
2555
- if (err)
2556
- return err;
2557
- err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
2558
- if (err)
2559
- return err;
2560
- err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
5380
+ for (i = 0; i < 5; i++) {
5381
+ err = check_func_arg(env, i, &meta, fn);
5382
+ if (err)
5383
+ return err;
5384
+ }
5385
+
5386
+ err = record_func_map(env, &meta, func_id, insn_idx);
25615387 if (err)
25625388 return err;
25635389
2564
- err = record_func_map(env, &meta, func_id, insn_idx);
5390
+ err = record_func_key(env, &meta, func_id, insn_idx);
25655391 if (err)
25665392 return err;
25675393
....@@ -2573,6 +5399,21 @@
25735399 BPF_WRITE, -1, false);
25745400 if (err)
25755401 return err;
5402
+ }
5403
+
5404
+ if (func_id == BPF_FUNC_tail_call) {
5405
+ err = check_reference_leak(env);
5406
+ if (err) {
5407
+ verbose(env, "tail_call would lead to reference leak\n");
5408
+ return err;
5409
+ }
5410
+ } else if (is_release_function(func_id)) {
5411
+ err = release_reference(env, meta.ref_obj_id);
5412
+ if (err) {
5413
+ verbose(env, "func %s#%d reference has not been acquired before\n",
5414
+ func_id_name(func_id), func_id);
5415
+ return err;
5416
+ }
25765417 }
25775418
25785419 regs = cur_regs(env);
....@@ -2592,6 +5433,9 @@
25925433 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
25935434 }
25945435
5436
+ /* helper call returns 64-bit value. */
5437
+ regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5438
+
25955439 /* update return register (already marked as written above) */
25965440 if (fn->ret_type == RET_INTEGER) {
25975441 /* sets type to SCALAR_VALUE */
....@@ -2600,10 +5444,6 @@
26005444 regs[BPF_REG_0].type = NOT_INIT;
26015445 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
26025446 fn->ret_type == RET_PTR_TO_MAP_VALUE) {
2603
- if (fn->ret_type == RET_PTR_TO_MAP_VALUE)
2604
- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
2605
- else
2606
- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
26075447 /* There is no offset yet applied, variable or fixed */
26085448 mark_reg_known_zero(env, regs, BPF_REG_0);
26095449 /* remember map_ptr, so that check_map_access()
....@@ -2616,22 +5456,99 @@
26165456 return -EINVAL;
26175457 }
26185458 regs[BPF_REG_0].map_ptr = meta.map_ptr;
2619
- regs[BPF_REG_0].id = ++env->id_gen;
5459
+ if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
5460
+ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
5461
+ if (map_value_has_spin_lock(meta.map_ptr))
5462
+ regs[BPF_REG_0].id = ++env->id_gen;
5463
+ } else {
5464
+ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
5465
+ }
5466
+ } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
5467
+ mark_reg_known_zero(env, regs, BPF_REG_0);
5468
+ regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
5469
+ } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
5470
+ mark_reg_known_zero(env, regs, BPF_REG_0);
5471
+ regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
5472
+ } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
5473
+ mark_reg_known_zero(env, regs, BPF_REG_0);
5474
+ regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
5475
+ } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
5476
+ mark_reg_known_zero(env, regs, BPF_REG_0);
5477
+ regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
5478
+ regs[BPF_REG_0].mem_size = meta.mem_size;
5479
+ } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
5480
+ fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
5481
+ const struct btf_type *t;
5482
+
5483
+ mark_reg_known_zero(env, regs, BPF_REG_0);
5484
+ t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
5485
+ if (!btf_type_is_struct(t)) {
5486
+ u32 tsize;
5487
+ const struct btf_type *ret;
5488
+ const char *tname;
5489
+
5490
+ /* resolve the type size of ksym. */
5491
+ ret = btf_resolve_size(btf_vmlinux, t, &tsize);
5492
+ if (IS_ERR(ret)) {
5493
+ tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5494
+ verbose(env, "unable to resolve the size of type '%s': %ld\n",
5495
+ tname, PTR_ERR(ret));
5496
+ return -EINVAL;
5497
+ }
5498
+ regs[BPF_REG_0].type =
5499
+ fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
5500
+ PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
5501
+ regs[BPF_REG_0].mem_size = tsize;
5502
+ } else {
5503
+ regs[BPF_REG_0].type =
5504
+ fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
5505
+ PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
5506
+ regs[BPF_REG_0].btf_id = meta.ret_btf_id;
5507
+ }
5508
+ } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
5509
+ int ret_btf_id;
5510
+
5511
+ mark_reg_known_zero(env, regs, BPF_REG_0);
5512
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
5513
+ ret_btf_id = *fn->ret_btf_id;
5514
+ if (ret_btf_id == 0) {
5515
+ verbose(env, "invalid return type %d of func %s#%d\n",
5516
+ fn->ret_type, func_id_name(func_id), func_id);
5517
+ return -EINVAL;
5518
+ }
5519
+ regs[BPF_REG_0].btf_id = ret_btf_id;
26205520 } else {
26215521 verbose(env, "unknown return type %d of func %s#%d\n",
26225522 fn->ret_type, func_id_name(func_id), func_id);
26235523 return -EINVAL;
26245524 }
26255525
2626
- err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
2627
- if (err)
2628
- return err;
5526
+ if (reg_type_may_be_null(regs[BPF_REG_0].type))
5527
+ regs[BPF_REG_0].id = ++env->id_gen;
5528
+
5529
+ if (is_ptr_cast_function(func_id)) {
5530
+ /* For release_reference() */
5531
+ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
5532
+ } else if (is_acquire_function(func_id, meta.map_ptr)) {
5533
+ int id = acquire_reference_state(env, insn_idx);
5534
+
5535
+ if (id < 0)
5536
+ return id;
5537
+ /* For mark_ptr_or_null_reg() */
5538
+ regs[BPF_REG_0].id = id;
5539
+ /* For release_reference() */
5540
+ regs[BPF_REG_0].ref_obj_id = id;
5541
+ }
5542
+
5543
+ do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
26295544
26305545 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
26315546 if (err)
26325547 return err;
26335548
2634
- if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
5549
+ if ((func_id == BPF_FUNC_get_stack ||
5550
+ func_id == BPF_FUNC_get_task_stack) &&
5551
+ !env->prog->has_callchain_buf) {
26355552 const char *err_str;
26365553
26375554 #ifdef CONFIG_PERF_EVENTS
....@@ -2649,6 +5566,9 @@
26495566 env->prog->has_callchain_buf = true;
26505567 }
26515568
5569
+ if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
5570
+ env->prog->call_get_stack = true;
5571
+
26525572 if (changes_data)
26535573 clear_all_pkt_pointers(env);
26545574 return 0;
....@@ -2664,10 +5584,30 @@
26645584 return res < a;
26655585 }
26665586
5587
+static bool signed_add32_overflows(s32 a, s32 b)
5588
+{
5589
+ /* Do the add in u32, where overflow is well-defined */
5590
+ s32 res = (s32)((u32)a + (u32)b);
5591
+
5592
+ if (b < 0)
5593
+ return res > a;
5594
+ return res < a;
5595
+}
5596
+
26675597 static bool signed_sub_overflows(s64 a, s64 b)
26685598 {
26695599 /* Do the sub in u64, where overflow is well-defined */
26705600 s64 res = (s64)((u64)a - (u64)b);
5601
+
5602
+ if (b < 0)
5603
+ return res < a;
5604
+ return res > a;
5605
+}
5606
+
5607
+static bool signed_sub32_overflows(s32 a, s32 b)
5608
+{
5609
+ /* Do the sub in u32, where overflow is well-defined */
5610
+ s32 res = (s32)((u32)a - (u32)b);
26715611
26725612 if (b < 0)
26735613 return res < a;
....@@ -2756,7 +5696,7 @@
27565696 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
27575697 const struct bpf_insn *insn)
27585698 {
2759
- return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
5699
+ return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
27605700 }
27615701
27625702 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
....@@ -2924,7 +5864,7 @@
29245864 * rewrite/sanitize them.
29255865 */
29265866 if (!vstate->speculative)
2927
- env->insn_aux_data[env->insn_idx].seen = true;
5867
+ env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
29285868 }
29295869
29305870 static int sanitize_err(struct bpf_verifier_env *env,
....@@ -2966,6 +5906,40 @@
29665906 return -EACCES;
29675907 }
29685908
5909
+/* check that stack access falls within stack limits and that 'reg' doesn't
5910
+ * have a variable offset.
5911
+ *
5912
+ * Variable offset is prohibited for unprivileged mode for simplicity since it
5913
+ * requires corresponding support in Spectre masking for stack ALU. See also
5914
+ * retrieve_ptr_limit().
5915
+ *
5916
+ *
5917
+ * 'off' includes 'reg->off'.
5918
+ */
5919
+static int check_stack_access_for_ptr_arithmetic(
5920
+ struct bpf_verifier_env *env,
5921
+ int regno,
5922
+ const struct bpf_reg_state *reg,
5923
+ int off)
5924
+{
5925
+ if (!tnum_is_const(reg->var_off)) {
5926
+ char tn_buf[48];
5927
+
5928
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5929
+ verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
5930
+ regno, tn_buf, off);
5931
+ return -EACCES;
5932
+ }
5933
+
5934
+ if (off >= 0 || off < -MAX_BPF_STACK) {
5935
+ verbose(env, "R%d stack pointer arithmetic goes out of range, "
5936
+ "prohibited for !root; off=%d\n", regno, off);
5937
+ return -EACCES;
5938
+ }
5939
+
5940
+ return 0;
5941
+}
5942
+
29695943 static int sanitize_check_bounds(struct bpf_verifier_env *env,
29705944 const struct bpf_insn *insn,
29715945 const struct bpf_reg_state *dst_reg)
....@@ -2975,17 +5949,14 @@
29755949 /* For unprivileged we require that resulting offset must be in bounds
29765950 * in order to be able to sanitize access later on.
29775951 */
2978
- if (env->allow_ptr_leaks)
5952
+ if (env->bypass_spec_v1)
29795953 return 0;
29805954
29815955 switch (dst_reg->type) {
29825956 case PTR_TO_STACK:
2983
- if (check_stack_access(env, dst_reg, dst_reg->off +
2984
- dst_reg->var_off.value, 1)) {
2985
- verbose(env, "R%d stack pointer arithmetic goes out of range, "
2986
- "prohibited for !root\n", dst);
5957
+ if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
5958
+ dst_reg->off + dst_reg->var_off.value))
29875959 return -EACCES;
2988
- }
29895960 break;
29905961 case PTR_TO_MAP_VALUE:
29915962 if (check_map_access(env, dst, dst_reg->off, 1, false)) {
....@@ -3031,32 +6002,46 @@
30316002 /* Taint dst register if offset had invalid bounds derived from
30326003 * e.g. dead branches.
30336004 */
3034
- __mark_reg_unknown(dst_reg);
6005
+ __mark_reg_unknown(env, dst_reg);
30356006 return 0;
30366007 }
30376008
30386009 if (BPF_CLASS(insn->code) != BPF_ALU64) {
30396010 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
6011
+ if (opcode == BPF_SUB && env->allow_ptr_leaks) {
6012
+ __mark_reg_unknown(env, dst_reg);
6013
+ return 0;
6014
+ }
6015
+
30406016 verbose(env,
30416017 "R%d 32-bit pointer arithmetic prohibited\n",
30426018 dst);
30436019 return -EACCES;
30446020 }
30456021
3046
- if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
3047
- verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
3048
- dst);
6022
+ switch (ptr_reg->type) {
6023
+ case PTR_TO_MAP_VALUE_OR_NULL:
6024
+ verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
6025
+ dst, reg_type_str[ptr_reg->type]);
30496026 return -EACCES;
3050
- }
3051
- if (ptr_reg->type == CONST_PTR_TO_MAP) {
3052
- verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
3053
- dst);
6027
+ case CONST_PTR_TO_MAP:
6028
+ /* smin_val represents the known value */
6029
+ if (known && smin_val == 0 && opcode == BPF_ADD)
6030
+ break;
6031
+ fallthrough;
6032
+ case PTR_TO_PACKET_END:
6033
+ case PTR_TO_SOCKET:
6034
+ case PTR_TO_SOCK_COMMON:
6035
+ case PTR_TO_TCP_SOCK:
6036
+ case PTR_TO_XDP_SOCK:
6037
+reject:
6038
+ verbose(env, "R%d pointer arithmetic on %s prohibited\n",
6039
+ dst, reg_type_str[ptr_reg->type]);
30546040 return -EACCES;
3055
- }
3056
- if (ptr_reg->type == PTR_TO_PACKET_END) {
3057
- verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
3058
- dst);
3059
- return -EACCES;
6041
+ default:
6042
+ if (reg_type_may_be_null(ptr_reg->type))
6043
+ goto reject;
6044
+ break;
30606045 }
30616046
30626047 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
....@@ -3068,6 +6053,9 @@
30686053 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
30696054 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
30706055 return -EINVAL;
6056
+
6057
+ /* pointer types do not carry 32-bit bounds at the moment. */
6058
+ __mark_reg32_unbounded(dst_reg);
30716059
30726060 if (sanitize_needed(opcode)) {
30736061 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
....@@ -3203,11 +6191,7 @@
32036191
32046192 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
32056193 return -EINVAL;
3206
-
3207
- __update_reg_bounds(dst_reg);
3208
- __reg_deduce_bounds(dst_reg);
3209
- __reg_bound_offset(dst_reg);
3210
-
6194
+ reg_bounds_sync(dst_reg);
32116195 if (sanitize_check_bounds(env, insn, dst_reg) < 0)
32126196 return -EACCES;
32136197 if (sanitize_needed(opcode)) {
....@@ -3218,6 +6202,573 @@
32186202 }
32196203
32206204 return 0;
6205
+}
6206
+
6207
+static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
6208
+ struct bpf_reg_state *src_reg)
6209
+{
6210
+ s32 smin_val = src_reg->s32_min_value;
6211
+ s32 smax_val = src_reg->s32_max_value;
6212
+ u32 umin_val = src_reg->u32_min_value;
6213
+ u32 umax_val = src_reg->u32_max_value;
6214
+
6215
+ if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
6216
+ signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
6217
+ dst_reg->s32_min_value = S32_MIN;
6218
+ dst_reg->s32_max_value = S32_MAX;
6219
+ } else {
6220
+ dst_reg->s32_min_value += smin_val;
6221
+ dst_reg->s32_max_value += smax_val;
6222
+ }
6223
+ if (dst_reg->u32_min_value + umin_val < umin_val ||
6224
+ dst_reg->u32_max_value + umax_val < umax_val) {
6225
+ dst_reg->u32_min_value = 0;
6226
+ dst_reg->u32_max_value = U32_MAX;
6227
+ } else {
6228
+ dst_reg->u32_min_value += umin_val;
6229
+ dst_reg->u32_max_value += umax_val;
6230
+ }
6231
+}
6232
+
6233
+static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
6234
+ struct bpf_reg_state *src_reg)
6235
+{
6236
+ s64 smin_val = src_reg->smin_value;
6237
+ s64 smax_val = src_reg->smax_value;
6238
+ u64 umin_val = src_reg->umin_value;
6239
+ u64 umax_val = src_reg->umax_value;
6240
+
6241
+ if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
6242
+ signed_add_overflows(dst_reg->smax_value, smax_val)) {
6243
+ dst_reg->smin_value = S64_MIN;
6244
+ dst_reg->smax_value = S64_MAX;
6245
+ } else {
6246
+ dst_reg->smin_value += smin_val;
6247
+ dst_reg->smax_value += smax_val;
6248
+ }
6249
+ if (dst_reg->umin_value + umin_val < umin_val ||
6250
+ dst_reg->umax_value + umax_val < umax_val) {
6251
+ dst_reg->umin_value = 0;
6252
+ dst_reg->umax_value = U64_MAX;
6253
+ } else {
6254
+ dst_reg->umin_value += umin_val;
6255
+ dst_reg->umax_value += umax_val;
6256
+ }
6257
+}
6258
+
6259
+static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
6260
+ struct bpf_reg_state *src_reg)
6261
+{
6262
+ s32 smin_val = src_reg->s32_min_value;
6263
+ s32 smax_val = src_reg->s32_max_value;
6264
+ u32 umin_val = src_reg->u32_min_value;
6265
+ u32 umax_val = src_reg->u32_max_value;
6266
+
6267
+ if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
6268
+ signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
6269
+ /* Overflow possible, we know nothing */
6270
+ dst_reg->s32_min_value = S32_MIN;
6271
+ dst_reg->s32_max_value = S32_MAX;
6272
+ } else {
6273
+ dst_reg->s32_min_value -= smax_val;
6274
+ dst_reg->s32_max_value -= smin_val;
6275
+ }
6276
+ if (dst_reg->u32_min_value < umax_val) {
6277
+ /* Overflow possible, we know nothing */
6278
+ dst_reg->u32_min_value = 0;
6279
+ dst_reg->u32_max_value = U32_MAX;
6280
+ } else {
6281
+ /* Cannot overflow (as long as bounds are consistent) */
6282
+ dst_reg->u32_min_value -= umax_val;
6283
+ dst_reg->u32_max_value -= umin_val;
6284
+ }
6285
+}
6286
+
6287
+static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
6288
+ struct bpf_reg_state *src_reg)
6289
+{
6290
+ s64 smin_val = src_reg->smin_value;
6291
+ s64 smax_val = src_reg->smax_value;
6292
+ u64 umin_val = src_reg->umin_value;
6293
+ u64 umax_val = src_reg->umax_value;
6294
+
6295
+ if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
6296
+ signed_sub_overflows(dst_reg->smax_value, smin_val)) {
6297
+ /* Overflow possible, we know nothing */
6298
+ dst_reg->smin_value = S64_MIN;
6299
+ dst_reg->smax_value = S64_MAX;
6300
+ } else {
6301
+ dst_reg->smin_value -= smax_val;
6302
+ dst_reg->smax_value -= smin_val;
6303
+ }
6304
+ if (dst_reg->umin_value < umax_val) {
6305
+ /* Overflow possible, we know nothing */
6306
+ dst_reg->umin_value = 0;
6307
+ dst_reg->umax_value = U64_MAX;
6308
+ } else {
6309
+ /* Cannot overflow (as long as bounds are consistent) */
6310
+ dst_reg->umin_value -= umax_val;
6311
+ dst_reg->umax_value -= umin_val;
6312
+ }
6313
+}
6314
+
6315
+static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
6316
+ struct bpf_reg_state *src_reg)
6317
+{
6318
+ s32 smin_val = src_reg->s32_min_value;
6319
+ u32 umin_val = src_reg->u32_min_value;
6320
+ u32 umax_val = src_reg->u32_max_value;
6321
+
6322
+ if (smin_val < 0 || dst_reg->s32_min_value < 0) {
6323
+ /* Ain't nobody got time to multiply that sign */
6324
+ __mark_reg32_unbounded(dst_reg);
6325
+ return;
6326
+ }
6327
+ /* Both values are positive, so we can work with unsigned and
6328
+ * copy the result to signed (unless it exceeds S32_MAX).
6329
+ */
6330
+ if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
6331
+ /* Potential overflow, we know nothing */
6332
+ __mark_reg32_unbounded(dst_reg);
6333
+ return;
6334
+ }
6335
+ dst_reg->u32_min_value *= umin_val;
6336
+ dst_reg->u32_max_value *= umax_val;
6337
+ if (dst_reg->u32_max_value > S32_MAX) {
6338
+ /* Overflow possible, we know nothing */
6339
+ dst_reg->s32_min_value = S32_MIN;
6340
+ dst_reg->s32_max_value = S32_MAX;
6341
+ } else {
6342
+ dst_reg->s32_min_value = dst_reg->u32_min_value;
6343
+ dst_reg->s32_max_value = dst_reg->u32_max_value;
6344
+ }
6345
+}
6346
+
6347
+static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
6348
+ struct bpf_reg_state *src_reg)
6349
+{
6350
+ s64 smin_val = src_reg->smin_value;
6351
+ u64 umin_val = src_reg->umin_value;
6352
+ u64 umax_val = src_reg->umax_value;
6353
+
6354
+ if (smin_val < 0 || dst_reg->smin_value < 0) {
6355
+ /* Ain't nobody got time to multiply that sign */
6356
+ __mark_reg64_unbounded(dst_reg);
6357
+ return;
6358
+ }
6359
+ /* Both values are positive, so we can work with unsigned and
6360
+ * copy the result to signed (unless it exceeds S64_MAX).
6361
+ */
6362
+ if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
6363
+ /* Potential overflow, we know nothing */
6364
+ __mark_reg64_unbounded(dst_reg);
6365
+ return;
6366
+ }
6367
+ dst_reg->umin_value *= umin_val;
6368
+ dst_reg->umax_value *= umax_val;
6369
+ if (dst_reg->umax_value > S64_MAX) {
6370
+ /* Overflow possible, we know nothing */
6371
+ dst_reg->smin_value = S64_MIN;
6372
+ dst_reg->smax_value = S64_MAX;
6373
+ } else {
6374
+ dst_reg->smin_value = dst_reg->umin_value;
6375
+ dst_reg->smax_value = dst_reg->umax_value;
6376
+ }
6377
+}
6378
+
6379
+static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
6380
+ struct bpf_reg_state *src_reg)
6381
+{
6382
+ bool src_known = tnum_subreg_is_const(src_reg->var_off);
6383
+ bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6384
+ struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6385
+ s32 smin_val = src_reg->s32_min_value;
6386
+ u32 umax_val = src_reg->u32_max_value;
6387
+
6388
+ if (src_known && dst_known) {
6389
+ __mark_reg32_known(dst_reg, var32_off.value);
6390
+ return;
6391
+ }
6392
+
6393
+ /* We get our minimum from the var_off, since that's inherently
6394
+ * bitwise. Our maximum is the minimum of the operands' maxima.
6395
+ */
6396
+ dst_reg->u32_min_value = var32_off.value;
6397
+ dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
6398
+ if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6399
+ /* Lose signed bounds when ANDing negative numbers,
6400
+ * ain't nobody got time for that.
6401
+ */
6402
+ dst_reg->s32_min_value = S32_MIN;
6403
+ dst_reg->s32_max_value = S32_MAX;
6404
+ } else {
6405
+ /* ANDing two positives gives a positive, so safe to
6406
+ * cast result into s64.
6407
+ */
6408
+ dst_reg->s32_min_value = dst_reg->u32_min_value;
6409
+ dst_reg->s32_max_value = dst_reg->u32_max_value;
6410
+ }
6411
+}
6412
+
6413
+static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
6414
+ struct bpf_reg_state *src_reg)
6415
+{
6416
+ bool src_known = tnum_is_const(src_reg->var_off);
6417
+ bool dst_known = tnum_is_const(dst_reg->var_off);
6418
+ s64 smin_val = src_reg->smin_value;
6419
+ u64 umax_val = src_reg->umax_value;
6420
+
6421
+ if (src_known && dst_known) {
6422
+ __mark_reg_known(dst_reg, dst_reg->var_off.value);
6423
+ return;
6424
+ }
6425
+
6426
+ /* We get our minimum from the var_off, since that's inherently
6427
+ * bitwise. Our maximum is the minimum of the operands' maxima.
6428
+ */
6429
+ dst_reg->umin_value = dst_reg->var_off.value;
6430
+ dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
6431
+ if (dst_reg->smin_value < 0 || smin_val < 0) {
6432
+ /* Lose signed bounds when ANDing negative numbers,
6433
+ * ain't nobody got time for that.
6434
+ */
6435
+ dst_reg->smin_value = S64_MIN;
6436
+ dst_reg->smax_value = S64_MAX;
6437
+ } else {
6438
+ /* ANDing two positives gives a positive, so safe to
6439
+ * cast result into s64.
6440
+ */
6441
+ dst_reg->smin_value = dst_reg->umin_value;
6442
+ dst_reg->smax_value = dst_reg->umax_value;
6443
+ }
6444
+ /* We may learn something more from the var_off */
6445
+ __update_reg_bounds(dst_reg);
6446
+}
6447
+
6448
+static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
6449
+ struct bpf_reg_state *src_reg)
6450
+{
6451
+ bool src_known = tnum_subreg_is_const(src_reg->var_off);
6452
+ bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6453
+ struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6454
+ s32 smin_val = src_reg->s32_min_value;
6455
+ u32 umin_val = src_reg->u32_min_value;
6456
+
6457
+ if (src_known && dst_known) {
6458
+ __mark_reg32_known(dst_reg, var32_off.value);
6459
+ return;
6460
+ }
6461
+
6462
+ /* We get our maximum from the var_off, and our minimum is the
6463
+ * maximum of the operands' minima
6464
+ */
6465
+ dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
6466
+ dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6467
+ if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6468
+ /* Lose signed bounds when ORing negative numbers,
6469
+ * ain't nobody got time for that.
6470
+ */
6471
+ dst_reg->s32_min_value = S32_MIN;
6472
+ dst_reg->s32_max_value = S32_MAX;
6473
+ } else {
6474
+ /* ORing two positives gives a positive, so safe to
6475
+ * cast result into s64.
6476
+ */
6477
+ dst_reg->s32_min_value = dst_reg->u32_min_value;
6478
+ dst_reg->s32_max_value = dst_reg->u32_max_value;
6479
+ }
6480
+}
6481
+
6482
+static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
6483
+ struct bpf_reg_state *src_reg)
6484
+{
6485
+ bool src_known = tnum_is_const(src_reg->var_off);
6486
+ bool dst_known = tnum_is_const(dst_reg->var_off);
6487
+ s64 smin_val = src_reg->smin_value;
6488
+ u64 umin_val = src_reg->umin_value;
6489
+
6490
+ if (src_known && dst_known) {
6491
+ __mark_reg_known(dst_reg, dst_reg->var_off.value);
6492
+ return;
6493
+ }
6494
+
6495
+ /* We get our maximum from the var_off, and our minimum is the
6496
+ * maximum of the operands' minima
6497
+ */
6498
+ dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
6499
+ dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6500
+ if (dst_reg->smin_value < 0 || smin_val < 0) {
6501
+ /* Lose signed bounds when ORing negative numbers,
6502
+ * ain't nobody got time for that.
6503
+ */
6504
+ dst_reg->smin_value = S64_MIN;
6505
+ dst_reg->smax_value = S64_MAX;
6506
+ } else {
6507
+ /* ORing two positives gives a positive, so safe to
6508
+ * cast result into s64.
6509
+ */
6510
+ dst_reg->smin_value = dst_reg->umin_value;
6511
+ dst_reg->smax_value = dst_reg->umax_value;
6512
+ }
6513
+ /* We may learn something more from the var_off */
6514
+ __update_reg_bounds(dst_reg);
6515
+}
6516
+
6517
+static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
6518
+ struct bpf_reg_state *src_reg)
6519
+{
6520
+ bool src_known = tnum_subreg_is_const(src_reg->var_off);
6521
+ bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6522
+ struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6523
+ s32 smin_val = src_reg->s32_min_value;
6524
+
6525
+ if (src_known && dst_known) {
6526
+ __mark_reg32_known(dst_reg, var32_off.value);
6527
+ return;
6528
+ }
6529
+
6530
+ /* We get both minimum and maximum from the var32_off. */
6531
+ dst_reg->u32_min_value = var32_off.value;
6532
+ dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6533
+
6534
+ if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
6535
+ /* XORing two positive sign numbers gives a positive,
6536
+ * so safe to cast u32 result into s32.
6537
+ */
6538
+ dst_reg->s32_min_value = dst_reg->u32_min_value;
6539
+ dst_reg->s32_max_value = dst_reg->u32_max_value;
6540
+ } else {
6541
+ dst_reg->s32_min_value = S32_MIN;
6542
+ dst_reg->s32_max_value = S32_MAX;
6543
+ }
6544
+}
6545
+
6546
+static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
6547
+ struct bpf_reg_state *src_reg)
6548
+{
6549
+ bool src_known = tnum_is_const(src_reg->var_off);
6550
+ bool dst_known = tnum_is_const(dst_reg->var_off);
6551
+ s64 smin_val = src_reg->smin_value;
6552
+
6553
+ if (src_known && dst_known) {
6554
+ /* dst_reg->var_off.value has been updated earlier */
6555
+ __mark_reg_known(dst_reg, dst_reg->var_off.value);
6556
+ return;
6557
+ }
6558
+
6559
+ /* We get both minimum and maximum from the var_off. */
6560
+ dst_reg->umin_value = dst_reg->var_off.value;
6561
+ dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6562
+
6563
+ if (dst_reg->smin_value >= 0 && smin_val >= 0) {
6564
+ /* XORing two positive sign numbers gives a positive,
6565
+ * so safe to cast u64 result into s64.
6566
+ */
6567
+ dst_reg->smin_value = dst_reg->umin_value;
6568
+ dst_reg->smax_value = dst_reg->umax_value;
6569
+ } else {
6570
+ dst_reg->smin_value = S64_MIN;
6571
+ dst_reg->smax_value = S64_MAX;
6572
+ }
6573
+
6574
+ __update_reg_bounds(dst_reg);
6575
+}
6576
+
6577
+static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
6578
+ u64 umin_val, u64 umax_val)
6579
+{
6580
+ /* We lose all sign bit information (except what we can pick
6581
+ * up from var_off)
6582
+ */
6583
+ dst_reg->s32_min_value = S32_MIN;
6584
+ dst_reg->s32_max_value = S32_MAX;
6585
+ /* If we might shift our top bit out, then we know nothing */
6586
+ if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
6587
+ dst_reg->u32_min_value = 0;
6588
+ dst_reg->u32_max_value = U32_MAX;
6589
+ } else {
6590
+ dst_reg->u32_min_value <<= umin_val;
6591
+ dst_reg->u32_max_value <<= umax_val;
6592
+ }
6593
+}
6594
+
6595
+static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
6596
+ struct bpf_reg_state *src_reg)
6597
+{
6598
+ u32 umax_val = src_reg->u32_max_value;
6599
+ u32 umin_val = src_reg->u32_min_value;
6600
+ /* u32 alu operation will zext upper bits */
6601
+ struct tnum subreg = tnum_subreg(dst_reg->var_off);
6602
+
6603
+ __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6604
+ dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
6605
+ /* Not required, but to be careful mark reg64 bounds as unknown so
6606
+ * that we are forced to pick them up from tnum and zext later and
6607
+ * if some path skips this step we are still safe.
6608
+ */
6609
+ __mark_reg64_unbounded(dst_reg);
6610
+ __update_reg32_bounds(dst_reg);
6611
+}
6612
+
6613
+static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
6614
+ u64 umin_val, u64 umax_val)
6615
+{
6616
+ /* Special case <<32 because it is a common compiler pattern to sign
6617
+ * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
6618
+ * positive we know this shift will also be positive so we can track
6619
+ * bounds correctly. Otherwise we lose all sign bit information except
6620
+ * what we can pick up from var_off. Perhaps we can generalize this
6621
+ * later to shifts of any length.
6622
+ */
6623
+ if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
6624
+ dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
6625
+ else
6626
+ dst_reg->smax_value = S64_MAX;
6627
+
6628
+ if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
6629
+ dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
6630
+ else
6631
+ dst_reg->smin_value = S64_MIN;
6632
+
6633
+ /* If we might shift our top bit out, then we know nothing */
6634
+ if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
6635
+ dst_reg->umin_value = 0;
6636
+ dst_reg->umax_value = U64_MAX;
6637
+ } else {
6638
+ dst_reg->umin_value <<= umin_val;
6639
+ dst_reg->umax_value <<= umax_val;
6640
+ }
6641
+}
6642
+
6643
+static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
6644
+ struct bpf_reg_state *src_reg)
6645
+{
6646
+ u64 umax_val = src_reg->umax_value;
6647
+ u64 umin_val = src_reg->umin_value;
6648
+
6649
+ /* scalar64 calc uses 32bit unshifted bounds so must be called first */
6650
+ __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
6651
+ __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6652
+
6653
+ dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
6654
+ /* We may learn something more from the var_off */
6655
+ __update_reg_bounds(dst_reg);
6656
+}
6657
+
6658
+static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
6659
+ struct bpf_reg_state *src_reg)
6660
+{
6661
+ struct tnum subreg = tnum_subreg(dst_reg->var_off);
6662
+ u32 umax_val = src_reg->u32_max_value;
6663
+ u32 umin_val = src_reg->u32_min_value;
6664
+
6665
+ /* BPF_RSH is an unsigned shift. If the value in dst_reg might
6666
+ * be negative, then either:
6667
+ * 1) src_reg might be zero, so the sign bit of the result is
6668
+ * unknown, so we lose our signed bounds
6669
+ * 2) it's known negative, thus the unsigned bounds capture the
6670
+ * signed bounds
6671
+ * 3) the signed bounds cross zero, so they tell us nothing
6672
+ * about the result
6673
+ * If the value in dst_reg is known nonnegative, then again the
6674
+ * unsigned bounds capture the signed bounds.
6675
+ * Thus, in all cases it suffices to blow away our signed bounds
6676
+ * and rely on inferring new ones from the unsigned bounds and
6677
+ * var_off of the result.
6678
+ */
6679
+ dst_reg->s32_min_value = S32_MIN;
6680
+ dst_reg->s32_max_value = S32_MAX;
6681
+
6682
+ dst_reg->var_off = tnum_rshift(subreg, umin_val);
6683
+ dst_reg->u32_min_value >>= umax_val;
6684
+ dst_reg->u32_max_value >>= umin_val;
6685
+
6686
+ __mark_reg64_unbounded(dst_reg);
6687
+ __update_reg32_bounds(dst_reg);
6688
+}
6689
+
6690
+static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
6691
+ struct bpf_reg_state *src_reg)
6692
+{
6693
+ u64 umax_val = src_reg->umax_value;
6694
+ u64 umin_val = src_reg->umin_value;
6695
+
6696
+ /* BPF_RSH is an unsigned shift. If the value in dst_reg might
6697
+ * be negative, then either:
6698
+ * 1) src_reg might be zero, so the sign bit of the result is
6699
+ * unknown, so we lose our signed bounds
6700
+ * 2) it's known negative, thus the unsigned bounds capture the
6701
+ * signed bounds
6702
+ * 3) the signed bounds cross zero, so they tell us nothing
6703
+ * about the result
6704
+ * If the value in dst_reg is known nonnegative, then again the
6705
+ * unsigned bounds capture the signed bounds.
6706
+ * Thus, in all cases it suffices to blow away our signed bounds
6707
+ * and rely on inferring new ones from the unsigned bounds and
6708
+ * var_off of the result.
6709
+ */
6710
+ dst_reg->smin_value = S64_MIN;
6711
+ dst_reg->smax_value = S64_MAX;
6712
+ dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
6713
+ dst_reg->umin_value >>= umax_val;
6714
+ dst_reg->umax_value >>= umin_val;
6715
+
6716
+ /* It's not easy to operate on alu32 bounds here because it depends
6717
+ * on bits being shifted in. Take easy way out and mark unbounded
6718
+ * so we can recalculate later from tnum.
6719
+ */
6720
+ __mark_reg32_unbounded(dst_reg);
6721
+ __update_reg_bounds(dst_reg);
6722
+}
6723
+
6724
+static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
6725
+ struct bpf_reg_state *src_reg)
6726
+{
6727
+ u64 umin_val = src_reg->u32_min_value;
6728
+
6729
+ /* Upon reaching here, src_known is true and
6730
+ * umax_val is equal to umin_val.
6731
+ */
6732
+ dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
6733
+ dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
6734
+
6735
+ dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
6736
+
6737
+ /* blow away the dst_reg umin_value/umax_value and rely on
6738
+ * dst_reg var_off to refine the result.
6739
+ */
6740
+ dst_reg->u32_min_value = 0;
6741
+ dst_reg->u32_max_value = U32_MAX;
6742
+
6743
+ __mark_reg64_unbounded(dst_reg);
6744
+ __update_reg32_bounds(dst_reg);
6745
+}
6746
+
6747
+static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
6748
+ struct bpf_reg_state *src_reg)
6749
+{
6750
+ u64 umin_val = src_reg->umin_value;
6751
+
6752
+ /* Upon reaching here, src_known is true and umax_val is equal
6753
+ * to umin_val.
6754
+ */
6755
+ dst_reg->smin_value >>= umin_val;
6756
+ dst_reg->smax_value >>= umin_val;
6757
+
6758
+ dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
6759
+
6760
+ /* blow away the dst_reg umin_value/umax_value and rely on
6761
+ * dst_reg var_off to refine the result.
6762
+ */
6763
+ dst_reg->umin_value = 0;
6764
+ dst_reg->umax_value = U64_MAX;
6765
+
6766
+ /* It's not easy to operate on alu32 bounds here because it depends
6767
+ * on bits being shifted in from upper 32-bits. Take easy way out
6768
+ * and mark unbounded so we can recalculate later from tnum.
6769
+ */
6770
+ __mark_reg32_unbounded(dst_reg);
6771
+ __update_reg_bounds(dst_reg);
32216772 }
32226773
32236774 /* WARNING: This function does calculations on 64-bit values, but the actual
....@@ -3231,40 +6782,52 @@
32316782 {
32326783 struct bpf_reg_state *regs = cur_regs(env);
32336784 u8 opcode = BPF_OP(insn->code);
3234
- bool src_known, dst_known;
6785
+ bool src_known;
32356786 s64 smin_val, smax_val;
32366787 u64 umin_val, umax_val;
6788
+ s32 s32_min_val, s32_max_val;
6789
+ u32 u32_min_val, u32_max_val;
32376790 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
6791
+ bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
32386792 int ret;
3239
-
3240
- if (insn_bitness == 32) {
3241
- /* Relevant for 32-bit RSH: Information can propagate towards
3242
- * LSB, so it isn't sufficient to only truncate the output to
3243
- * 32 bits.
3244
- */
3245
- coerce_reg_to_size(dst_reg, 4);
3246
- coerce_reg_to_size(&src_reg, 4);
3247
- }
32486793
32496794 smin_val = src_reg.smin_value;
32506795 smax_val = src_reg.smax_value;
32516796 umin_val = src_reg.umin_value;
32526797 umax_val = src_reg.umax_value;
3253
- src_known = tnum_is_const(src_reg.var_off);
3254
- dst_known = tnum_is_const(dst_reg->var_off);
32556798
3256
- if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
3257
- smin_val > smax_val || umin_val > umax_val) {
3258
- /* Taint dst register if offset had invalid bounds derived from
3259
- * e.g. dead branches.
3260
- */
3261
- __mark_reg_unknown(dst_reg);
3262
- return 0;
6799
+ s32_min_val = src_reg.s32_min_value;
6800
+ s32_max_val = src_reg.s32_max_value;
6801
+ u32_min_val = src_reg.u32_min_value;
6802
+ u32_max_val = src_reg.u32_max_value;
6803
+
6804
+ if (alu32) {
6805
+ src_known = tnum_subreg_is_const(src_reg.var_off);
6806
+ if ((src_known &&
6807
+ (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
6808
+ s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
6809
+ /* Taint dst register if offset had invalid bounds
6810
+ * derived from e.g. dead branches.
6811
+ */
6812
+ __mark_reg_unknown(env, dst_reg);
6813
+ return 0;
6814
+ }
6815
+ } else {
6816
+ src_known = tnum_is_const(src_reg.var_off);
6817
+ if ((src_known &&
6818
+ (smin_val != smax_val || umin_val != umax_val)) ||
6819
+ smin_val > smax_val || umin_val > umax_val) {
6820
+ /* Taint dst register if offset had invalid bounds
6821
+ * derived from e.g. dead branches.
6822
+ */
6823
+ __mark_reg_unknown(env, dst_reg);
6824
+ return 0;
6825
+ }
32636826 }
32646827
32656828 if (!src_known &&
32666829 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
3267
- __mark_reg_unknown(dst_reg);
6830
+ __mark_reg_unknown(env, dst_reg);
32686831 return 0;
32696832 }
32706833
....@@ -3274,132 +6837,50 @@
32746837 return sanitize_err(env, insn, ret, NULL, NULL);
32756838 }
32766839
6840
+ /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
6841
+ * There are two classes of instructions: for the first class we track both
6842
+ * alu32 and alu64 sign/unsigned bounds independently; this provides the
6843
+ * greatest amount of precision when alu operations are mixed with jmp32
6844
+ * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
6845
+ * and BPF_OR. This is possible because these ops have fairly easy to
6846
+ * understand and calculate behavior in both 32-bit and 64-bit alu ops.
6847
+ * See alu32 verifier tests for examples. The second class of
6848
+ * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
6849
+ * with regards to tracking sign/unsigned bounds because the bits may
6850
+ * cross subreg boundaries in the alu64 case. When this happens we mark
6851
+ * the reg unbounded in the subreg bound space and use the resulting
6852
+ * tnum to calculate an approximation of the sign/unsigned bounds.
6853
+ */
32776854 switch (opcode) {
32786855 case BPF_ADD:
3279
- if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
3280
- signed_add_overflows(dst_reg->smax_value, smax_val)) {
3281
- dst_reg->smin_value = S64_MIN;
3282
- dst_reg->smax_value = S64_MAX;
3283
- } else {
3284
- dst_reg->smin_value += smin_val;
3285
- dst_reg->smax_value += smax_val;
3286
- }
3287
- if (dst_reg->umin_value + umin_val < umin_val ||
3288
- dst_reg->umax_value + umax_val < umax_val) {
3289
- dst_reg->umin_value = 0;
3290
- dst_reg->umax_value = U64_MAX;
3291
- } else {
3292
- dst_reg->umin_value += umin_val;
3293
- dst_reg->umax_value += umax_val;
3294
- }
6856
+ scalar32_min_max_add(dst_reg, &src_reg);
6857
+ scalar_min_max_add(dst_reg, &src_reg);
32956858 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
32966859 break;
32976860 case BPF_SUB:
3298
- if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
3299
- signed_sub_overflows(dst_reg->smax_value, smin_val)) {
3300
- /* Overflow possible, we know nothing */
3301
- dst_reg->smin_value = S64_MIN;
3302
- dst_reg->smax_value = S64_MAX;
3303
- } else {
3304
- dst_reg->smin_value -= smax_val;
3305
- dst_reg->smax_value -= smin_val;
3306
- }
3307
- if (dst_reg->umin_value < umax_val) {
3308
- /* Overflow possible, we know nothing */
3309
- dst_reg->umin_value = 0;
3310
- dst_reg->umax_value = U64_MAX;
3311
- } else {
3312
- /* Cannot overflow (as long as bounds are consistent) */
3313
- dst_reg->umin_value -= umax_val;
3314
- dst_reg->umax_value -= umin_val;
3315
- }
6861
+ scalar32_min_max_sub(dst_reg, &src_reg);
6862
+ scalar_min_max_sub(dst_reg, &src_reg);
33166863 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
33176864 break;
33186865 case BPF_MUL:
33196866 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
3320
- if (smin_val < 0 || dst_reg->smin_value < 0) {
3321
- /* Ain't nobody got time to multiply that sign */
3322
- __mark_reg_unbounded(dst_reg);
3323
- __update_reg_bounds(dst_reg);
3324
- break;
3325
- }
3326
- /* Both values are positive, so we can work with unsigned and
3327
- * copy the result to signed (unless it exceeds S64_MAX).
3328
- */
3329
- if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
3330
- /* Potential overflow, we know nothing */
3331
- __mark_reg_unbounded(dst_reg);
3332
- /* (except what we can learn from the var_off) */
3333
- __update_reg_bounds(dst_reg);
3334
- break;
3335
- }
3336
- dst_reg->umin_value *= umin_val;
3337
- dst_reg->umax_value *= umax_val;
3338
- if (dst_reg->umax_value > S64_MAX) {
3339
- /* Overflow possible, we know nothing */
3340
- dst_reg->smin_value = S64_MIN;
3341
- dst_reg->smax_value = S64_MAX;
3342
- } else {
3343
- dst_reg->smin_value = dst_reg->umin_value;
3344
- dst_reg->smax_value = dst_reg->umax_value;
3345
- }
6867
+ scalar32_min_max_mul(dst_reg, &src_reg);
6868
+ scalar_min_max_mul(dst_reg, &src_reg);
33466869 break;
33476870 case BPF_AND:
3348
- if (src_known && dst_known) {
3349
- __mark_reg_known(dst_reg, dst_reg->var_off.value &
3350
- src_reg.var_off.value);
3351
- break;
3352
- }
3353
- /* We get our minimum from the var_off, since that's inherently
3354
- * bitwise. Our maximum is the minimum of the operands' maxima.
3355
- */
33566871 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
3357
- dst_reg->umin_value = dst_reg->var_off.value;
3358
- dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
3359
- if (dst_reg->smin_value < 0 || smin_val < 0) {
3360
- /* Lose signed bounds when ANDing negative numbers,
3361
- * ain't nobody got time for that.
3362
- */
3363
- dst_reg->smin_value = S64_MIN;
3364
- dst_reg->smax_value = S64_MAX;
3365
- } else {
3366
- /* ANDing two positives gives a positive, so safe to
3367
- * cast result into s64.
3368
- */
3369
- dst_reg->smin_value = dst_reg->umin_value;
3370
- dst_reg->smax_value = dst_reg->umax_value;
3371
- }
3372
- /* We may learn something more from the var_off */
3373
- __update_reg_bounds(dst_reg);
6872
+ scalar32_min_max_and(dst_reg, &src_reg);
6873
+ scalar_min_max_and(dst_reg, &src_reg);
33746874 break;
33756875 case BPF_OR:
3376
- if (src_known && dst_known) {
3377
- __mark_reg_known(dst_reg, dst_reg->var_off.value |
3378
- src_reg.var_off.value);
3379
- break;
3380
- }
3381
- /* We get our maximum from the var_off, and our minimum is the
3382
- * maximum of the operands' minima
3383
- */
33846876 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
3385
- dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
3386
- dst_reg->umax_value = dst_reg->var_off.value |
3387
- dst_reg->var_off.mask;
3388
- if (dst_reg->smin_value < 0 || smin_val < 0) {
3389
- /* Lose signed bounds when ORing negative numbers,
3390
- * ain't nobody got time for that.
3391
- */
3392
- dst_reg->smin_value = S64_MIN;
3393
- dst_reg->smax_value = S64_MAX;
3394
- } else {
3395
- /* ORing two positives gives a positive, so safe to
3396
- * cast result into s64.
3397
- */
3398
- dst_reg->smin_value = dst_reg->umin_value;
3399
- dst_reg->smax_value = dst_reg->umax_value;
3400
- }
3401
- /* We may learn something more from the var_off */
3402
- __update_reg_bounds(dst_reg);
6877
+ scalar32_min_max_or(dst_reg, &src_reg);
6878
+ scalar_min_max_or(dst_reg, &src_reg);
6879
+ break;
6880
+ case BPF_XOR:
6881
+ dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
6882
+ scalar32_min_max_xor(dst_reg, &src_reg);
6883
+ scalar_min_max_xor(dst_reg, &src_reg);
34036884 break;
34046885 case BPF_LSH:
34056886 if (umax_val >= insn_bitness) {
....@@ -3409,22 +6890,10 @@
34096890 mark_reg_unknown(env, regs, insn->dst_reg);
34106891 break;
34116892 }
3412
- /* We lose all sign bit information (except what we can pick
3413
- * up from var_off)
3414
- */
3415
- dst_reg->smin_value = S64_MIN;
3416
- dst_reg->smax_value = S64_MAX;
3417
- /* If we might shift our top bit out, then we know nothing */
3418
- if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
3419
- dst_reg->umin_value = 0;
3420
- dst_reg->umax_value = U64_MAX;
3421
- } else {
3422
- dst_reg->umin_value <<= umin_val;
3423
- dst_reg->umax_value <<= umax_val;
3424
- }
3425
- dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
3426
- /* We may learn something more from the var_off */
3427
- __update_reg_bounds(dst_reg);
6893
+ if (alu32)
6894
+ scalar32_min_max_lsh(dst_reg, &src_reg);
6895
+ else
6896
+ scalar_min_max_lsh(dst_reg, &src_reg);
34286897 break;
34296898 case BPF_RSH:
34306899 if (umax_val >= insn_bitness) {
....@@ -3434,27 +6903,10 @@
34346903 mark_reg_unknown(env, regs, insn->dst_reg);
34356904 break;
34366905 }
3437
- /* BPF_RSH is an unsigned shift. If the value in dst_reg might
3438
- * be negative, then either:
3439
- * 1) src_reg might be zero, so the sign bit of the result is
3440
- * unknown, so we lose our signed bounds
3441
- * 2) it's known negative, thus the unsigned bounds capture the
3442
- * signed bounds
3443
- * 3) the signed bounds cross zero, so they tell us nothing
3444
- * about the result
3445
- * If the value in dst_reg is known nonnegative, then again the
3446
- * unsigned bounts capture the signed bounds.
3447
- * Thus, in all cases it suffices to blow away our signed bounds
3448
- * and rely on inferring new ones from the unsigned bounds and
3449
- * var_off of the result.
3450
- */
3451
- dst_reg->smin_value = S64_MIN;
3452
- dst_reg->smax_value = S64_MAX;
3453
- dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
3454
- dst_reg->umin_value >>= umax_val;
3455
- dst_reg->umax_value >>= umin_val;
3456
- /* We may learn something more from the var_off */
3457
- __update_reg_bounds(dst_reg);
6906
+ if (alu32)
6907
+ scalar32_min_max_rsh(dst_reg, &src_reg);
6908
+ else
6909
+ scalar_min_max_rsh(dst_reg, &src_reg);
34586910 break;
34596911 case BPF_ARSH:
34606912 if (umax_val >= insn_bitness) {
....@@ -3464,40 +6916,20 @@
34646916 mark_reg_unknown(env, regs, insn->dst_reg);
34656917 break;
34666918 }
3467
-
3468
- /* Upon reaching here, src_known is true and
3469
- * umax_val is equal to umin_val.
3470
- */
3471
- if (insn_bitness == 32) {
3472
- dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val);
3473
- dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val);
3474
- } else {
3475
- dst_reg->smin_value >>= umin_val;
3476
- dst_reg->smax_value >>= umin_val;
3477
- }
3478
-
3479
- dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val,
3480
- insn_bitness);
3481
-
3482
- /* blow away the dst_reg umin_value/umax_value and rely on
3483
- * dst_reg var_off to refine the result.
3484
- */
3485
- dst_reg->umin_value = 0;
3486
- dst_reg->umax_value = U64_MAX;
3487
- __update_reg_bounds(dst_reg);
6919
+ if (alu32)
6920
+ scalar32_min_max_arsh(dst_reg, &src_reg);
6921
+ else
6922
+ scalar_min_max_arsh(dst_reg, &src_reg);
34886923 break;
34896924 default:
34906925 mark_reg_unknown(env, regs, insn->dst_reg);
34916926 break;
34926927 }
34936928
3494
- if (BPF_CLASS(insn->code) != BPF_ALU64) {
3495
- /* 32-bit ALU ops are (32,32)->32 */
3496
- coerce_reg_to_size(dst_reg, 4);
3497
- }
3498
-
3499
- __reg_deduce_bounds(dst_reg);
3500
- __reg_bound_offset(dst_reg);
6929
+ /* ALU32 ops are zero extended into 64bit register */
6930
+ if (alu32)
6931
+ zext_32_to_64(dst_reg);
6932
+ reg_bounds_sync(dst_reg);
35016933 return 0;
35026934 }
35036935
....@@ -3512,11 +6944,17 @@
35126944 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
35136945 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
35146946 u8 opcode = BPF_OP(insn->code);
6947
+ int err;
35156948
35166949 dst_reg = &regs[insn->dst_reg];
35176950 src_reg = NULL;
35186951 if (dst_reg->type != SCALAR_VALUE)
35196952 ptr_reg = dst_reg;
6953
+ else
6954
+ /* Make sure ID is cleared otherwise dst_reg min/max could be
6955
+ * incorrectly propagated into other registers by find_equal_scalars()
6956
+ */
6957
+ dst_reg->id = 0;
35206958 if (BPF_SRC(insn->code) == BPF_X) {
35216959 src_reg = &regs[insn->src_reg];
35226960 if (src_reg->type != SCALAR_VALUE) {
....@@ -3538,11 +6976,17 @@
35386976 * This is legal, but we have to reverse our
35396977 * src/dest handling in computing the range
35406978 */
6979
+ err = mark_chain_precision(env, insn->dst_reg);
6980
+ if (err)
6981
+ return err;
35416982 return adjust_ptr_min_max_vals(env, insn,
35426983 src_reg, dst_reg);
35436984 }
35446985 } else if (ptr_reg) {
35456986 /* pointer += scalar */
6987
+ err = mark_chain_precision(env, insn->src_reg);
6988
+ if (err)
6989
+ return err;
35466990 return adjust_ptr_min_max_vals(env, insn,
35476991 dst_reg, src_reg);
35486992 }
....@@ -3644,8 +7088,15 @@
36447088 /* case: R1 = R2
36457089 * copy register state to dest reg
36467090 */
7091
+ if (src_reg->type == SCALAR_VALUE && !src_reg->id)
7092
+ /* Assign src and dst registers the same ID
7093
+ * that will be used by find_equal_scalars()
7094
+ * to propagate min/max range.
7095
+ */
7096
+ src_reg->id = ++env->id_gen;
36477097 *dst_reg = *src_reg;
36487098 dst_reg->live |= REG_LIVE_WRITTEN;
7099
+ dst_reg->subreg_def = DEF_NOT_SUBREG;
36497100 } else {
36507101 /* R1 = (u32) R2 */
36517102 if (is_pointer_value(env, insn->src_reg)) {
....@@ -3655,12 +7106,19 @@
36557106 return -EACCES;
36567107 } else if (src_reg->type == SCALAR_VALUE) {
36577108 *dst_reg = *src_reg;
7109
+ /* Make sure ID is cleared otherwise
7110
+ * dst_reg min/max could be incorrectly
7111
+ * propagated into src_reg by find_equal_scalars()
7112
+ */
7113
+ dst_reg->id = 0;
36587114 dst_reg->live |= REG_LIVE_WRITTEN;
7115
+ dst_reg->subreg_def = env->insn_idx + 1;
36597116 } else {
36607117 mark_reg_unknown(env, regs,
36617118 insn->dst_reg);
36627119 }
3663
- coerce_reg_to_size(dst_reg, 4);
7120
+ zext_32_to_64(dst_reg);
7121
+ reg_bounds_sync(dst_reg);
36647122 }
36657123 } else {
36667124 /* case: R = imm
....@@ -3711,11 +7169,6 @@
37117169 return -EINVAL;
37127170 }
37137171
3714
- if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
3715
- verbose(env, "BPF_ARSH not supported for 32 bit ALU\n");
3716
- return -EINVAL;
3717
- }
3718
-
37197172 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
37207173 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
37217174 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
....@@ -3742,10 +7195,9 @@
37427195 enum bpf_reg_type type,
37437196 bool range_right_open)
37447197 {
3745
- struct bpf_func_state *state = vstate->frame[vstate->curframe];
3746
- struct bpf_reg_state *regs = state->regs, *reg;
3747
- u16 new_range;
3748
- int i, j;
7198
+ struct bpf_func_state *state;
7199
+ struct bpf_reg_state *reg;
7200
+ int new_range;
37497201
37507202 if (dst_reg->off < 0 ||
37517203 (dst_reg->off == 0 && range_right_open))
....@@ -3810,33 +7262,90 @@
38107262 * the range won't allow anything.
38117263 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
38127264 */
3813
- for (i = 0; i < MAX_BPF_REG; i++)
3814
- if (regs[i].type == type && regs[i].id == dst_reg->id)
7265
+ bpf_for_each_reg_in_vstate(vstate, state, reg, ({
7266
+ if (reg->type == type && reg->id == dst_reg->id)
38157267 /* keep the maximum range already checked */
3816
- regs[i].range = max(regs[i].range, new_range);
3817
-
3818
- for (j = 0; j <= vstate->curframe; j++) {
3819
- state = vstate->frame[j];
3820
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
3821
- if (state->stack[i].slot_type[0] != STACK_SPILL)
3822
- continue;
3823
- reg = &state->stack[i].spilled_ptr;
3824
- if (reg->type == type && reg->id == dst_reg->id)
3825
- reg->range = max(reg->range, new_range);
3826
- }
3827
- }
7268
+ reg->range = max(reg->range, new_range);
7269
+ }));
38287270 }
38297271
3830
-/* compute branch direction of the expression "if (reg opcode val) goto target;"
3831
- * and return:
3832
- * 1 - branch will be taken and "goto target" will be executed
3833
- * 0 - branch will not be taken and fall-through to next insn
3834
- * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
3835
- */
3836
-static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
7272
+static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
38377273 {
3838
- if (__is_pointer_value(false, reg))
3839
- return -1;
7274
+ struct tnum subreg = tnum_subreg(reg->var_off);
7275
+ s32 sval = (s32)val;
7276
+
7277
+ switch (opcode) {
7278
+ case BPF_JEQ:
7279
+ if (tnum_is_const(subreg))
7280
+ return !!tnum_equals_const(subreg, val);
7281
+ break;
7282
+ case BPF_JNE:
7283
+ if (tnum_is_const(subreg))
7284
+ return !tnum_equals_const(subreg, val);
7285
+ break;
7286
+ case BPF_JSET:
7287
+ if ((~subreg.mask & subreg.value) & val)
7288
+ return 1;
7289
+ if (!((subreg.mask | subreg.value) & val))
7290
+ return 0;
7291
+ break;
7292
+ case BPF_JGT:
7293
+ if (reg->u32_min_value > val)
7294
+ return 1;
7295
+ else if (reg->u32_max_value <= val)
7296
+ return 0;
7297
+ break;
7298
+ case BPF_JSGT:
7299
+ if (reg->s32_min_value > sval)
7300
+ return 1;
7301
+ else if (reg->s32_max_value <= sval)
7302
+ return 0;
7303
+ break;
7304
+ case BPF_JLT:
7305
+ if (reg->u32_max_value < val)
7306
+ return 1;
7307
+ else if (reg->u32_min_value >= val)
7308
+ return 0;
7309
+ break;
7310
+ case BPF_JSLT:
7311
+ if (reg->s32_max_value < sval)
7312
+ return 1;
7313
+ else if (reg->s32_min_value >= sval)
7314
+ return 0;
7315
+ break;
7316
+ case BPF_JGE:
7317
+ if (reg->u32_min_value >= val)
7318
+ return 1;
7319
+ else if (reg->u32_max_value < val)
7320
+ return 0;
7321
+ break;
7322
+ case BPF_JSGE:
7323
+ if (reg->s32_min_value >= sval)
7324
+ return 1;
7325
+ else if (reg->s32_max_value < sval)
7326
+ return 0;
7327
+ break;
7328
+ case BPF_JLE:
7329
+ if (reg->u32_max_value <= val)
7330
+ return 1;
7331
+ else if (reg->u32_min_value > val)
7332
+ return 0;
7333
+ break;
7334
+ case BPF_JSLE:
7335
+ if (reg->s32_max_value <= sval)
7336
+ return 1;
7337
+ else if (reg->s32_min_value > sval)
7338
+ return 0;
7339
+ break;
7340
+ }
7341
+
7342
+ return -1;
7343
+}
7344
+
7345
+
7346
+static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
7347
+{
7348
+ s64 sval = (s64)val;
38407349
38417350 switch (opcode) {
38427351 case BPF_JEQ:
....@@ -3847,6 +7356,12 @@
38477356 if (tnum_is_const(reg->var_off))
38487357 return !tnum_equals_const(reg->var_off, val);
38497358 break;
7359
+ case BPF_JSET:
7360
+ if ((~reg->var_off.mask & reg->var_off.value) & val)
7361
+ return 1;
7362
+ if (!((reg->var_off.mask | reg->var_off.value) & val))
7363
+ return 0;
7364
+ break;
38507365 case BPF_JGT:
38517366 if (reg->umin_value > val)
38527367 return 1;
....@@ -3854,9 +7369,9 @@
38547369 return 0;
38557370 break;
38567371 case BPF_JSGT:
3857
- if (reg->smin_value > (s64)val)
7372
+ if (reg->smin_value > sval)
38587373 return 1;
3859
- else if (reg->smax_value < (s64)val)
7374
+ else if (reg->smax_value <= sval)
38607375 return 0;
38617376 break;
38627377 case BPF_JLT:
....@@ -3866,9 +7381,9 @@
38667381 return 0;
38677382 break;
38687383 case BPF_JSLT:
3869
- if (reg->smax_value < (s64)val)
7384
+ if (reg->smax_value < sval)
38707385 return 1;
3871
- else if (reg->smin_value >= (s64)val)
7386
+ else if (reg->smin_value >= sval)
38727387 return 0;
38737388 break;
38747389 case BPF_JGE:
....@@ -3878,9 +7393,9 @@
38787393 return 0;
38797394 break;
38807395 case BPF_JSGE:
3881
- if (reg->smin_value >= (s64)val)
7396
+ if (reg->smin_value >= sval)
38827397 return 1;
3883
- else if (reg->smax_value < (s64)val)
7398
+ else if (reg->smax_value < sval)
38847399 return 0;
38857400 break;
38867401 case BPF_JLE:
....@@ -3890,13 +7405,109 @@
38907405 return 0;
38917406 break;
38927407 case BPF_JSLE:
3893
- if (reg->smax_value <= (s64)val)
7408
+ if (reg->smax_value <= sval)
38947409 return 1;
3895
- else if (reg->smin_value > (s64)val)
7410
+ else if (reg->smin_value > sval)
38967411 return 0;
38977412 break;
38987413 }
38997414
7415
+ return -1;
7416
+}
7417
+
7418
+/* compute branch direction of the expression "if (reg opcode val) goto target;"
7419
+ * and return:
7420
+ * 1 - branch will be taken and "goto target" will be executed
7421
+ * 0 - branch will not be taken and fall-through to next insn
7422
+ * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
7423
+ * range [0,10]
7424
+ */
7425
+static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
7426
+ bool is_jmp32)
7427
+{
7428
+ if (__is_pointer_value(false, reg)) {
7429
+ if (!reg_type_not_null(reg->type))
7430
+ return -1;
7431
+
7432
+ /* If pointer is valid tests against zero will fail so we can
7433
+ * use this to direct branch taken.
7434
+ */
7435
+ if (val != 0)
7436
+ return -1;
7437
+
7438
+ switch (opcode) {
7439
+ case BPF_JEQ:
7440
+ return 0;
7441
+ case BPF_JNE:
7442
+ return 1;
7443
+ default:
7444
+ return -1;
7445
+ }
7446
+ }
7447
+
7448
+ if (is_jmp32)
7449
+ return is_branch32_taken(reg, val, opcode);
7450
+ return is_branch64_taken(reg, val, opcode);
7451
+}
7452
+
7453
+static int flip_opcode(u32 opcode)
7454
+{
7455
+ /* How can we transform "a <op> b" into "b <op> a"? */
7456
+ static const u8 opcode_flip[16] = {
7457
+ /* these stay the same */
7458
+ [BPF_JEQ >> 4] = BPF_JEQ,
7459
+ [BPF_JNE >> 4] = BPF_JNE,
7460
+ [BPF_JSET >> 4] = BPF_JSET,
7461
+ /* these swap "lesser" and "greater" (L and G in the opcodes) */
7462
+ [BPF_JGE >> 4] = BPF_JLE,
7463
+ [BPF_JGT >> 4] = BPF_JLT,
7464
+ [BPF_JLE >> 4] = BPF_JGE,
7465
+ [BPF_JLT >> 4] = BPF_JGT,
7466
+ [BPF_JSGE >> 4] = BPF_JSLE,
7467
+ [BPF_JSGT >> 4] = BPF_JSLT,
7468
+ [BPF_JSLE >> 4] = BPF_JSGE,
7469
+ [BPF_JSLT >> 4] = BPF_JSGT
7470
+ };
7471
+ return opcode_flip[opcode >> 4];
7472
+}
7473
+
7474
+static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
7475
+ struct bpf_reg_state *src_reg,
7476
+ u8 opcode)
7477
+{
7478
+ struct bpf_reg_state *pkt;
7479
+
7480
+ if (src_reg->type == PTR_TO_PACKET_END) {
7481
+ pkt = dst_reg;
7482
+ } else if (dst_reg->type == PTR_TO_PACKET_END) {
7483
+ pkt = src_reg;
7484
+ opcode = flip_opcode(opcode);
7485
+ } else {
7486
+ return -1;
7487
+ }
7488
+
7489
+ if (pkt->range >= 0)
7490
+ return -1;
7491
+
7492
+ switch (opcode) {
7493
+ case BPF_JLE:
7494
+ /* pkt <= pkt_end */
7495
+ fallthrough;
7496
+ case BPF_JGT:
7497
+ /* pkt > pkt_end */
7498
+ if (pkt->range == BEYOND_PKT_END)
7499
+ /* pkt has at last one extra byte beyond pkt_end */
7500
+ return opcode == BPF_JGT;
7501
+ break;
7502
+ case BPF_JLT:
7503
+ /* pkt < pkt_end */
7504
+ fallthrough;
7505
+ case BPF_JGE:
7506
+ /* pkt >= pkt_end */
7507
+ if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
7508
+ return opcode == BPF_JGE;
7509
+ break;
7510
+ }
39007511 return -1;
39017512 }
39027513
....@@ -3906,9 +7517,17 @@
39067517 * In JEQ/JNE cases we also adjust the var_off values.
39077518 */
39087519 static void reg_set_min_max(struct bpf_reg_state *true_reg,
3909
- struct bpf_reg_state *false_reg, u64 val,
3910
- u8 opcode)
7520
+ struct bpf_reg_state *false_reg,
7521
+ u64 val, u32 val32,
7522
+ u8 opcode, bool is_jmp32)
39117523 {
7524
+ struct tnum false_32off = tnum_subreg(false_reg->var_off);
7525
+ struct tnum false_64off = false_reg->var_off;
7526
+ struct tnum true_32off = tnum_subreg(true_reg->var_off);
7527
+ struct tnum true_64off = true_reg->var_off;
7528
+ s64 sval = (s64)val;
7529
+ s32 sval32 = (s32)val32;
7530
+
39127531 /* If the dst_reg is a pointer, we can't learn anything about its
39137532 * variable offset from the compare (unless src_reg were a pointer into
39147533 * the same object, but we don't bother with that.
....@@ -3919,137 +7538,155 @@
39197538 return;
39207539
39217540 switch (opcode) {
7541
+ /* JEQ/JNE comparison doesn't change the register equivalence.
7542
+ *
7543
+ * r1 = r2;
7544
+ * if (r1 == 42) goto label;
7545
+ * ...
7546
+ * label: // here both r1 and r2 are known to be 42.
7547
+ *
7548
+ * Hence when marking a register as known, preserve its ID.
7549
+ */
39227550 case BPF_JEQ:
3923
- /* If this is false then we know nothing Jon Snow, but if it is
3924
- * true then we know for sure.
3925
- */
3926
- __mark_reg_known(true_reg, val);
7551
+ if (is_jmp32) {
7552
+ __mark_reg32_known(true_reg, val32);
7553
+ true_32off = tnum_subreg(true_reg->var_off);
7554
+ } else {
7555
+ ___mark_reg_known(true_reg, val);
7556
+ true_64off = true_reg->var_off;
7557
+ }
39277558 break;
39287559 case BPF_JNE:
3929
- /* If this is true we know nothing Jon Snow, but if it is false
3930
- * we know the value for sure;
3931
- */
3932
- __mark_reg_known(false_reg, val);
7560
+ if (is_jmp32) {
7561
+ __mark_reg32_known(false_reg, val32);
7562
+ false_32off = tnum_subreg(false_reg->var_off);
7563
+ } else {
7564
+ ___mark_reg_known(false_reg, val);
7565
+ false_64off = false_reg->var_off;
7566
+ }
39337567 break;
3934
- case BPF_JGT:
3935
- false_reg->umax_value = min(false_reg->umax_value, val);
3936
- true_reg->umin_value = max(true_reg->umin_value, val + 1);
3937
- break;
3938
- case BPF_JSGT:
3939
- false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3940
- true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3941
- break;
3942
- case BPF_JLT:
3943
- false_reg->umin_value = max(false_reg->umin_value, val);
3944
- true_reg->umax_value = min(true_reg->umax_value, val - 1);
3945
- break;
3946
- case BPF_JSLT:
3947
- false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3948
- true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
7568
+ case BPF_JSET:
7569
+ if (is_jmp32) {
7570
+ false_32off = tnum_and(false_32off, tnum_const(~val32));
7571
+ if (is_power_of_2(val32))
7572
+ true_32off = tnum_or(true_32off,
7573
+ tnum_const(val32));
7574
+ } else {
7575
+ false_64off = tnum_and(false_64off, tnum_const(~val));
7576
+ if (is_power_of_2(val))
7577
+ true_64off = tnum_or(true_64off,
7578
+ tnum_const(val));
7579
+ }
39497580 break;
39507581 case BPF_JGE:
3951
- false_reg->umax_value = min(false_reg->umax_value, val - 1);
3952
- true_reg->umin_value = max(true_reg->umin_value, val);
3953
- break;
3954
- case BPF_JSGE:
3955
- false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3956
- true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3957
- break;
3958
- case BPF_JLE:
3959
- false_reg->umin_value = max(false_reg->umin_value, val + 1);
3960
- true_reg->umax_value = min(true_reg->umax_value, val);
3961
- break;
3962
- case BPF_JSLE:
3963
- false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3964
- true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3965
- break;
3966
- default:
7582
+ case BPF_JGT:
7583
+ {
7584
+ if (is_jmp32) {
7585
+ u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
7586
+ u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
7587
+
7588
+ false_reg->u32_max_value = min(false_reg->u32_max_value,
7589
+ false_umax);
7590
+ true_reg->u32_min_value = max(true_reg->u32_min_value,
7591
+ true_umin);
7592
+ } else {
7593
+ u64 false_umax = opcode == BPF_JGT ? val : val - 1;
7594
+ u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
7595
+
7596
+ false_reg->umax_value = min(false_reg->umax_value, false_umax);
7597
+ true_reg->umin_value = max(true_reg->umin_value, true_umin);
7598
+ }
39677599 break;
39687600 }
7601
+ case BPF_JSGE:
7602
+ case BPF_JSGT:
7603
+ {
7604
+ if (is_jmp32) {
7605
+ s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
7606
+ s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
39697607
3970
- __reg_deduce_bounds(false_reg);
3971
- __reg_deduce_bounds(true_reg);
3972
- /* We might have learned some bits from the bounds. */
3973
- __reg_bound_offset(false_reg);
3974
- __reg_bound_offset(true_reg);
3975
- /* Intersecting with the old var_off might have improved our bounds
3976
- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3977
- * then new var_off is (0; 0x7f...fc) which improves our umax.
3978
- */
3979
- __update_reg_bounds(false_reg);
3980
- __update_reg_bounds(true_reg);
7608
+ false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
7609
+ true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
7610
+ } else {
7611
+ s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
7612
+ s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
7613
+
7614
+ false_reg->smax_value = min(false_reg->smax_value, false_smax);
7615
+ true_reg->smin_value = max(true_reg->smin_value, true_smin);
7616
+ }
7617
+ break;
7618
+ }
7619
+ case BPF_JLE:
7620
+ case BPF_JLT:
7621
+ {
7622
+ if (is_jmp32) {
7623
+ u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
7624
+ u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
7625
+
7626
+ false_reg->u32_min_value = max(false_reg->u32_min_value,
7627
+ false_umin);
7628
+ true_reg->u32_max_value = min(true_reg->u32_max_value,
7629
+ true_umax);
7630
+ } else {
7631
+ u64 false_umin = opcode == BPF_JLT ? val : val + 1;
7632
+ u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
7633
+
7634
+ false_reg->umin_value = max(false_reg->umin_value, false_umin);
7635
+ true_reg->umax_value = min(true_reg->umax_value, true_umax);
7636
+ }
7637
+ break;
7638
+ }
7639
+ case BPF_JSLE:
7640
+ case BPF_JSLT:
7641
+ {
7642
+ if (is_jmp32) {
7643
+ s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
7644
+ s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
7645
+
7646
+ false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
7647
+ true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
7648
+ } else {
7649
+ s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
7650
+ s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
7651
+
7652
+ false_reg->smin_value = max(false_reg->smin_value, false_smin);
7653
+ true_reg->smax_value = min(true_reg->smax_value, true_smax);
7654
+ }
7655
+ break;
7656
+ }
7657
+ default:
7658
+ return;
7659
+ }
7660
+
7661
+ if (is_jmp32) {
7662
+ false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
7663
+ tnum_subreg(false_32off));
7664
+ true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
7665
+ tnum_subreg(true_32off));
7666
+ __reg_combine_32_into_64(false_reg);
7667
+ __reg_combine_32_into_64(true_reg);
7668
+ } else {
7669
+ false_reg->var_off = false_64off;
7670
+ true_reg->var_off = true_64off;
7671
+ __reg_combine_64_into_32(false_reg);
7672
+ __reg_combine_64_into_32(true_reg);
7673
+ }
39817674 }
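reg_set_min_max() above refines the compared register's bounds separately for the taken and fall-through branches. The following standalone sketch reproduces just the unsigned BPF_JGT/BPF_JGE arm of that switch with concrete numbers; the field and function names here are simplified assumptions, not the kernel structures.

#include <stdint.h>
#include <stdio.h>

struct ubounds {
    uint64_t umin;
    uint64_t umax;
};

/* For "if (reg > val)" (is_jgt) or "if (reg >= val)" (!is_jgt):
 * the false branch caps umax, the true branch raises umin,
 * mirroring the BPF_JGE/BPF_JGT case of reg_set_min_max().
 */
static void set_min_max_ugt(struct ubounds *true_r, struct ubounds *false_r,
                            uint64_t val, int is_jgt)
{
    uint64_t false_umax = is_jgt ? val : val - 1;
    uint64_t true_umin = is_jgt ? val + 1 : val;

    if (false_r->umax > false_umax)
        false_r->umax = false_umax;
    if (true_r->umin < true_umin)
        true_r->umin = true_umin;
}

int main(void)
{
    struct ubounds t = { 0, 100 }, f = { 0, 100 };

    set_min_max_ugt(&t, &f, 10, 1);     /* if (reg > 10) */
    printf("true branch:  [%llu, %llu]\n",
           (unsigned long long)t.umin, (unsigned long long)t.umax);
    printf("false branch: [%llu, %llu]\n",
           (unsigned long long)f.umin, (unsigned long long)f.umax);
    return 0;
}

Starting from [0, 100], the taken branch ends up with [11, 100] and the fall-through branch with [0, 10]; the real function then feeds such refinements back through the __reg_combine_* helpers above.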
39827675
39837676 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
39847677 * the variable reg.
39857678 */
39867679 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3987
- struct bpf_reg_state *false_reg, u64 val,
3988
- u8 opcode)
7680
+ struct bpf_reg_state *false_reg,
7681
+ u64 val, u32 val32,
7682
+ u8 opcode, bool is_jmp32)
39897683 {
3990
- if (__is_pointer_value(false, false_reg))
3991
- return;
3992
-
3993
- switch (opcode) {
3994
- case BPF_JEQ:
3995
- /* If this is false then we know nothing Jon Snow, but if it is
3996
- * true then we know for sure.
3997
- */
3998
- __mark_reg_known(true_reg, val);
3999
- break;
4000
- case BPF_JNE:
4001
- /* If this is true we know nothing Jon Snow, but if it is false
4002
- * we know the value for sure;
4003
- */
4004
- __mark_reg_known(false_reg, val);
4005
- break;
4006
- case BPF_JGT:
4007
- true_reg->umax_value = min(true_reg->umax_value, val - 1);
4008
- false_reg->umin_value = max(false_reg->umin_value, val);
4009
- break;
4010
- case BPF_JSGT:
4011
- true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
4012
- false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
4013
- break;
4014
- case BPF_JLT:
4015
- true_reg->umin_value = max(true_reg->umin_value, val + 1);
4016
- false_reg->umax_value = min(false_reg->umax_value, val);
4017
- break;
4018
- case BPF_JSLT:
4019
- true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
4020
- false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
4021
- break;
4022
- case BPF_JGE:
4023
- true_reg->umax_value = min(true_reg->umax_value, val);
4024
- false_reg->umin_value = max(false_reg->umin_value, val + 1);
4025
- break;
4026
- case BPF_JSGE:
4027
- true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
4028
- false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
4029
- break;
4030
- case BPF_JLE:
4031
- true_reg->umin_value = max(true_reg->umin_value, val);
4032
- false_reg->umax_value = min(false_reg->umax_value, val - 1);
4033
- break;
4034
- case BPF_JSLE:
4035
- true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
4036
- false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
4037
- break;
4038
- default:
4039
- break;
4040
- }
4041
-
4042
- __reg_deduce_bounds(false_reg);
4043
- __reg_deduce_bounds(true_reg);
4044
- /* We might have learned some bits from the bounds. */
4045
- __reg_bound_offset(false_reg);
4046
- __reg_bound_offset(true_reg);
4047
- /* Intersecting with the old var_off might have improved our bounds
4048
- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4049
- * then new var_off is (0; 0x7f...fc) which improves our umax.
7684
+ opcode = flip_opcode(opcode);
7685
+ /* This uses zero as "not present in table"; luckily the zero opcode,
7686
+ * BPF_JA, can't get here.
40507687 */
4051
- __update_reg_bounds(false_reg);
4052
- __update_reg_bounds(true_reg);
7688
+ if (opcode)
7689
+ reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
40537690 }
40547691
40557692 /* Regs are known to be equal, so intersect their min/max/var_off */
....@@ -4066,21 +7703,8 @@
40667703 dst_reg->smax_value);
40677704 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
40687705 dst_reg->var_off);
4069
- /* We might have learned new bounds from the var_off. */
4070
- __update_reg_bounds(src_reg);
4071
- __update_reg_bounds(dst_reg);
4072
- /* We might have learned something about the sign bit. */
4073
- __reg_deduce_bounds(src_reg);
4074
- __reg_deduce_bounds(dst_reg);
4075
- /* We might have learned some bits from the bounds. */
4076
- __reg_bound_offset(src_reg);
4077
- __reg_bound_offset(dst_reg);
4078
- /* Intersecting with the old var_off might have improved our bounds
4079
- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4080
- * then new var_off is (0; 0x7f...fc) which improves our umax.
4081
- */
4082
- __update_reg_bounds(src_reg);
4083
- __update_reg_bounds(dst_reg);
7706
+ reg_bounds_sync(src_reg);
7707
+ reg_bounds_sync(dst_reg);
40847708 }
40857709
40867710 static void reg_combine_min_max(struct bpf_reg_state *true_src,
....@@ -4099,60 +7723,93 @@
40997723 }
41007724 }
41017725
4102
-static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
4103
- bool is_null)
7726
+static void mark_ptr_or_null_reg(struct bpf_func_state *state,
7727
+ struct bpf_reg_state *reg, u32 id,
7728
+ bool is_null)
41047729 {
4105
- struct bpf_reg_state *reg = &regs[regno];
4106
-
4107
- if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
4108
- /* Old offset (both fixed and variable parts) should
4109
- * have been known-zero, because we don't allow pointer
4110
- * arithmetic on pointers that might be NULL.
4111
- */
7730
+ if (reg_type_may_be_null(reg->type) && reg->id == id &&
7731
+ !WARN_ON_ONCE(!reg->id)) {
41127732 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
41137733 !tnum_equals_const(reg->var_off, 0) ||
41147734 reg->off)) {
4115
- __mark_reg_known_zero(reg);
4116
- reg->off = 0;
7735
+ /* Old offset (both fixed and variable parts) should
7736
+ * have been known-zero, because we don't allow pointer
7737
+ * arithmetic on pointers that might be NULL. If we
7738
+ * see this happening, don't convert the register.
7739
+ */
7740
+ return;
41177741 }
41187742 if (is_null) {
41197743 reg->type = SCALAR_VALUE;
4120
- } else if (reg->map_ptr->inner_map_meta) {
4121
- reg->type = CONST_PTR_TO_MAP;
4122
- reg->map_ptr = reg->map_ptr->inner_map_meta;
4123
- } else {
4124
- reg->type = PTR_TO_MAP_VALUE;
7744
+ } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
7745
+ const struct bpf_map *map = reg->map_ptr;
7746
+
7747
+ if (map->inner_map_meta) {
7748
+ reg->type = CONST_PTR_TO_MAP;
7749
+ reg->map_ptr = map->inner_map_meta;
7750
+ } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
7751
+ reg->type = PTR_TO_XDP_SOCK;
7752
+ } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
7753
+ map->map_type == BPF_MAP_TYPE_SOCKHASH) {
7754
+ reg->type = PTR_TO_SOCKET;
7755
+ } else {
7756
+ reg->type = PTR_TO_MAP_VALUE;
7757
+ }
7758
+ } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
7759
+ reg->type = PTR_TO_SOCKET;
7760
+ } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
7761
+ reg->type = PTR_TO_SOCK_COMMON;
7762
+ } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
7763
+ reg->type = PTR_TO_TCP_SOCK;
7764
+ } else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
7765
+ reg->type = PTR_TO_BTF_ID;
7766
+ } else if (reg->type == PTR_TO_MEM_OR_NULL) {
7767
+ reg->type = PTR_TO_MEM;
7768
+ } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
7769
+ reg->type = PTR_TO_RDONLY_BUF;
7770
+ } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) {
7771
+ reg->type = PTR_TO_RDWR_BUF;
41257772 }
4126
- /* We don't need id from this point onwards anymore, thus we
4127
- * should better reset it, so that state pruning has chances
4128
- * to take effect.
4129
- */
4130
- reg->id = 0;
7773
+ if (is_null) {
7774
+ /* We don't need id and ref_obj_id from this point
7775
+ * onwards anymore, thus we should reset them,
7776
+ * so that state pruning has a chance to take effect.
7777
+ */
7778
+ reg->id = 0;
7779
+ reg->ref_obj_id = 0;
7780
+ } else if (!reg_may_point_to_spin_lock(reg)) {
7781
+ /* For not-NULL ptr, reg->ref_obj_id will be reset
7782
+ * in release_reference().
7783
+ *
7784
+ * reg->id is still used by spin_lock ptr. Other
7785
+ * than spin_lock ptr type, reg->id can be reset.
7786
+ */
7787
+ reg->id = 0;
7788
+ }
41317789 }
41327790 }
41337791
41347792 /* The logic is similar to find_good_pkt_pointers(), both could eventually
41357793 * be folded together at some point.
41367794 */
4137
-static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
4138
- bool is_null)
7795
+static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
7796
+ bool is_null)
41397797 {
41407798 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4141
- struct bpf_reg_state *regs = state->regs;
7799
+ struct bpf_reg_state *regs = state->regs, *reg;
7800
+ u32 ref_obj_id = regs[regno].ref_obj_id;
41427801 u32 id = regs[regno].id;
4143
- int i, j;
41447802
4145
- for (i = 0; i < MAX_BPF_REG; i++)
4146
- mark_map_reg(regs, i, id, is_null);
7803
+ if (ref_obj_id && ref_obj_id == id && is_null)
7804
+ /* regs[regno] is in the " == NULL" branch.
7805
+ * No one could have freed the reference state before
7806
+ * doing the NULL check.
7807
+ */
7808
+ WARN_ON_ONCE(release_reference_state(state, id));
41477809
4148
- for (j = 0; j <= vstate->curframe; j++) {
4149
- state = vstate->frame[j];
4150
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
4151
- if (state->stack[i].slot_type[0] != STACK_SPILL)
4152
- continue;
4153
- mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
4154
- }
4155
- }
7810
+ bpf_for_each_reg_in_vstate(vstate, state, reg, ({
7811
+ mark_ptr_or_null_reg(state, reg, id, is_null);
7812
+ }));
41567813 }
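mark_ptr_or_null_reg() and mark_ptr_or_null_regs() above walk every register (and spilled register) sharing the checked register's id and retype it after a NULL test: the NULL branch degrades it to a scalar, the non-NULL branch drops the _OR_NULL variant. A toy sketch of that transition, using an invented enum rather than the kernel's reg_type list:

#include <stdio.h>

enum toy_type {
    SCALAR,
    PTR_TO_SOCKET_OR_NULL,
    PTR_TO_SOCKET,
};

struct toy_reg {
    enum toy_type type;
    unsigned int id;    /* non-zero id links copies of the same pointer */
};

static void mark_ptr_or_null(struct toy_reg *reg, unsigned int id, int is_null)
{
    if (reg->type != PTR_TO_SOCKET_OR_NULL || reg->id != id)
        return;
    /* NULL branch: the value is known to be 0, treat it as a scalar.
     * Non-NULL branch: the pointer is now safe to dereference.
     */
    reg->type = is_null ? SCALAR : PTR_TO_SOCKET;
    reg->id = 0;        /* id no longer needed; helps state pruning */
}

int main(void)
{
    struct toy_reg r = { PTR_TO_SOCKET_OR_NULL, 3 };

    mark_ptr_or_null(&r, 3, 0);     /* "!= NULL" branch */
    printf("type after NULL check: %d\n", r.type);
    return 0;
}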
41577814
41587815 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
....@@ -4164,6 +7821,10 @@
41647821 if (BPF_SRC(insn->code) != BPF_X)
41657822 return false;
41667823
7824
+ /* Pointers are always 64-bit. */
7825
+ if (BPF_CLASS(insn->code) == BPF_JMP32)
7826
+ return false;
7827
+
41677828 switch (BPF_OP(insn->code)) {
41687829 case BPF_JGT:
41697830 if ((dst_reg->type == PTR_TO_PACKET &&
....@@ -4173,6 +7834,7 @@
41737834 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
41747835 find_good_pkt_pointers(this_branch, dst_reg,
41757836 dst_reg->type, false);
7837
+ mark_pkt_end(other_branch, insn->dst_reg, true);
41767838 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
41777839 src_reg->type == PTR_TO_PACKET) ||
41787840 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
....@@ -4180,6 +7842,7 @@
41807842 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
41817843 find_good_pkt_pointers(other_branch, src_reg,
41827844 src_reg->type, true);
7845
+ mark_pkt_end(this_branch, insn->src_reg, false);
41837846 } else {
41847847 return false;
41857848 }
....@@ -4192,6 +7855,7 @@
41927855 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
41937856 find_good_pkt_pointers(other_branch, dst_reg,
41947857 dst_reg->type, true);
7858
+ mark_pkt_end(this_branch, insn->dst_reg, false);
41957859 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
41967860 src_reg->type == PTR_TO_PACKET) ||
41977861 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
....@@ -4199,6 +7863,7 @@
41997863 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
42007864 find_good_pkt_pointers(this_branch, src_reg,
42017865 src_reg->type, false);
7866
+ mark_pkt_end(other_branch, insn->src_reg, true);
42027867 } else {
42037868 return false;
42047869 }
....@@ -4211,6 +7876,7 @@
42117876 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
42127877 find_good_pkt_pointers(this_branch, dst_reg,
42137878 dst_reg->type, true);
7879
+ mark_pkt_end(other_branch, insn->dst_reg, false);
42147880 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
42157881 src_reg->type == PTR_TO_PACKET) ||
42167882 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
....@@ -4218,6 +7884,7 @@
42187884 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
42197885 find_good_pkt_pointers(other_branch, src_reg,
42207886 src_reg->type, false);
7887
+ mark_pkt_end(this_branch, insn->src_reg, true);
42217888 } else {
42227889 return false;
42237890 }
....@@ -4230,6 +7897,7 @@
42307897 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
42317898 find_good_pkt_pointers(other_branch, dst_reg,
42327899 dst_reg->type, false);
7900
+ mark_pkt_end(this_branch, insn->dst_reg, true);
42337901 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
42347902 src_reg->type == PTR_TO_PACKET) ||
42357903 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
....@@ -4237,6 +7905,7 @@
42377905 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
42387906 find_good_pkt_pointers(this_branch, src_reg,
42397907 src_reg->type, true);
7908
+ mark_pkt_end(other_branch, insn->src_reg, false);
42407909 } else {
42417910 return false;
42427911 }
....@@ -4248,6 +7917,18 @@
42487917 return true;
42497918 }
42507919
7920
+static void find_equal_scalars(struct bpf_verifier_state *vstate,
7921
+ struct bpf_reg_state *known_reg)
7922
+{
7923
+ struct bpf_func_state *state;
7924
+ struct bpf_reg_state *reg;
7925
+
7926
+ bpf_for_each_reg_in_vstate(vstate, state, reg, ({
7927
+ if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
7928
+ *reg = *known_reg;
7929
+ }));
7930
+}
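find_equal_scalars() above copies newly learned bounds to every scalar register carrying the same id, i.e. registers known to hold the same value (for example after "r1 = r2"). A minimal flat-array sketch of that propagation; the types are illustrative, not bpf_reg_state:

#include <stdint.h>
#include <stdio.h>

struct scalar_reg {
    uint32_t id;        /* non-zero id links registers holding the same value */
    uint64_t umin, umax;
};

static void find_equal_scalars(struct scalar_reg *regs, int nregs,
                               const struct scalar_reg *known)
{
    for (int i = 0; i < nregs; i++)
        if (regs[i].id && regs[i].id == known->id)
            regs[i] = *known;   /* copy the refined bounds */
}

int main(void)
{
    /* r1 = r2: both carry id 7 with still-unknown bounds. */
    struct scalar_reg regs[3] = {
        { 0, 0, UINT64_MAX },
        { 7, 0, UINT64_MAX },   /* r1 */
        { 7, 0, UINT64_MAX },   /* r2 */
    };
    /* "if (r1 < 16)" refined r1 on the taken branch: */
    struct scalar_reg known = { 7, 0, 15 };

    find_equal_scalars(regs, 3, &known);
    printf("r2 now has umax %llu\n", (unsigned long long)regs[2].umax);
    return 0;
}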
7931
+
42517932 static int check_cond_jmp_op(struct bpf_verifier_env *env,
42527933 struct bpf_insn *insn, int *insn_idx)
42537934 {
....@@ -4256,17 +7937,19 @@
42567937 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
42577938 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
42587939 u8 opcode = BPF_OP(insn->code);
7940
+ bool is_jmp32;
42597941 int pred = -1;
42607942 int err;
42617943
4262
- if (opcode > BPF_JSLE) {
4263
- verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
7944
+ /* Only conditional jumps are expected to reach here. */
7945
+ if (opcode == BPF_JA || opcode > BPF_JSLE) {
7946
+ verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
42647947 return -EINVAL;
42657948 }
42667949
42677950 if (BPF_SRC(insn->code) == BPF_X) {
42687951 if (insn->imm != 0) {
4269
- verbose(env, "BPF_JMP uses reserved fields\n");
7952
+ verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
42707953 return -EINVAL;
42717954 }
42727955
....@@ -4283,7 +7966,7 @@
42837966 src_reg = &regs[insn->src_reg];
42847967 } else {
42857968 if (insn->src_reg != BPF_REG_0) {
4286
- verbose(env, "BPF_JMP uses reserved fields\n");
7969
+ verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
42877970 return -EINVAL;
42887971 }
42897972 }
....@@ -4294,20 +7977,47 @@
42947977 return err;
42957978
42967979 dst_reg = &regs[insn->dst_reg];
7980
+ is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
42977981
4298
- if (BPF_SRC(insn->code) == BPF_K)
4299
- pred = is_branch_taken(dst_reg, insn->imm, opcode);
4300
- else if (src_reg->type == SCALAR_VALUE &&
4301
- tnum_is_const(src_reg->var_off))
4302
- pred = is_branch_taken(dst_reg, src_reg->var_off.value,
4303
- opcode);
7982
+ if (BPF_SRC(insn->code) == BPF_K) {
7983
+ pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
7984
+ } else if (src_reg->type == SCALAR_VALUE &&
7985
+ is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
7986
+ pred = is_branch_taken(dst_reg,
7987
+ tnum_subreg(src_reg->var_off).value,
7988
+ opcode,
7989
+ is_jmp32);
7990
+ } else if (src_reg->type == SCALAR_VALUE &&
7991
+ !is_jmp32 && tnum_is_const(src_reg->var_off)) {
7992
+ pred = is_branch_taken(dst_reg,
7993
+ src_reg->var_off.value,
7994
+ opcode,
7995
+ is_jmp32);
7996
+ } else if (reg_is_pkt_pointer_any(dst_reg) &&
7997
+ reg_is_pkt_pointer_any(src_reg) &&
7998
+ !is_jmp32) {
7999
+ pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
8000
+ }
8001
+
8002
+ if (pred >= 0) {
8003
+ /* If we get here with a dst_reg pointer type it is because
8004
+ * above is_branch_taken() special cased the 0 comparison.
8005
+ */
8006
+ if (!__is_pointer_value(false, dst_reg))
8007
+ err = mark_chain_precision(env, insn->dst_reg);
8008
+ if (BPF_SRC(insn->code) == BPF_X && !err &&
8009
+ !__is_pointer_value(false, src_reg))
8010
+ err = mark_chain_precision(env, insn->src_reg);
8011
+ if (err)
8012
+ return err;
8013
+ }
43048014
43058015 if (pred == 1) {
43068016 /* Only follow the goto, ignore fall-through. If needed, push
43078017 * the fall-through branch for simulation under speculative
43088018 * execution.
43098019 */
4310
- if (!env->allow_ptr_leaks &&
8020
+ if (!env->bypass_spec_v1 &&
43118021 !sanitize_speculative_path(env, insn, *insn_idx + 1,
43128022 *insn_idx))
43138023 return -EFAULT;
....@@ -4318,7 +8028,7 @@
43188028 * program will go. If needed, push the goto branch for
43198029 * simulation under speculative execution.
43208030 */
4321
- if (!env->allow_ptr_leaks &&
8031
+ if (!env->bypass_spec_v1 &&
43228032 !sanitize_speculative_path(env, insn,
43238033 *insn_idx + insn->off + 1,
43248034 *insn_idx))
....@@ -4340,37 +8050,65 @@
43408050 * comparable.
43418051 */
43428052 if (BPF_SRC(insn->code) == BPF_X) {
8053
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
8054
+
43438055 if (dst_reg->type == SCALAR_VALUE &&
4344
- regs[insn->src_reg].type == SCALAR_VALUE) {
4345
- if (tnum_is_const(regs[insn->src_reg].var_off))
8056
+ src_reg->type == SCALAR_VALUE) {
8057
+ if (tnum_is_const(src_reg->var_off) ||
8058
+ (is_jmp32 &&
8059
+ tnum_is_const(tnum_subreg(src_reg->var_off))))
43468060 reg_set_min_max(&other_branch_regs[insn->dst_reg],
4347
- dst_reg, regs[insn->src_reg].var_off.value,
4348
- opcode);
4349
- else if (tnum_is_const(dst_reg->var_off))
8061
+ dst_reg,
8062
+ src_reg->var_off.value,
8063
+ tnum_subreg(src_reg->var_off).value,
8064
+ opcode, is_jmp32);
8065
+ else if (tnum_is_const(dst_reg->var_off) ||
8066
+ (is_jmp32 &&
8067
+ tnum_is_const(tnum_subreg(dst_reg->var_off))))
43508068 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
4351
- &regs[insn->src_reg],
4352
- dst_reg->var_off.value, opcode);
4353
- else if (opcode == BPF_JEQ || opcode == BPF_JNE)
8069
+ src_reg,
8070
+ dst_reg->var_off.value,
8071
+ tnum_subreg(dst_reg->var_off).value,
8072
+ opcode, is_jmp32);
8073
+ else if (!is_jmp32 &&
8074
+ (opcode == BPF_JEQ || opcode == BPF_JNE))
43548075 /* Comparing for equality, we can combine knowledge */
43558076 reg_combine_min_max(&other_branch_regs[insn->src_reg],
43568077 &other_branch_regs[insn->dst_reg],
4357
- &regs[insn->src_reg],
4358
- &regs[insn->dst_reg], opcode);
8078
+ src_reg, dst_reg, opcode);
8079
+ if (src_reg->id &&
8080
+ !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
8081
+ find_equal_scalars(this_branch, src_reg);
8082
+ find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
8083
+ }
8084
+
43598085 }
43608086 } else if (dst_reg->type == SCALAR_VALUE) {
43618087 reg_set_min_max(&other_branch_regs[insn->dst_reg],
4362
- dst_reg, insn->imm, opcode);
8088
+ dst_reg, insn->imm, (u32)insn->imm,
8089
+ opcode, is_jmp32);
43638090 }
43648091
4365
- /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
4366
- if (BPF_SRC(insn->code) == BPF_K &&
8092
+ if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
8093
+ !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
8094
+ find_equal_scalars(this_branch, dst_reg);
8095
+ find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
8096
+ }
8097
+
8098
+ /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
8099
+ * NOTE: these optimizations below are related to pointer comparison
8100
+ * which will never be JMP32.
8101
+ */
8102
+ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
43678103 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
4368
- dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
4369
- /* Mark all identical map registers in each branch as either
8104
+ reg_type_may_be_null(dst_reg->type)) {
8105
+ /* Mark all identical registers in each branch as either
43708106 * safe or unknown depending R == 0 or R != 0 conditional.
43718107 */
4372
- mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
4373
- mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
8108
+ mark_ptr_or_null_regs(this_branch, insn->dst_reg,
8109
+ opcode == BPF_JNE);
8110
+ mark_ptr_or_null_regs(other_branch, insn->dst_reg,
8111
+ opcode == BPF_JEQ);
43748112 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
43758113 this_branch, other_branch) &&
43768114 is_pointer_value(env, insn->dst_reg)) {
....@@ -4378,23 +8116,18 @@
43788116 insn->dst_reg);
43798117 return -EACCES;
43808118 }
4381
- if (env->log.level)
8119
+ if (env->log.level & BPF_LOG_LEVEL)
43828120 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
43838121 return 0;
4384
-}
4385
-
4386
-/* return the map pointer stored inside BPF_LD_IMM64 instruction */
4387
-static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
4388
-{
4389
- u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
4390
-
4391
- return (struct bpf_map *) (unsigned long) imm64;
43928122 }
43938123
43948124 /* verify BPF_LD_IMM64 instruction */
43958125 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
43968126 {
8127
+ struct bpf_insn_aux_data *aux = cur_aux(env);
43978128 struct bpf_reg_state *regs = cur_regs(env);
8129
+ struct bpf_reg_state *dst_reg;
8130
+ struct bpf_map *map;
43988131 int err;
43998132
44008133 if (BPF_SIZE(insn->code) != BPF_DW) {
....@@ -4410,19 +8143,50 @@
44108143 if (err)
44118144 return err;
44128145
8146
+ dst_reg = &regs[insn->dst_reg];
44138147 if (insn->src_reg == 0) {
44148148 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
44158149
4416
- regs[insn->dst_reg].type = SCALAR_VALUE;
8150
+ dst_reg->type = SCALAR_VALUE;
44178151 __mark_reg_known(&regs[insn->dst_reg], imm);
44188152 return 0;
44198153 }
44208154
4421
- /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
4422
- BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
8155
+ if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
8156
+ mark_reg_known_zero(env, regs, insn->dst_reg);
44238157
4424
- regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
4425
- regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
8158
+ dst_reg->type = aux->btf_var.reg_type;
8159
+ switch (dst_reg->type) {
8160
+ case PTR_TO_MEM:
8161
+ dst_reg->mem_size = aux->btf_var.mem_size;
8162
+ break;
8163
+ case PTR_TO_BTF_ID:
8164
+ case PTR_TO_PERCPU_BTF_ID:
8165
+ dst_reg->btf_id = aux->btf_var.btf_id;
8166
+ break;
8167
+ default:
8168
+ verbose(env, "bpf verifier is misconfigured\n");
8169
+ return -EFAULT;
8170
+ }
8171
+ return 0;
8172
+ }
8173
+
8174
+ map = env->used_maps[aux->map_index];
8175
+ mark_reg_known_zero(env, regs, insn->dst_reg);
8176
+ dst_reg->map_ptr = map;
8177
+
8178
+ if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
8179
+ dst_reg->type = PTR_TO_MAP_VALUE;
8180
+ dst_reg->off = aux->map_off;
8181
+ if (map_value_has_spin_lock(map))
8182
+ dst_reg->id = ++env->id_gen;
8183
+ } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
8184
+ dst_reg->type = CONST_PTR_TO_MAP;
8185
+ } else {
8186
+ verbose(env, "bpf verifier is misconfigured\n");
8187
+ return -EINVAL;
8188
+ }
8189
+
44268190 return 0;
44278191 }
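check_ld_imm() above reconstructs a 64-bit constant from the two 32-bit imm fields of a BPF_LD_IMM64 instruction pair (insn->imm holds the low half, (insn + 1)->imm the high half). A small user-space sketch of that split and reassembly, with an invented two-element stand-in for the instruction pair:

#include <stdint.h>
#include <stdio.h>

/* Minimal stand-in for the imm field of two consecutive instructions. */
struct insn_imm {
    int32_t imm;
};

int main(void)
{
    uint64_t value = 0x1122334455667788ULL;

    /* BPF_LD_IMM64 spreads the constant over two instructions:
     * insn[0].imm holds the low 32 bits, insn[1].imm the high 32 bits.
     */
    struct insn_imm insn[2] = {
        { .imm = (int32_t)(uint32_t)value },
        { .imm = (int32_t)(uint32_t)(value >> 32) },
    };

    /* Same reassembly as in check_ld_imm(). */
    uint64_t imm = ((uint64_t)(uint32_t)insn[1].imm << 32) |
                   (uint32_t)insn[0].imm;

    printf("reassembled: 0x%llx\n", (unsigned long long)imm);
    return 0;
}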
44288192
....@@ -4460,25 +8224,13 @@
44608224 u8 mode = BPF_MODE(insn->code);
44618225 int i, err;
44628226
4463
- if (!may_access_skb(env->prog->type)) {
8227
+ if (!may_access_skb(resolve_prog_type(env->prog))) {
44648228 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
44658229 return -EINVAL;
44668230 }
44678231
44688232 if (!env->ops->gen_ld_abs) {
44698233 verbose(env, "bpf verifier is misconfigured\n");
4470
- return -EINVAL;
4471
- }
4472
-
4473
- if (env->subprog_cnt > 1) {
4474
- /* when program has LD_ABS insn JITs and interpreter assume
4475
- * that r1 == ctx == skb which is not the case for callees
4476
- * that can have arbitrary arguments. It's problematic
4477
- * for main prog as well since JITs would need to analyze
4478
- * all functions in order to make proper register save/restore
4479
- * decisions in the main prog. Hence disallow LD_ABS with calls
4480
- */
4481
- verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
44828234 return -EINVAL;
44838235 }
44848236
....@@ -4493,6 +8245,21 @@
44938245 err = check_reg_arg(env, ctx_reg, SRC_OP);
44948246 if (err)
44958247 return err;
8248
+
8249
+ /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
8250
+ * gen_ld_abs() may terminate the program at runtime, leading to
8251
+ * reference leak.
8252
+ */
8253
+ err = check_reference_leak(env);
8254
+ if (err) {
8255
+ verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
8256
+ return err;
8257
+ }
8258
+
8259
+ if (env->cur_state->active_spin_lock) {
8260
+ verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
8261
+ return -EINVAL;
8262
+ }
44968263
44978264 if (regs[ctx_reg].type != PTR_TO_CTX) {
44988265 verbose(env,
....@@ -4522,29 +8289,106 @@
45228289 * Already marked as written above.
45238290 */
45248291 mark_reg_unknown(env, regs, BPF_REG_0);
8292
+ /* ld_abs loads up to 32-bit skb data. */
8293
+ regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
45258294 return 0;
45268295 }
45278296
45288297 static int check_return_code(struct bpf_verifier_env *env)
45298298 {
8299
+ struct tnum enforce_attach_type_range = tnum_unknown;
8300
+ const struct bpf_prog *prog = env->prog;
45308301 struct bpf_reg_state *reg;
45318302 struct tnum range = tnum_range(0, 1);
8303
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
8304
+ int err;
8305
+ const bool is_subprog = env->cur_state->frame[0]->subprogno;
45328306
4533
- switch (env->prog->type) {
8307
+ /* LSM and struct_ops func-ptr's return type could be "void" */
8308
+ if (!is_subprog &&
8309
+ (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
8310
+ prog_type == BPF_PROG_TYPE_LSM) &&
8311
+ !prog->aux->attach_func_proto->type)
8312
+ return 0;
8313
+
8314
+ /* eBPF calling convention is such that R0 is used
8315
+ * to return the value from eBPF program.
8316
+ * Make sure that it's readable at this time
8317
+ * of bpf_exit, which means that program wrote
8318
+ * something into it earlier
8319
+ */
8320
+ err = check_reg_arg(env, BPF_REG_0, SRC_OP);
8321
+ if (err)
8322
+ return err;
8323
+
8324
+ if (is_pointer_value(env, BPF_REG_0)) {
8325
+ verbose(env, "R0 leaks addr as return value\n");
8326
+ return -EACCES;
8327
+ }
8328
+
8329
+ reg = cur_regs(env) + BPF_REG_0;
8330
+ if (is_subprog) {
8331
+ if (reg->type != SCALAR_VALUE) {
8332
+ verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
8333
+ reg_type_str[reg->type]);
8334
+ return -EINVAL;
8335
+ }
8336
+ return 0;
8337
+ }
8338
+
8339
+ switch (prog_type) {
45348340 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
45358341 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
4536
- env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
8342
+ env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
8343
+ env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
8344
+ env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
8345
+ env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
8346
+ env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
45378347 range = tnum_range(1, 1);
8348
+ break;
45388349 case BPF_PROG_TYPE_CGROUP_SKB:
8350
+ if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
8351
+ range = tnum_range(0, 3);
8352
+ enforce_attach_type_range = tnum_range(2, 3);
8353
+ }
8354
+ break;
45398355 case BPF_PROG_TYPE_CGROUP_SOCK:
45408356 case BPF_PROG_TYPE_SOCK_OPS:
45418357 case BPF_PROG_TYPE_CGROUP_DEVICE:
8358
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
8359
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
45428360 break;
8361
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
8362
+ if (!env->prog->aux->attach_btf_id)
8363
+ return 0;
8364
+ range = tnum_const(0);
8365
+ break;
8366
+ case BPF_PROG_TYPE_TRACING:
8367
+ switch (env->prog->expected_attach_type) {
8368
+ case BPF_TRACE_FENTRY:
8369
+ case BPF_TRACE_FEXIT:
8370
+ range = tnum_const(0);
8371
+ break;
8372
+ case BPF_TRACE_RAW_TP:
8373
+ case BPF_MODIFY_RETURN:
8374
+ return 0;
8375
+ case BPF_TRACE_ITER:
8376
+ break;
8377
+ default:
8378
+ return -ENOTSUPP;
8379
+ }
8380
+ break;
8381
+ case BPF_PROG_TYPE_SK_LOOKUP:
8382
+ range = tnum_range(SK_DROP, SK_PASS);
8383
+ break;
8384
+ case BPF_PROG_TYPE_EXT:
8385
+ /* freplace program can return anything as its return value
8386
+ * depends on the to-be-replaced kernel func or bpf program.
8387
+ */
45438388 default:
45448389 return 0;
45458390 }
45468391
4547
- reg = cur_regs(env) + BPF_REG_0;
45488392 if (reg->type != SCALAR_VALUE) {
45498393 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
45508394 reg_type_str[reg->type]);
....@@ -4565,6 +8409,10 @@
45658409 verbose(env, " should have been in %s\n", tn_buf);
45668410 return -EINVAL;
45678411 }
8412
+
8413
+ if (!tnum_is_unknown(enforce_attach_type_range) &&
8414
+ tnum_in(enforce_attach_type_range, reg->var_off))
8415
+ env->prog->enforce_expected_attach_type = 1;
45688416 return 0;
45698417 }
45708418
....@@ -4608,19 +8456,37 @@
46088456 BRANCH = 2,
46098457 };
46108458
4611
-#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
8459
+static u32 state_htab_size(struct bpf_verifier_env *env)
8460
+{
8461
+ return env->prog->len;
8462
+}
46128463
4613
-static int *insn_stack; /* stack of insns to process */
4614
-static int cur_stack; /* current stack index */
4615
-static int *insn_state;
8464
+static struct bpf_verifier_state_list **explored_state(
8465
+ struct bpf_verifier_env *env,
8466
+ int idx)
8467
+{
8468
+ struct bpf_verifier_state *cur = env->cur_state;
8469
+ struct bpf_func_state *state = cur->frame[cur->curframe];
8470
+
8471
+ return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
8472
+}
8473
+
8474
+static void init_explored_state(struct bpf_verifier_env *env, int idx)
8475
+{
8476
+ env->insn_aux_data[idx].prune_point = true;
8477
+}
46168478
46178479 /* t, w, e - match pseudo-code above:
46188480 * t - index of current instruction
46198481 * w - next instruction
46208482 * e - edge
46218483 */
4622
-static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
8484
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
8485
+ bool loop_ok)
46238486 {
8487
+ int *insn_stack = env->cfg.insn_stack;
8488
+ int *insn_state = env->cfg.insn_state;
8489
+
46248490 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
46258491 return 0;
46268492
....@@ -4628,23 +8494,28 @@
46288494 return 0;
46298495
46308496 if (w < 0 || w >= env->prog->len) {
8497
+ verbose_linfo(env, t, "%d: ", t);
46318498 verbose(env, "jump out of range from insn %d to %d\n", t, w);
46328499 return -EINVAL;
46338500 }
46348501
46358502 if (e == BRANCH)
46368503 /* mark branch target for state pruning */
4637
- env->explored_states[w] = STATE_LIST_MARK;
8504
+ init_explored_state(env, w);
46388505
46398506 if (insn_state[w] == 0) {
46408507 /* tree-edge */
46418508 insn_state[t] = DISCOVERED | e;
46428509 insn_state[w] = DISCOVERED;
4643
- if (cur_stack >= env->prog->len)
8510
+ if (env->cfg.cur_stack >= env->prog->len)
46448511 return -E2BIG;
4645
- insn_stack[cur_stack++] = w;
8512
+ insn_stack[env->cfg.cur_stack++] = w;
46468513 return 1;
46478514 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
8515
+ if (loop_ok && env->bpf_capable)
8516
+ return 0;
8517
+ verbose_linfo(env, t, "%d: ", t);
8518
+ verbose_linfo(env, w, "%d: ", w);
46488519 verbose(env, "back-edge from insn %d to %d\n", t, w);
46498520 return -EINVAL;
46508521 } else if (insn_state[w] == EXPLORED) {
....@@ -4664,48 +8535,47 @@
46648535 {
46658536 struct bpf_insn *insns = env->prog->insnsi;
46668537 int insn_cnt = env->prog->len;
8538
+ int *insn_stack, *insn_state;
46678539 int ret = 0;
46688540 int i, t;
46698541
4670
- ret = check_subprogs(env);
4671
- if (ret < 0)
4672
- return ret;
4673
-
4674
- insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8542
+ insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
46758543 if (!insn_state)
46768544 return -ENOMEM;
46778545
4678
- insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8546
+ insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
46798547 if (!insn_stack) {
4680
- kfree(insn_state);
8548
+ kvfree(insn_state);
46818549 return -ENOMEM;
46828550 }
46838551
46848552 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
46858553 insn_stack[0] = 0; /* 0 is the first instruction */
4686
- cur_stack = 1;
8554
+ env->cfg.cur_stack = 1;
46878555
46888556 peek_stack:
4689
- if (cur_stack == 0)
8557
+ if (env->cfg.cur_stack == 0)
46908558 goto check_state;
4691
- t = insn_stack[cur_stack - 1];
8559
+ t = insn_stack[env->cfg.cur_stack - 1];
46928560
4693
- if (BPF_CLASS(insns[t].code) == BPF_JMP) {
8561
+ if (BPF_CLASS(insns[t].code) == BPF_JMP ||
8562
+ BPF_CLASS(insns[t].code) == BPF_JMP32) {
46948563 u8 opcode = BPF_OP(insns[t].code);
46958564
46968565 if (opcode == BPF_EXIT) {
46978566 goto mark_explored;
46988567 } else if (opcode == BPF_CALL) {
4699
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
8568
+ ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
47008569 if (ret == 1)
47018570 goto peek_stack;
47028571 else if (ret < 0)
47038572 goto err_free;
47048573 if (t + 1 < insn_cnt)
4705
- env->explored_states[t + 1] = STATE_LIST_MARK;
8574
+ init_explored_state(env, t + 1);
47068575 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
4707
- env->explored_states[t] = STATE_LIST_MARK;
4708
- ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
8576
+ init_explored_state(env, t);
8577
+ ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
8578
+ env, false);
47098579 if (ret == 1)
47108580 goto peek_stack;
47118581 else if (ret < 0)
....@@ -4718,26 +8588,31 @@
47188588 }
47198589 /* unconditional jump with single edge */
47208590 ret = push_insn(t, t + insns[t].off + 1,
4721
- FALLTHROUGH, env);
8591
+ FALLTHROUGH, env, true);
47228592 if (ret == 1)
47238593 goto peek_stack;
47248594 else if (ret < 0)
47258595 goto err_free;
8596
+ /* unconditional jmp is not a good pruning point,
8597
+ * but it's marked, since backtracking needs
8598
+ * to record jmp history in is_state_visited().
8599
+ */
8600
+ init_explored_state(env, t + insns[t].off + 1);
47268601 /* tell verifier to check for equivalent states
47278602 * after every call and jump
47288603 */
47298604 if (t + 1 < insn_cnt)
4730
- env->explored_states[t + 1] = STATE_LIST_MARK;
8605
+ init_explored_state(env, t + 1);
47318606 } else {
47328607 /* conditional jump with two edges */
4733
- env->explored_states[t] = STATE_LIST_MARK;
4734
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
8608
+ init_explored_state(env, t);
8609
+ ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
47358610 if (ret == 1)
47368611 goto peek_stack;
47378612 else if (ret < 0)
47388613 goto err_free;
47398614
4740
- ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
8615
+ ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
47418616 if (ret == 1)
47428617 goto peek_stack;
47438618 else if (ret < 0)
....@@ -4747,7 +8622,7 @@
47478622 /* all other non-branch instructions with single
47488623 * fall-through edge
47498624 */
4750
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
8625
+ ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
47518626 if (ret == 1)
47528627 goto peek_stack;
47538628 else if (ret < 0)
....@@ -4756,7 +8631,7 @@
47568631
47578632 mark_explored:
47588633 insn_state[t] = EXPLORED;
4759
- if (cur_stack-- <= 0) {
8634
+ if (env->cfg.cur_stack-- <= 0) {
47608635 verbose(env, "pop stack internal bug\n");
47618636 ret = -EFAULT;
47628637 goto err_free;
....@@ -4774,9 +8649,329 @@
47748649 ret = 0; /* cfg looks good */
47758650
47768651 err_free:
4777
- kfree(insn_state);
4778
- kfree(insn_stack);
8652
+ kvfree(insn_state);
8653
+ kvfree(insn_stack);
8654
+ env->cfg.insn_state = env->cfg.insn_stack = NULL;
47798655 return ret;
8656
+}
8657
+
8658
+static int check_abnormal_return(struct bpf_verifier_env *env)
8659
+{
8660
+ int i;
8661
+
8662
+ for (i = 1; i < env->subprog_cnt; i++) {
8663
+ if (env->subprog_info[i].has_ld_abs) {
8664
+ verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
8665
+ return -EINVAL;
8666
+ }
8667
+ if (env->subprog_info[i].has_tail_call) {
8668
+ verbose(env, "tail_call is not allowed in subprogs without BTF\n");
8669
+ return -EINVAL;
8670
+ }
8671
+ }
8672
+ return 0;
8673
+}
8674
+
8675
+/* The minimum supported BTF func info size */
8676
+#define MIN_BPF_FUNCINFO_SIZE 8
8677
+#define MAX_FUNCINFO_REC_SIZE 252
8678
+
8679
+static int check_btf_func(struct bpf_verifier_env *env,
8680
+ const union bpf_attr *attr,
8681
+ union bpf_attr __user *uattr)
8682
+{
8683
+ const struct btf_type *type, *func_proto, *ret_type;
8684
+ u32 i, nfuncs, urec_size, min_size;
8685
+ u32 krec_size = sizeof(struct bpf_func_info);
8686
+ struct bpf_func_info *krecord;
8687
+ struct bpf_func_info_aux *info_aux = NULL;
8688
+ struct bpf_prog *prog;
8689
+ const struct btf *btf;
8690
+ void __user *urecord;
8691
+ u32 prev_offset = 0;
8692
+ bool scalar_return;
8693
+ int ret = -ENOMEM;
8694
+
8695
+ nfuncs = attr->func_info_cnt;
8696
+ if (!nfuncs) {
8697
+ if (check_abnormal_return(env))
8698
+ return -EINVAL;
8699
+ return 0;
8700
+ }
8701
+
8702
+ if (nfuncs != env->subprog_cnt) {
8703
+ verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
8704
+ return -EINVAL;
8705
+ }
8706
+
8707
+ urec_size = attr->func_info_rec_size;
8708
+ if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
8709
+ urec_size > MAX_FUNCINFO_REC_SIZE ||
8710
+ urec_size % sizeof(u32)) {
8711
+ verbose(env, "invalid func info rec size %u\n", urec_size);
8712
+ return -EINVAL;
8713
+ }
8714
+
8715
+ prog = env->prog;
8716
+ btf = prog->aux->btf;
8717
+
8718
+ urecord = u64_to_user_ptr(attr->func_info);
8719
+ min_size = min_t(u32, krec_size, urec_size);
8720
+
8721
+ krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
8722
+ if (!krecord)
8723
+ return -ENOMEM;
8724
+ info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
8725
+ if (!info_aux)
8726
+ goto err_free;
8727
+
8728
+ for (i = 0; i < nfuncs; i++) {
8729
+ ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
8730
+ if (ret) {
8731
+ if (ret == -E2BIG) {
8732
+ verbose(env, "nonzero tailing record in func info");
8733
+ /* set the size kernel expects so loader can zero
8734
+ * out the rest of the record.
8735
+ */
8736
+ if (put_user(min_size, &uattr->func_info_rec_size))
8737
+ ret = -EFAULT;
8738
+ }
8739
+ goto err_free;
8740
+ }
8741
+
8742
+ if (copy_from_user(&krecord[i], urecord, min_size)) {
8743
+ ret = -EFAULT;
8744
+ goto err_free;
8745
+ }
8746
+
8747
+ /* check insn_off */
8748
+ ret = -EINVAL;
8749
+ if (i == 0) {
8750
+ if (krecord[i].insn_off) {
8751
+ verbose(env,
8752
+ "nonzero insn_off %u for the first func info record",
8753
+ krecord[i].insn_off);
8754
+ goto err_free;
8755
+ }
8756
+ } else if (krecord[i].insn_off <= prev_offset) {
8757
+ verbose(env,
8758
+ "same or smaller insn offset (%u) than previous func info record (%u)",
8759
+ krecord[i].insn_off, prev_offset);
8760
+ goto err_free;
8761
+ }
8762
+
8763
+ if (env->subprog_info[i].start != krecord[i].insn_off) {
8764
+ verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
8765
+ goto err_free;
8766
+ }
8767
+
8768
+ /* check type_id */
8769
+ type = btf_type_by_id(btf, krecord[i].type_id);
8770
+ if (!type || !btf_type_is_func(type)) {
8771
+ verbose(env, "invalid type id %d in func info",
8772
+ krecord[i].type_id);
8773
+ goto err_free;
8774
+ }
8775
+ info_aux[i].linkage = BTF_INFO_VLEN(type->info);
8776
+
8777
+ func_proto = btf_type_by_id(btf, type->type);
8778
+ if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
8779
+ /* btf_func_check() already verified it during BTF load */
8780
+ goto err_free;
8781
+ ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
8782
+ scalar_return =
8783
+ btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
8784
+ if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
8785
+ verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
8786
+ goto err_free;
8787
+ }
8788
+ if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
8789
+ verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
8790
+ goto err_free;
8791
+ }
8792
+
8793
+ prev_offset = krecord[i].insn_off;
8794
+ urecord += urec_size;
8795
+ }
8796
+
8797
+ prog->aux->func_info = krecord;
8798
+ prog->aux->func_info_cnt = nfuncs;
8799
+ prog->aux->func_info_aux = info_aux;
8800
+ return 0;
8801
+
8802
+err_free:
8803
+ kvfree(krecord);
8804
+ kfree(info_aux);
8805
+ return ret;
8806
+}
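A large part of check_btf_func() above validates that the user-supplied func_info records line up with the subprogram layout: the first record must cover instruction 0, insn_off must be strictly increasing, and each offset must equal the corresponding subprogram start. A stripped-down sketch of just that offset validation, with assumed plain arrays in place of the uapi records:

#include <stdio.h>

/* Returns 0 if func_info offsets match the subprog layout, -1 otherwise. */
static int check_func_info_offsets(const unsigned int *insn_off,
                                   const unsigned int *subprog_start,
                                   unsigned int nfuncs)
{
    unsigned int prev = 0;

    for (unsigned int i = 0; i < nfuncs; i++) {
        if (i == 0 && insn_off[0] != 0)
            return -1;      /* first record must cover insn 0 */
        if (i && insn_off[i] <= prev)
            return -1;      /* offsets must strictly increase */
        if (insn_off[i] != subprog_start[i])
            return -1;      /* must match the subprog layout */
        prev = insn_off[i];
    }
    return 0;
}

int main(void)
{
    unsigned int insn_off[] = { 0, 5, 12 };
    unsigned int subprog_start[] = { 0, 5, 12 };

    printf("%s\n", check_func_info_offsets(insn_off, subprog_start, 3) ?
           "rejected" : "ok");
    return 0;
}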
8807
+
8808
+static void adjust_btf_func(struct bpf_verifier_env *env)
8809
+{
8810
+ struct bpf_prog_aux *aux = env->prog->aux;
8811
+ int i;
8812
+
8813
+ if (!aux->func_info)
8814
+ return;
8815
+
8816
+ for (i = 0; i < env->subprog_cnt; i++)
8817
+ aux->func_info[i].insn_off = env->subprog_info[i].start;
8818
+}
8819
+
8820
+#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
8821
+ sizeof(((struct bpf_line_info *)(0))->line_col))
8822
+#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
8823
+
8824
+static int check_btf_line(struct bpf_verifier_env *env,
8825
+ const union bpf_attr *attr,
8826
+ union bpf_attr __user *uattr)
8827
+{
8828
+ u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
8829
+ struct bpf_subprog_info *sub;
8830
+ struct bpf_line_info *linfo;
8831
+ struct bpf_prog *prog;
8832
+ const struct btf *btf;
8833
+ void __user *ulinfo;
8834
+ int err;
8835
+
8836
+ nr_linfo = attr->line_info_cnt;
8837
+ if (!nr_linfo)
8838
+ return 0;
8839
+ if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
8840
+ return -EINVAL;
8841
+
8842
+ rec_size = attr->line_info_rec_size;
8843
+ if (rec_size < MIN_BPF_LINEINFO_SIZE ||
8844
+ rec_size > MAX_LINEINFO_REC_SIZE ||
8845
+ rec_size & (sizeof(u32) - 1))
8846
+ return -EINVAL;
8847
+
8848
+ /* Need to zero it in case the userspace may
8849
+ * pass in a smaller bpf_line_info object.
8850
+ */
8851
+ linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
8852
+ GFP_KERNEL | __GFP_NOWARN);
8853
+ if (!linfo)
8854
+ return -ENOMEM;
8855
+
8856
+ prog = env->prog;
8857
+ btf = prog->aux->btf;
8858
+
8859
+ s = 0;
8860
+ sub = env->subprog_info;
8861
+ ulinfo = u64_to_user_ptr(attr->line_info);
8862
+ expected_size = sizeof(struct bpf_line_info);
8863
+ ncopy = min_t(u32, expected_size, rec_size);
8864
+ for (i = 0; i < nr_linfo; i++) {
8865
+ err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
8866
+ if (err) {
8867
+ if (err == -E2BIG) {
8868
+ verbose(env, "nonzero tailing record in line_info");
8869
+ if (put_user(expected_size,
8870
+ &uattr->line_info_rec_size))
8871
+ err = -EFAULT;
8872
+ }
8873
+ goto err_free;
8874
+ }
8875
+
8876
+ if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
8877
+ err = -EFAULT;
8878
+ goto err_free;
8879
+ }
8880
+
8881
+ /*
8882
+ * Check insn_off to ensure
8883
+ * 1) strictly increasing AND
8884
+ * 2) bounded by prog->len
8885
+ *
8886
+ * The linfo[0].insn_off == 0 check logically falls into
8887
+ * the later "missing bpf_line_info for func..." case
8888
+ * because the first linfo[0].insn_off must be the
8889
+ * first sub also and the first sub must have
8890
+ * subprog_info[0].start == 0.
8891
+ */
8892
+ if ((i && linfo[i].insn_off <= prev_offset) ||
8893
+ linfo[i].insn_off >= prog->len) {
8894
+ verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
8895
+ i, linfo[i].insn_off, prev_offset,
8896
+ prog->len);
8897
+ err = -EINVAL;
8898
+ goto err_free;
8899
+ }
8900
+
8901
+ if (!prog->insnsi[linfo[i].insn_off].code) {
8902
+ verbose(env,
8903
+ "Invalid insn code at line_info[%u].insn_off\n",
8904
+ i);
8905
+ err = -EINVAL;
8906
+ goto err_free;
8907
+ }
8908
+
8909
+ if (!btf_name_by_offset(btf, linfo[i].line_off) ||
8910
+ !btf_name_by_offset(btf, linfo[i].file_name_off)) {
8911
+ verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
8912
+ err = -EINVAL;
8913
+ goto err_free;
8914
+ }
8915
+
8916
+ if (s != env->subprog_cnt) {
8917
+ if (linfo[i].insn_off == sub[s].start) {
8918
+ sub[s].linfo_idx = i;
8919
+ s++;
8920
+ } else if (sub[s].start < linfo[i].insn_off) {
8921
+ verbose(env, "missing bpf_line_info for func#%u\n", s);
8922
+ err = -EINVAL;
8923
+ goto err_free;
8924
+ }
8925
+ }
8926
+
8927
+ prev_offset = linfo[i].insn_off;
8928
+ ulinfo += rec_size;
8929
+ }
8930
+
8931
+ if (s != env->subprog_cnt) {
8932
+ verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
8933
+ env->subprog_cnt - s, s);
8934
+ err = -EINVAL;
8935
+ goto err_free;
8936
+ }
8937
+
8938
+ prog->aux->linfo = linfo;
8939
+ prog->aux->nr_linfo = nr_linfo;
8940
+
8941
+ return 0;
8942
+
8943
+err_free:
8944
+ kvfree(linfo);
8945
+ return err;
8946
+}
8947
+
8948
+static int check_btf_info(struct bpf_verifier_env *env,
8949
+ const union bpf_attr *attr,
8950
+ union bpf_attr __user *uattr)
8951
+{
8952
+ struct btf *btf;
8953
+ int err;
8954
+
8955
+ if (!attr->func_info_cnt && !attr->line_info_cnt) {
8956
+ if (check_abnormal_return(env))
8957
+ return -EINVAL;
8958
+ return 0;
8959
+ }
8960
+
8961
+ btf = btf_get_by_fd(attr->prog_btf_fd);
8962
+ if (IS_ERR(btf))
8963
+ return PTR_ERR(btf);
8964
+ env->prog->aux->btf = btf;
8965
+
8966
+ err = check_btf_func(env, attr, uattr);
8967
+ if (err)
8968
+ return err;
8969
+
8970
+ err = check_btf_line(env, attr, uattr);
8971
+ if (err)
8972
+ return err;
8973
+
8974
+ return 0;
47808975 }
47818976
47828977 /* check %cur's range satisfies %old's */
....@@ -4786,7 +8981,11 @@
47868981 return old->umin_value <= cur->umin_value &&
47878982 old->umax_value >= cur->umax_value &&
47888983 old->smin_value <= cur->smin_value &&
4789
- old->smax_value >= cur->smax_value;
8984
+ old->smax_value >= cur->smax_value &&
8985
+ old->u32_min_value <= cur->u32_min_value &&
8986
+ old->u32_max_value >= cur->u32_max_value &&
8987
+ old->s32_min_value <= cur->s32_min_value &&
8988
+ old->s32_max_value >= cur->s32_max_value;
47908989 }
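range_within() above is the "old covers new" test used during state pruning: a previously verified state can stand in for the current one only if every tracked bound of the current register lies inside the old register's bounds. A tiny sketch of the 64-bit half with concrete numbers (the field names are assumptions):

#include <stdint.h>
#include <stdio.h>

struct reg_bounds {
    uint64_t umin, umax;
    int64_t smin, smax;
};

/* Same shape as range_within(): old must be at least as wide as cur. */
static int range_within(const struct reg_bounds *old, const struct reg_bounds *cur)
{
    return old->umin <= cur->umin && old->umax >= cur->umax &&
           old->smin <= cur->smin && old->smax >= cur->smax;
}

int main(void)
{
    struct reg_bounds old = { 0, 100, 0, 100 };
    struct reg_bounds cur = { 10, 50, 10, 50 };
    struct reg_bounds wide = { 0, 200, 0, 200 };

    printf("cur  within old: %d\n", range_within(&old, &cur));   /* 1: can prune */
    printf("wide within old: %d\n", range_within(&old, &wide));  /* 0: keep exploring */
    return 0;
}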
47918990
47928991 /* If in the old state two registers had the same id, then they need to have
....@@ -4816,6 +9015,102 @@
48169015 /* We ran out of idmap slots, which should be impossible */
48179016 WARN_ON_ONCE(1);
48189017 return false;
9018
+}
9019
+
9020
+static void clean_func_state(struct bpf_verifier_env *env,
9021
+ struct bpf_func_state *st)
9022
+{
9023
+ enum bpf_reg_liveness live;
9024
+ int i, j;
9025
+
9026
+ for (i = 0; i < BPF_REG_FP; i++) {
9027
+ live = st->regs[i].live;
9028
+ /* liveness must not touch this register anymore */
9029
+ st->regs[i].live |= REG_LIVE_DONE;
9030
+ if (!(live & REG_LIVE_READ))
9031
+ /* since the register is unused, clear its state
9032
+ * to make further comparison simpler
9033
+ */
9034
+ __mark_reg_not_init(env, &st->regs[i]);
9035
+ }
9036
+
9037
+ for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
9038
+ live = st->stack[i].spilled_ptr.live;
9039
+ /* liveness must not touch this stack slot anymore */
9040
+ st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
9041
+ if (!(live & REG_LIVE_READ)) {
9042
+ __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
9043
+ for (j = 0; j < BPF_REG_SIZE; j++)
9044
+ st->stack[i].slot_type[j] = STACK_INVALID;
9045
+ }
9046
+ }
9047
+}
9048
+
9049
+static void clean_verifier_state(struct bpf_verifier_env *env,
9050
+ struct bpf_verifier_state *st)
9051
+{
9052
+ int i;
9053
+
9054
+ if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
9055
+ /* all regs in this state in all frames were already marked */
9056
+ return;
9057
+
9058
+ for (i = 0; i <= st->curframe; i++)
9059
+ clean_func_state(env, st->frame[i]);
9060
+}
9061
+
9062
+/* the parentage chains form a tree.
9063
+ * the verifier states are added to state lists at given insn and
9064
+ * pushed into state stack for future exploration.
9065
+ * when the verifier reaches bpf_exit insn some of the verifer states
9066
+ * stored in the state lists have their final liveness state already,
9067
+ * but a lot of states will get revised from liveness point of view when
9068
+ * the verifier explores other branches.
9069
+ * Example:
9070
+ * 1: r0 = 1
9071
+ * 2: if r1 == 100 goto pc+1
9072
+ * 3: r0 = 2
9073
+ * 4: exit
9074
+ * when the verifier reaches exit insn the register r0 in the state list of
9075
+ * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
9076
+ * of insn 2 and goes exploring further. At the insn 4 it will walk the
9077
+ * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
9078
+ *
9079
+ * Since the verifier pushes the branch states as it sees them while exploring
9080
+ * the program the condition of walking the branch instruction for the second
9081
+ * time means that all states below this branch were already explored and
9082
+ * their final liveness marks are already propagated.
9083
+ * Hence when the verifier completes the search of state list in is_state_visited()
9084
+ * we can call this clean_live_states() function to mark all liveness states
9085
+ * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
9086
+ * will not be used.
9087
+ * This function also clears the registers and stack for states that !READ
9088
+ * to simplify state merging.
9089
+ *
9090
+ * Important note here that walking the same branch instruction in the callee
9091
+ * doesn't mean that the states are DONE. The verifier has to compare
9092
+ * the callsites
9093
+ */
9094
+static void clean_live_states(struct bpf_verifier_env *env, int insn,
9095
+ struct bpf_verifier_state *cur)
9096
+{
9097
+ struct bpf_verifier_state_list *sl;
9098
+ int i;
9099
+
9100
+ sl = *explored_state(env, insn);
9101
+ while (sl) {
9102
+ if (sl->state.branches)
9103
+ goto next;
9104
+ if (sl->state.insn_idx != insn ||
9105
+ sl->state.curframe != cur->curframe)
9106
+ goto next;
9107
+ for (i = 0; i <= cur->curframe; i++)
9108
+ if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
9109
+ goto next;
9110
+ clean_verifier_state(env, &sl->state);
9111
+next:
9112
+ sl = sl->next;
9113
+ }
48199114 }
48209115
48219116 /* Returns true if (rold safe implies rcur safe) */
....@@ -4849,6 +9144,8 @@
48499144 if (env->explore_alu_limits)
48509145 return false;
48519146 if (rcur->type == SCALAR_VALUE) {
9147
+ if (!rold->precise && !rcur->precise)
9148
+ return true;
48529149 /* new val must satisfy old val knowledge */
48539150 return range_within(rold, rcur) &&
48549151 tnum_in(rold->var_off, rcur->var_off);
....@@ -4865,8 +9162,11 @@
48659162 case PTR_TO_MAP_VALUE:
48669163 /* If the new min/max/var_off satisfy the old ones and
48679164 * everything else matches, we are OK.
4868
- * We don't care about the 'id' value, because nothing
4869
- * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
9165
+ * 'id' is not compared, since it's only used for maps with
9166
+ * bpf_spin_lock inside map element and in such cases if
9167
+ * the rest of the prog is valid for one map element then
9168
+ * it's valid for all map elements regardless of the key
9169
+ * used in bpf_map_lookup()
48709170 */
48719171 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
48729172 range_within(rold, rcur) &&
....@@ -4911,6 +9211,14 @@
49119211 case PTR_TO_CTX:
49129212 case CONST_PTR_TO_MAP:
49139213 case PTR_TO_PACKET_END:
9214
+ case PTR_TO_FLOW_KEYS:
9215
+ case PTR_TO_SOCKET:
9216
+ case PTR_TO_SOCKET_OR_NULL:
9217
+ case PTR_TO_SOCK_COMMON:
9218
+ case PTR_TO_SOCK_COMMON_OR_NULL:
9219
+ case PTR_TO_TCP_SOCK:
9220
+ case PTR_TO_TCP_SOCK_OR_NULL:
9221
+ case PTR_TO_XDP_SOCK:
49149222 /* Only valid matches are exact, which memcmp() above
49159223 * would have accepted
49169224 */
....@@ -4929,12 +9237,6 @@
49299237 {
49309238 int i, spi;
49319239
4932
- /* if explored stack has more populated slots than current stack
4933
- * such stacks are not equivalent
4934
- */
4935
- if (old->allocated_stack > cur->allocated_stack)
4936
- return false;
4937
-
49389240 /* walk slots of the explored stack and ignore any additional
49399241 * slots in the current stack, since explored(safe) state
49409242 * didn't use them
....@@ -4942,12 +9244,21 @@
49429244 for (i = 0; i < old->allocated_stack; i++) {
49439245 spi = i / BPF_REG_SIZE;
49449246
4945
- if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
9247
+ if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
9248
+ i += BPF_REG_SIZE - 1;
49469249 /* explored state didn't use this */
49479250 continue;
9251
+ }
49489252
49499253 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
49509254 continue;
9255
+
9256
+ /* explored stack has more populated slots than current stack
9257
+ * and these slots were used
9258
+ */
9259
+ if (i >= cur->allocated_stack)
9260
+ return false;
9261
+
49519262 /* if old state was safe with misc data in the stack
49529263 * it will be safe with zero-initialized stack.
49539264 * The opposite is not true
....@@ -4958,7 +9269,7 @@
49589269 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
49599270 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
49609271 /* Ex: old explored (safe) state has STACK_SPILL in
4961
- * this stack slot, but current has has STACK_MISC ->
9272
+ * this stack slot, but current has STACK_MISC ->
49629273 * this verifier states are not equivalent,
49639274 * return false to continue verification of this path
49649275 */
....@@ -4982,6 +9293,14 @@
49829293 return false;
49839294 }
49849295 return true;
9296
+}
9297
+
9298
+static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
9299
+{
9300
+ if (old->acquired_refs != cur->acquired_refs)
9301
+ return false;
9302
+ return !memcmp(old->refs, cur->refs,
9303
+ sizeof(*old->refs) * old->acquired_refs);
49859304 }
49869305
49879306 /* compare two verifier states
....@@ -5024,6 +9343,9 @@
50249343 if (!stacksafe(env, old, cur, env->idmap_scratch))
50259344 return false;
50269345
9346
+ if (!refsafe(old, cur))
9347
+ return false;
9348
+
50279349 return true;
50289350 }
50299351
....@@ -5042,6 +9364,9 @@
50429364 if (old->speculative && !cur->speculative)
50439365 return false;
50449366
9367
+ if (old->active_spin_lock != cur->active_spin_lock)
9368
+ return false;
9369
+
50459370 /* for states to be equal callsites have to be the same
50469371 * and all frame states need to be equivalent
50479372 */
....@@ -5052,6 +9377,35 @@
50529377 return false;
50539378 }
50549379 return true;
9380
+}
9381
+
9382
+/* Return 0 if no propagation happened. Return negative error code if error
9383
+ * happened. Otherwise, return the propagated bit.
9384
+ */
9385
+static int propagate_liveness_reg(struct bpf_verifier_env *env,
9386
+ struct bpf_reg_state *reg,
9387
+ struct bpf_reg_state *parent_reg)
9388
+{
9389
+ u8 parent_flag = parent_reg->live & REG_LIVE_READ;
9390
+ u8 flag = reg->live & REG_LIVE_READ;
9391
+ int err;
9392
+
9393
+ /* When we get here, read flags of PARENT_REG or REG could be any of
9394
+ * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
9395
+ * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
9396
+ */
9397
+ if (parent_flag == REG_LIVE_READ64 ||
9398
+ /* Or if there is no read flag from REG. */
9399
+ !flag ||
9400
+ /* Or if the read flag from REG is the same as PARENT_REG. */
9401
+ parent_flag == flag)
9402
+ return 0;
9403
+
9404
+ err = mark_reg_read(env, reg, parent_reg, flag);
9405
+ if (err)
9406
+ return err;
9407
+
9408
+ return flag;
50559409 }
50569410
50579411 /* A write screens off any subsequent reads; but write marks come from the
....@@ -5065,8 +9419,9 @@
50659419 const struct bpf_verifier_state *vstate,
50669420 struct bpf_verifier_state *vparent)
50679421 {
5068
- int i, frame, err = 0;
9422
+ struct bpf_reg_state *state_reg, *parent_reg;
50699423 struct bpf_func_state *state, *parent;
9424
+ int i, frame, err = 0;
50709425
50719426 if (vparent->curframe != vstate->curframe) {
50729427 WARN(1, "propagate_live: parent frame %d current frame %d\n",
....@@ -5075,50 +9430,156 @@
50759430 }
50769431 /* Propagate read liveness of registers... */
50779432 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
5078
- /* We don't need to worry about FP liveness because it's read-only */
5079
- for (i = 0; i < BPF_REG_FP; i++) {
5080
- if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
5081
- continue;
5082
- if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
5083
- err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
5084
- &vparent->frame[vstate->curframe]->regs[i]);
5085
- if (err)
9433
+ for (frame = 0; frame <= vstate->curframe; frame++) {
9434
+ parent = vparent->frame[frame];
9435
+ state = vstate->frame[frame];
9436
+ parent_reg = parent->regs;
9437
+ state_reg = state->regs;
9438
+ /* We don't need to worry about FP liveness, it's read-only */
9439
+ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9440
+ err = propagate_liveness_reg(env, &state_reg[i],
9441
+ &parent_reg[i]);
9442
+ if (err < 0)
9443
+ return err;
9444
+ if (err == REG_LIVE_READ64)
9445
+ mark_insn_zext(env, &parent_reg[i]);
9446
+ }
9447
+
9448
+ /* Propagate stack slots. */
9449
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
9450
+ i < parent->allocated_stack / BPF_REG_SIZE; i++) {
9451
+ parent_reg = &parent->stack[i].spilled_ptr;
9452
+ state_reg = &state->stack[i].spilled_ptr;
9453
+ err = propagate_liveness_reg(env, state_reg,
9454
+ parent_reg);
9455
+ if (err < 0)
50869456 return err;
50879457 }
50889458 }
5089
-
5090
- /* ... and stack slots */
5091
- for (frame = 0; frame <= vstate->curframe; frame++) {
5092
- state = vstate->frame[frame];
5093
- parent = vparent->frame[frame];
5094
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
5095
- i < parent->allocated_stack / BPF_REG_SIZE; i++) {
5096
- if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
5097
- continue;
5098
- if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
5099
- mark_reg_read(env, &state->stack[i].spilled_ptr,
5100
- &parent->stack[i].spilled_ptr);
5101
- }
5102
- }
5103
- return err;
9459
+ return 0;
51049460 }
9461
+
9462
+/* find precise scalars in the previous equivalent state and
9463
+ * propagate them into the current state
9464
+ */
9465
+static int propagate_precision(struct bpf_verifier_env *env,
9466
+ const struct bpf_verifier_state *old)
9467
+{
9468
+ struct bpf_reg_state *state_reg;
9469
+ struct bpf_func_state *state;
9470
+ int i, err = 0;
9471
+
9472
+ state = old->frame[old->curframe];
9473
+ state_reg = state->regs;
9474
+ for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
9475
+ if (state_reg->type != SCALAR_VALUE ||
9476
+ !state_reg->precise)
9477
+ continue;
9478
+ if (env->log.level & BPF_LOG_LEVEL2)
9479
+ verbose(env, "propagating r%d\n", i);
9480
+ err = mark_chain_precision(env, i);
9481
+ if (err < 0)
9482
+ return err;
9483
+ }
9484
+
9485
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
9486
+ if (state->stack[i].slot_type[0] != STACK_SPILL)
9487
+ continue;
9488
+ state_reg = &state->stack[i].spilled_ptr;
9489
+ if (state_reg->type != SCALAR_VALUE ||
9490
+ !state_reg->precise)
9491
+ continue;
9492
+ if (env->log.level & BPF_LOG_LEVEL2)
9493
+ verbose(env, "propagating fp%d\n",
9494
+ (-i - 1) * BPF_REG_SIZE);
9495
+ err = mark_chain_precision_stack(env, i);
9496
+ if (err < 0)
9497
+ return err;
9498
+ }
9499
+ return 0;
9500
+}
9501
+
9502
+static bool states_maybe_looping(struct bpf_verifier_state *old,
9503
+ struct bpf_verifier_state *cur)
9504
+{
9505
+ struct bpf_func_state *fold, *fcur;
9506
+ int i, fr = cur->curframe;
9507
+
9508
+ if (old->curframe != fr)
9509
+ return false;
9510
+
9511
+ fold = old->frame[fr];
9512
+ fcur = cur->frame[fr];
9513
+ for (i = 0; i < MAX_BPF_REG; i++)
9514
+ if (memcmp(&fold->regs[i], &fcur->regs[i],
9515
+ offsetof(struct bpf_reg_state, parent)))
9516
+ return false;
9517
+ return true;
9518
+}
9519
+
51059520
51069521 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
51079522 {
51089523 struct bpf_verifier_state_list *new_sl;
5109
- struct bpf_verifier_state_list *sl;
9524
+ struct bpf_verifier_state_list *sl, **pprev;
51109525 struct bpf_verifier_state *cur = env->cur_state, *new;
51119526 int i, j, err, states_cnt = 0;
9527
+ bool add_new_state = env->test_state_freq ? true : false;
51129528
5113
- sl = env->explored_states[insn_idx];
5114
- if (!sl)
9529
+ cur->last_insn_idx = env->prev_insn_idx;
9530
+ if (!env->insn_aux_data[insn_idx].prune_point)
51159531 /* this 'insn_idx' instruction wasn't marked, so we will not
51169532 * be doing state search here
51179533 */
51189534 return 0;
51199535
5120
- while (sl != STATE_LIST_MARK) {
9536
+ /* bpf progs typically have pruning point every 4 instructions
9537
+ * http://vger.kernel.org/bpfconf2019.html#session-1
9538
+ * Do not add new state for future pruning if the verifier hasn't seen
9539
+ * at least 2 jumps and at least 8 instructions.
9540
+ * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
9541
+ * In tests this amounts to up to a 50% reduction in total verifier
9542
+ * memory consumption and 20% verifier time speedup.
9543
+ */
9544
+ if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
9545
+ env->insn_processed - env->prev_insn_processed >= 8)
9546
+ add_new_state = true;
9547
+
9548
+ pprev = explored_state(env, insn_idx);
9549
+ sl = *pprev;
9550
+
9551
+ clean_live_states(env, insn_idx, cur);
9552
+
9553
+ while (sl) {
9554
+ states_cnt++;
9555
+ if (sl->state.insn_idx != insn_idx)
9556
+ goto next;
9557
+ if (sl->state.branches) {
9558
+ if (states_maybe_looping(&sl->state, cur) &&
9559
+ states_equal(env, &sl->state, cur)) {
9560
+ verbose_linfo(env, insn_idx, "; ");
9561
+ verbose(env, "infinite loop detected at insn %d\n", insn_idx);
9562
+ return -EINVAL;
9563
+ }
9564
+ /* if the verifier is processing a loop, avoid adding new state
9565
+ * too often, since different loop iterations have distinct
9566
+ * states and may not help future pruning.
9567
+ * This threshold shouldn't be too low to make sure that
9568
+ * a loop with large bound will be rejected quickly.
9569
+ * The most abusive loop will be:
9570
+ * r1 += 1
9571
+ * if r1 < 1000000 goto pc-2
9572
+ * 1M insn_procssed limit / 100 == 10k peak states.
9573
+ * This threshold shouldn't be too high either, since states
9574
+ * at the end of the loop are likely to be useful in pruning.
9575
+ */
9576
+ if (env->jmps_processed - env->prev_jmps_processed < 20 &&
9577
+ env->insn_processed - env->prev_insn_processed < 100)
9578
+ add_new_state = false;
9579
+ goto miss;
9580
+ }
51219581 if (states_equal(env, &sl->state, cur)) {
9582
+ sl->hit_cnt++;
51229583 /* reached equivalent register/stack state,
51239584 * prune the search.
51249585 * Registers read by the continuation are read by us.
....@@ -5130,27 +9591,87 @@
51309591 * this state and will pop a new one.
51319592 */
51329593 err = propagate_liveness(env, &sl->state, cur);
9594
+
9595
+ /* if previous state reached the exit with precision and
9596
+ * current state is equivalent to it (except precision marks)
9597
+ * the precision needs to be propagated back in
9598
+ * the current state.
9599
+ */
9600
+ err = err ? : push_jmp_history(env, cur);
9601
+ err = err ? : propagate_precision(env, &sl->state);
51339602 if (err)
51349603 return err;
51359604 return 1;
51369605 }
5137
- sl = sl->next;
5138
- states_cnt++;
9606
+miss:
9607
+ /* when new state is not going to be added do not increase miss count.
9608
+ * Otherwise several loop iterations will remove the state
9609
+ * recorded earlier. The goal of these heuristics is to have
9610
+ * states from some iterations of the loop (some in the beginning
9611
+ * and some at the end) to help pruning.
9612
+ */
9613
+ if (add_new_state)
9614
+ sl->miss_cnt++;
9615
+ /* heuristic to determine whether this state is beneficial
9616
+ * to keep checking from state equivalence point of view.
9617
+ * Higher numbers increase max_states_per_insn and verification time,
9618
+ * but do not meaningfully decrease insn_processed.
9619
+ */
9620
+ if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
9621
+ /* the state is unlikely to be useful. Remove it to
9622
+ * speed up verification
9623
+ */
9624
+ *pprev = sl->next;
9625
+ if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
9626
+ u32 br = sl->state.branches;
9627
+
9628
+ WARN_ONCE(br,
9629
+ "BUG live_done but branches_to_explore %d\n",
9630
+ br);
9631
+ free_verifier_state(&sl->state, false);
9632
+ kfree(sl);
9633
+ env->peak_states--;
9634
+ } else {
9635
+ /* cannot free this state, since parentage chain may
9636
+ * walk it later. Add it for free_list instead to
9637
+ * be freed at the end of verification
9638
+ */
9639
+ sl->next = env->free_list;
9640
+ env->free_list = sl;
9641
+ }
9642
+ sl = *pprev;
9643
+ continue;
9644
+ }
9645
+next:
9646
+ pprev = &sl->next;
9647
+ sl = *pprev;
51399648 }
51409649
5141
- if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
5142
- return 0;
9650
+ if (env->max_states_per_insn < states_cnt)
9651
+ env->max_states_per_insn = states_cnt;
51439652
5144
- /* there were no equivalent states, remember current one.
5145
- * technically the current state is not proven to be safe yet,
9653
+ if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
9654
+ return push_jmp_history(env, cur);
9655
+
9656
+ if (!add_new_state)
9657
+ return push_jmp_history(env, cur);
9658
+
9659
+ /* There were no equivalent states, remember the current one.
9660
+ * Technically the current state is not proven to be safe yet,
51469661 * but it will either reach outer most bpf_exit (which means it's safe)
5147
- * or it will be rejected. Since there are no loops, we won't be
9662
+ * or it will be rejected. When there are no loops the verifier won't be
51489663 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
5149
- * again on the way to bpf_exit
9664
+ * again on the way to bpf_exit.
9665
+ * When looping the sl->state.branches will be > 0 and this state
9666
+ * will not be considered for equivalence until branches == 0.
51509667 */
51519668 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
51529669 if (!new_sl)
51539670 return -ENOMEM;
9671
+ env->total_states++;
9672
+ env->peak_states++;
9673
+ env->prev_jmps_processed = env->jmps_processed;
9674
+ env->prev_insn_processed = env->insn_processed;
51549675
51559676 /* add new state to the head of linked list */
51569677 new = &new_sl->state;
....@@ -5160,19 +9681,34 @@
51609681 kfree(new_sl);
51619682 return err;
51629683 }
5163
- new_sl->next = env->explored_states[insn_idx];
5164
- env->explored_states[insn_idx] = new_sl;
5165
- /* connect new state to parentage chain */
5166
- for (i = 0; i < BPF_REG_FP; i++)
5167
- cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i];
9684
+ new->insn_idx = insn_idx;
9685
+ WARN_ONCE(new->branches != 1,
9686
+ "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
9687
+
9688
+ cur->parent = new;
9689
+ cur->first_insn_idx = insn_idx;
9690
+ clear_jmp_history(cur);
9691
+ new_sl->next = *explored_state(env, insn_idx);
9692
+ *explored_state(env, insn_idx) = new_sl;
9693
+ /* connect new state to parentage chain. Current frame needs all
9694
+ * registers connected. Only r6 - r9 of the callers are alive (pushed
9695
+ * to the stack implicitly by JITs) so in callers' frames connect just
9696
+ * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
9697
+ * the state of the call instruction (with WRITTEN set), and r0 comes
9698
+ * from callee with its full parentage chain, anyway.
9699
+ */
51689700 /* clear write marks in current state: the writes we did are not writes
51699701 * our child did, so they don't screen off its reads from us.
51709702 * (There are no read marks in current state, because reads always mark
51719703 * their parent and current state never has children yet. Only
51729704 * explored_states can get read marks.)
51739705 */
5174
- for (i = 0; i < BPF_REG_FP; i++)
5175
- cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
9706
+ for (j = 0; j <= cur->curframe; j++) {
9707
+ for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
9708
+ cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
9709
+ for (i = 0; i < BPF_REG_FP; i++)
9710
+ cur->frame[j]->regs[i].live = REG_LIVE_NONE;
9711
+ }
51769712
51779713 /* all stack frames are accessible from callee, clear them all */
51789714 for (j = 0; j <= cur->curframe; j++) {
....@@ -5188,36 +9724,60 @@
51889724 return 0;
51899725 }
51909726
9727
+/* Return true if it's OK to have the same insn return a different type. */
9728
+static bool reg_type_mismatch_ok(enum bpf_reg_type type)
9729
+{
9730
+ switch (type) {
9731
+ case PTR_TO_CTX:
9732
+ case PTR_TO_SOCKET:
9733
+ case PTR_TO_SOCKET_OR_NULL:
9734
+ case PTR_TO_SOCK_COMMON:
9735
+ case PTR_TO_SOCK_COMMON_OR_NULL:
9736
+ case PTR_TO_TCP_SOCK:
9737
+ case PTR_TO_TCP_SOCK_OR_NULL:
9738
+ case PTR_TO_XDP_SOCK:
9739
+ case PTR_TO_BTF_ID:
9740
+ case PTR_TO_BTF_ID_OR_NULL:
9741
+ return false;
9742
+ default:
9743
+ return true;
9744
+ }
9745
+}
9746
+
9747
+/* If an instruction was previously used with particular pointer types, then we
9748
+ * need to be careful to avoid cases such as the below, where it may be ok
9749
+ * for one branch accessing the pointer, but not ok for the other branch:
9750
+ *
9751
+ * R1 = sock_ptr
9752
+ * goto X;
9753
+ * ...
9754
+ * R1 = some_other_valid_ptr;
9755
+ * goto X;
9756
+ * ...
9757
+ * R2 = *(u32 *)(R1 + 0);
9758
+ */
9759
+static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
9760
+{
9761
+ return src != prev && (!reg_type_mismatch_ok(src) ||
9762
+ !reg_type_mismatch_ok(prev));
9763
+}
9764
+
51919765 static int do_check(struct bpf_verifier_env *env)
51929766 {
5193
- struct bpf_verifier_state *state;
9767
+ bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
9768
+ struct bpf_verifier_state *state = env->cur_state;
51949769 struct bpf_insn *insns = env->prog->insnsi;
51959770 struct bpf_reg_state *regs;
5196
- int insn_cnt = env->prog->len, i;
5197
- int insn_processed = 0;
9771
+ int insn_cnt = env->prog->len;
51989772 bool do_print_state = false;
5199
-
5200
- state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
5201
- if (!state)
5202
- return -ENOMEM;
5203
- state->curframe = 0;
5204
- state->speculative = false;
5205
- state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
5206
- if (!state->frame[0]) {
5207
- kfree(state);
5208
- return -ENOMEM;
5209
- }
5210
- env->cur_state = state;
5211
- init_func_state(env, state->frame[0],
5212
- BPF_MAIN_FUNC /* callsite */,
5213
- 0 /* frameno */,
5214
- 0 /* subprogno, zero == main subprog */);
9773
+ int prev_insn_idx = -1;
52159774
52169775 for (;;) {
52179776 struct bpf_insn *insn;
52189777 u8 class;
52199778 int err;
52209779
9780
+ env->prev_insn_idx = prev_insn_idx;
52219781 if (env->insn_idx >= insn_cnt) {
52229782 verbose(env, "invalid insn idx %d insn_cnt %d\n",
52239783 env->insn_idx, insn_cnt);
....@@ -5227,10 +9787,10 @@
52279787 insn = &insns[env->insn_idx];
52289788 class = BPF_CLASS(insn->code);
52299789
5230
- if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
9790
+ if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
52319791 verbose(env,
52329792 "BPF program is too large. Processed %d insn\n",
5233
- insn_processed);
9793
+ env->insn_processed);
52349794 return -E2BIG;
52359795 }
52369796
....@@ -5239,7 +9799,7 @@
52399799 return err;
52409800 if (err == 1) {
52419801 /* found equivalent state, can prune the search */
5242
- if (env->log.level) {
9802
+ if (env->log.level & BPF_LOG_LEVEL) {
52439803 if (do_print_state)
52449804 verbose(env, "\nfrom %d to %d%s: safe\n",
52459805 env->prev_insn_idx, env->insn_idx,
....@@ -5257,8 +9817,9 @@
52579817 if (need_resched())
52589818 cond_resched();
52599819
5260
- if (env->log.level > 1 || (env->log.level && do_print_state)) {
5261
- if (env->log.level > 1)
9820
+ if (env->log.level & BPF_LOG_LEVEL2 ||
9821
+ (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
9822
+ if (env->log.level & BPF_LOG_LEVEL2)
52629823 verbose(env, "%d:", env->insn_idx);
52639824 else
52649825 verbose(env, "\nfrom %d to %d%s:",
....@@ -5269,12 +9830,13 @@
52699830 do_print_state = false;
52709831 }
52719832
5272
- if (env->log.level) {
9833
+ if (env->log.level & BPF_LOG_LEVEL) {
52739834 const struct bpf_insn_cbs cbs = {
52749835 .cb_print = verbose,
52759836 .private_data = env,
52769837 };
52779838
9839
+ verbose_linfo(env, env->insn_idx, "; ");
52789840 verbose(env, "%d: ", env->insn_idx);
52799841 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
52809842 }
....@@ -5288,6 +9850,7 @@
52889850
52899851 regs = cur_regs(env);
52909852 sanitize_mark_insn_seen(env);
9853
+ prev_insn_idx = env->insn_idx;
52919854
52929855 if (class == BPF_ALU || class == BPF_ALU64) {
52939856 err = check_alu_op(env, insn);
....@@ -5328,9 +9891,7 @@
53289891 */
53299892 *prev_src_type = src_reg_type;
53309893
5331
- } else if (src_reg_type != *prev_src_type &&
5332
- (src_reg_type == PTR_TO_CTX ||
5333
- *prev_src_type == PTR_TO_CTX)) {
9894
+ } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
53349895 /* ABuser program is trying to use the same insn
53359896 * dst_reg = *(u32*) (src_reg + off)
53369897 * with different pointer types:
....@@ -5375,9 +9936,7 @@
53759936
53769937 if (*prev_dst_type == NOT_INIT) {
53779938 *prev_dst_type = dst_reg_type;
5378
- } else if (dst_reg_type != *prev_dst_type &&
5379
- (dst_reg_type == PTR_TO_CTX ||
5380
- *prev_dst_type == PTR_TO_CTX)) {
9939
+ } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
53819940 verbose(env, "same insn cannot be used with different pointers\n");
53829941 return -EINVAL;
53839942 }
....@@ -5394,8 +9953,9 @@
53949953 return err;
53959954
53969955 if (is_ctx_reg(env, insn->dst_reg)) {
5397
- verbose(env, "BPF_ST stores into R%d context is not allowed\n",
5398
- insn->dst_reg);
9956
+ verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
9957
+ insn->dst_reg,
9958
+ reg_type_str[reg_state(env, insn->dst_reg)->type]);
53999959 return -EACCES;
54009960 }
54019961
....@@ -5406,19 +9966,27 @@
54069966 if (err)
54079967 return err;
54089968
5409
- } else if (class == BPF_JMP) {
9969
+ } else if (class == BPF_JMP || class == BPF_JMP32) {
54109970 u8 opcode = BPF_OP(insn->code);
54119971
9972
+ env->jmps_processed++;
54129973 if (opcode == BPF_CALL) {
54139974 if (BPF_SRC(insn->code) != BPF_K ||
54149975 insn->off != 0 ||
54159976 (insn->src_reg != BPF_REG_0 &&
54169977 insn->src_reg != BPF_PSEUDO_CALL) ||
5417
- insn->dst_reg != BPF_REG_0) {
9978
+ insn->dst_reg != BPF_REG_0 ||
9979
+ class == BPF_JMP32) {
54189980 verbose(env, "BPF_CALL uses reserved fields\n");
54199981 return -EINVAL;
54209982 }
54219983
9984
+ if (env->cur_state->active_spin_lock &&
9985
+ (insn->src_reg == BPF_PSEUDO_CALL ||
9986
+ insn->imm != BPF_FUNC_spin_unlock)) {
9987
+ verbose(env, "function calls are not allowed while holding a lock\n");
9988
+ return -EINVAL;
9989
+ }
54229990 if (insn->src_reg == BPF_PSEUDO_CALL)
54239991 err = check_func_call(env, insn, &env->insn_idx);
54249992 else
....@@ -5430,7 +9998,8 @@
54309998 if (BPF_SRC(insn->code) != BPF_K ||
54319999 insn->imm != 0 ||
543210000 insn->src_reg != BPF_REG_0 ||
5433
- insn->dst_reg != BPF_REG_0) {
10001
+ insn->dst_reg != BPF_REG_0 ||
10002
+ class == BPF_JMP32) {
543410003 verbose(env, "BPF_JA uses reserved fields\n");
543510004 return -EINVAL;
543610005 }
....@@ -5442,14 +10011,19 @@
544210011 if (BPF_SRC(insn->code) != BPF_K ||
544310012 insn->imm != 0 ||
544410013 insn->src_reg != BPF_REG_0 ||
5445
- insn->dst_reg != BPF_REG_0) {
10014
+ insn->dst_reg != BPF_REG_0 ||
10015
+ class == BPF_JMP32) {
544610016 verbose(env, "BPF_EXIT uses reserved fields\n");
10017
+ return -EINVAL;
10018
+ }
10019
+
10020
+ if (env->cur_state->active_spin_lock) {
10021
+ verbose(env, "bpf_spin_unlock is missing\n");
544710022 return -EINVAL;
544810023 }
544910024
545010025 if (state->curframe) {
545110026 /* exit from nested function */
5452
- env->prev_insn_idx = env->insn_idx;
545310027 err = prepare_func_exit(env, &env->insn_idx);
545410028 if (err)
545510029 return err;
....@@ -5457,27 +10031,17 @@
545710031 continue;
545810032 }
545910033
5460
- /* eBPF calling convetion is such that R0 is used
5461
- * to return the value from eBPF program.
5462
- * Make sure that it's readable at this time
5463
- * of bpf_exit, which means that program wrote
5464
- * something into it earlier
5465
- */
5466
- err = check_reg_arg(env, BPF_REG_0, SRC_OP);
10034
+ err = check_reference_leak(env);
546710035 if (err)
546810036 return err;
5469
-
5470
- if (is_pointer_value(env, BPF_REG_0)) {
5471
- verbose(env, "R0 leaks addr as return value\n");
5472
- return -EACCES;
5473
- }
547410037
547510038 err = check_return_code(env);
547610039 if (err)
547710040 return err;
547810041 process_bpf_exit:
5479
- err = pop_stack(env, &env->prev_insn_idx,
5480
- &env->insn_idx);
10042
+ update_branch_counts(env, env->cur_state);
10043
+ err = pop_stack(env, &prev_insn_idx,
10044
+ &env->insn_idx, pop_log);
548110045 if (err < 0) {
548210046 if (err != -ENOENT)
548310047 return err;
....@@ -5518,17 +10082,93 @@
551810082 env->insn_idx++;
551910083 }
552010084
5521
- verbose(env, "processed %d insns (limit %d), stack depth ",
5522
- insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
5523
- for (i = 0; i < env->subprog_cnt; i++) {
5524
- u32 depth = env->subprog_info[i].stack_depth;
10085
+ return 0;
10086
+}
552510087
5526
- verbose(env, "%d", depth);
5527
- if (i + 1 < env->subprog_cnt)
5528
- verbose(env, "+");
10088
+/* replace pseudo btf_id with kernel symbol address */
10089
+static int check_pseudo_btf_id(struct bpf_verifier_env *env,
10090
+ struct bpf_insn *insn,
10091
+ struct bpf_insn_aux_data *aux)
10092
+{
10093
+ const struct btf_var_secinfo *vsi;
10094
+ const struct btf_type *datasec;
10095
+ const struct btf_type *t;
10096
+ const char *sym_name;
10097
+ bool percpu = false;
10098
+ u32 type, id = insn->imm;
10099
+ s32 datasec_id;
10100
+ u64 addr;
10101
+ int i;
10102
+
10103
+ if (!btf_vmlinux) {
10104
+ verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
10105
+ return -EINVAL;
552910106 }
5530
- verbose(env, "\n");
5531
- env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
10107
+
10108
+ if (insn[1].imm != 0) {
10109
+ verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
10110
+ return -EINVAL;
10111
+ }
10112
+
10113
+ t = btf_type_by_id(btf_vmlinux, id);
10114
+ if (!t) {
10115
+ verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
10116
+ return -ENOENT;
10117
+ }
10118
+
10119
+ if (!btf_type_is_var(t)) {
10120
+ verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
10121
+ id);
10122
+ return -EINVAL;
10123
+ }
10124
+
10125
+ sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
10126
+ addr = kallsyms_lookup_name(sym_name);
10127
+ if (!addr) {
10128
+ verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
10129
+ sym_name);
10130
+ return -ENOENT;
10131
+ }
10132
+
10133
+ datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
10134
+ BTF_KIND_DATASEC);
10135
+ if (datasec_id > 0) {
10136
+ datasec = btf_type_by_id(btf_vmlinux, datasec_id);
10137
+ for_each_vsi(i, datasec, vsi) {
10138
+ if (vsi->type == id) {
10139
+ percpu = true;
10140
+ break;
10141
+ }
10142
+ }
10143
+ }
10144
+
10145
+ insn[0].imm = (u32)addr;
10146
+ insn[1].imm = addr >> 32;
10147
+
10148
+ type = t->type;
10149
+ t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
10150
+ if (percpu) {
10151
+ aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
10152
+ aux->btf_var.btf_id = type;
10153
+ } else if (!btf_type_is_struct(t)) {
10154
+ const struct btf_type *ret;
10155
+ const char *tname;
10156
+ u32 tsize;
10157
+
10158
+ /* resolve the type size of ksym. */
10159
+ ret = btf_resolve_size(btf_vmlinux, t, &tsize);
10160
+ if (IS_ERR(ret)) {
10161
+ tname = btf_name_by_offset(btf_vmlinux, t->name_off);
10162
+ verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
10163
+ tname, PTR_ERR(ret));
10164
+ return -EINVAL;
10165
+ }
10166
+ aux->btf_var.reg_type = PTR_TO_MEM;
10167
+ aux->btf_var.mem_size = tsize;
10168
+ } else {
10169
+ aux->btf_var.reg_type = PTR_TO_BTF_ID;
10170
+ aux->btf_var.btf_id = type;
10171
+ }
553210172 return 0;
553310173 }
553410174
....@@ -5540,26 +10180,69 @@
554010180 !(map->map_flags & BPF_F_NO_PREALLOC);
554110181 }
554210182
10183
+static bool is_tracing_prog_type(enum bpf_prog_type type)
10184
+{
10185
+ switch (type) {
10186
+ case BPF_PROG_TYPE_KPROBE:
10187
+ case BPF_PROG_TYPE_TRACEPOINT:
10188
+ case BPF_PROG_TYPE_PERF_EVENT:
10189
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
10190
+ return true;
10191
+ default:
10192
+ return false;
10193
+ }
10194
+}
10195
+
10196
+static bool is_preallocated_map(struct bpf_map *map)
10197
+{
10198
+ if (!check_map_prealloc(map))
10199
+ return false;
10200
+ if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
10201
+ return false;
10202
+ return true;
10203
+}
10204
+
554310205 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
554410206 struct bpf_map *map,
554510207 struct bpf_prog *prog)
554610208
554710209 {
5548
- /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
5549
- * preallocated hash maps, since doing memory allocation
5550
- * in overflow_handler can crash depending on where nmi got
5551
- * triggered.
10210
+ enum bpf_prog_type prog_type = resolve_prog_type(prog);
10211
+ /*
10212
+ * Validate that trace type programs use preallocated hash maps.
10213
+ *
10214
+ * For programs attached to PERF events this is mandatory as the
10215
+ * perf NMI can hit any arbitrary code sequence.
10216
+ *
10217
+ * All other trace types using non-preallocated hash maps are unsafe as
10218
+ * well because tracepoint or kprobes can be inside locked regions
10219
+ * of the memory allocator or at a place where a recursion into the
10220
+ * memory allocator would see inconsistent state.
10221
+ *
10222
+ * On RT enabled kernels run-time allocation of all trace type
10223
+ * programs is strictly prohibited due to lock type constraints. On
10224
+ * !RT kernels it is allowed for backwards compatibility reasons for
10225
+ * now, but warnings are emitted so developers are made aware of
10226
+ * the unsafety and can fix their programs before this is enforced.
555210227 */
5553
- if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
5554
- if (!check_map_prealloc(map)) {
10228
+ if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
10229
+ if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
555510230 verbose(env, "perf_event programs can only use preallocated hash map\n");
555610231 return -EINVAL;
555710232 }
5558
- if (map->inner_map_meta &&
5559
- !check_map_prealloc(map->inner_map_meta)) {
5560
- verbose(env, "perf_event programs can only use preallocated inner hash map\n");
10233
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
10234
+ verbose(env, "trace type programs can only use preallocated hash map\n");
556110235 return -EINVAL;
556210236 }
10237
+ WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
10238
+ verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
10239
+ }
10240
+
10241
+ if ((is_tracing_prog_type(prog_type) ||
10242
+ prog_type == BPF_PROG_TYPE_SOCKET_FILTER) &&
10243
+ map_value_has_spin_lock(map)) {
10244
+ verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
10245
+ return -EINVAL;
556310246 }
556410247
556510248 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
....@@ -5568,13 +10251,45 @@
556810251 return -EINVAL;
556910252 }
557010253
10254
+ if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
10255
+ verbose(env, "bpf_struct_ops map cannot be used in prog\n");
10256
+ return -EINVAL;
10257
+ }
10258
+
10259
+ if (prog->aux->sleepable)
10260
+ switch (map->map_type) {
10261
+ case BPF_MAP_TYPE_HASH:
10262
+ case BPF_MAP_TYPE_LRU_HASH:
10263
+ case BPF_MAP_TYPE_ARRAY:
10264
+ if (!is_preallocated_map(map)) {
10265
+ verbose(env,
10266
+ "Sleepable programs can only use preallocated hash maps\n");
10267
+ return -EINVAL;
10268
+ }
10269
+ break;
10270
+ default:
10271
+ verbose(env,
10272
+ "Sleepable programs can only use array and hash maps\n");
10273
+ return -EINVAL;
10274
+ }
10275
+
557110276 return 0;
557210277 }
557310278
5574
-/* look for pseudo eBPF instructions that access map FDs and
5575
- * replace them with actual map pointers
10279
+static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
10280
+{
10281
+ return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
10282
+ map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
10283
+}
10284
+
10285
+/* find and rewrite pseudo imm in ld_imm64 instructions:
10286
+ *
10287
+ * 1. if it accesses map FD, replace it with actual map pointer.
10288
+ * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
10289
+ *
10290
+ * NOTE: btf_vmlinux is required for converting pseudo btf_id.
557610291 */
5577
-static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
10292
+static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
557810293 {
557910294 struct bpf_insn *insn = env->prog->insnsi;
558010295 int insn_cnt = env->prog->len;
....@@ -5599,8 +10314,10 @@
559910314 }
560010315
560110316 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
10317
+ struct bpf_insn_aux_data *aux;
560210318 struct bpf_map *map;
560310319 struct fd f;
10320
+ u64 addr;
560410321
560510322 if (i == insn_cnt - 1 || insn[1].code != 0 ||
560610323 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
....@@ -5609,21 +10326,35 @@
560910326 return -EINVAL;
561010327 }
561110328
5612
- if (insn->src_reg == 0)
10329
+ if (insn[0].src_reg == 0)
561310330 /* valid generic load 64-bit imm */
561410331 goto next_insn;
561510332
5616
- if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
10333
+ if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
10334
+ aux = &env->insn_aux_data[i];
10335
+ err = check_pseudo_btf_id(env, insn, aux);
10336
+ if (err)
10337
+ return err;
10338
+ goto next_insn;
10339
+ }
10340
+
10341
+ /* In final convert_pseudo_ld_imm64() step, this is
10342
+ * converted into regular 64-bit imm load insn.
10343
+ */
10344
+ if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
10345
+ insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
10346
+ (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
10347
+ insn[1].imm != 0)) {
561710348 verbose(env,
561810349 "unrecognized bpf_ld_imm64 insn\n");
561910350 return -EINVAL;
562010351 }
562110352
5622
- f = fdget(insn->imm);
10353
+ f = fdget(insn[0].imm);
562310354 map = __bpf_map_get(f);
562410355 if (IS_ERR(map)) {
562510356 verbose(env, "fd %d is not pointing to valid bpf_map\n",
5626
- insn->imm);
10357
+ insn[0].imm);
562710358 return PTR_ERR(map);
562810359 }
562910360
....@@ -5633,16 +10364,47 @@
563310364 return err;
563410365 }
563510366
5636
- /* store map pointer inside BPF_LD_IMM64 instruction */
5637
- insn[0].imm = (u32) (unsigned long) map;
5638
- insn[1].imm = ((u64) (unsigned long) map) >> 32;
10367
+ aux = &env->insn_aux_data[i];
10368
+ if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
10369
+ addr = (unsigned long)map;
10370
+ } else {
10371
+ u32 off = insn[1].imm;
10372
+
10373
+ if (off >= BPF_MAX_VAR_OFF) {
10374
+ verbose(env, "direct value offset of %u is not allowed\n", off);
10375
+ fdput(f);
10376
+ return -EINVAL;
10377
+ }
10378
+
10379
+ if (!map->ops->map_direct_value_addr) {
10380
+ verbose(env, "no direct value access support for this map type\n");
10381
+ fdput(f);
10382
+ return -EINVAL;
10383
+ }
10384
+
10385
+ err = map->ops->map_direct_value_addr(map, &addr, off);
10386
+ if (err) {
10387
+ verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
10388
+ map->value_size, off);
10389
+ fdput(f);
10390
+ return err;
10391
+ }
10392
+
10393
+ aux->map_off = off;
10394
+ addr += off;
10395
+ }
10396
+
10397
+ insn[0].imm = (u32)addr;
10398
+ insn[1].imm = addr >> 32;
563910399
564010400 /* check whether we recorded this map already */
5641
- for (j = 0; j < env->used_map_cnt; j++)
10401
+ for (j = 0; j < env->used_map_cnt; j++) {
564210402 if (env->used_maps[j] == map) {
10403
+ aux->map_index = j;
564310404 fdput(f);
564410405 goto next_insn;
564510406 }
10407
+ }
564610408
564710409 if (env->used_map_cnt >= MAX_USED_MAPS) {
564810410 fdput(f);
....@@ -5654,17 +10416,14 @@
565410416 * will be used by the valid program until it's unloaded
565510417 * and all maps are released in free_used_maps()
565610418 */
5657
- map = bpf_map_inc(map, false);
5658
- if (IS_ERR(map)) {
5659
- fdput(f);
5660
- return PTR_ERR(map);
5661
- }
10419
+ bpf_map_inc(map);
10420
+
10421
+ aux->map_index = env->used_map_cnt;
566210422 env->used_maps[env->used_map_cnt++] = map;
566310423
5664
- if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE &&
5665
- bpf_cgroup_storage_assign(env->prog, map)) {
5666
- verbose(env,
5667
- "only one cgroup storage is allowed\n");
10424
+ if (bpf_map_is_cgroup_storage(map) &&
10425
+ bpf_cgroup_storage_assign(env->prog->aux, map)) {
10426
+ verbose(env, "only one cgroup storage of each type is allowed\n");
566810427 fdput(f);
566910428 return -EBUSY;
567010429 }
....@@ -5693,14 +10452,8 @@
569310452 /* drop refcnt of maps used by the rejected program */
569410453 static void release_maps(struct bpf_verifier_env *env)
569510454 {
5696
- int i;
5697
-
5698
- if (env->prog->aux->cgroup_storage)
5699
- bpf_cgroup_storage_release(env->prog,
5700
- env->prog->aux->cgroup_storage);
5701
-
5702
- for (i = 0; i < env->used_map_cnt; i++)
5703
- bpf_map_put(env->used_maps[i]);
10455
+ __bpf_free_used_maps(env->prog->aux, env->used_maps,
10456
+ env->used_map_cnt);
570410457 }
570510458
570610459 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
....@@ -5719,29 +10472,36 @@
571910472 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
572010473 * [0, off) and [off, end) to new locations, so the patched range stays zero
572110474 */
5722
-static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
5723
- u32 off, u32 cnt)
10475
+static void adjust_insn_aux_data(struct bpf_verifier_env *env,
10476
+ struct bpf_insn_aux_data *new_data,
10477
+ struct bpf_prog *new_prog, u32 off, u32 cnt)
572410478 {
5725
- struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
5726
- bool old_seen = old_data[off].seen;
10479
+ struct bpf_insn_aux_data *old_data = env->insn_aux_data;
10480
+ struct bpf_insn *insn = new_prog->insnsi;
10481
+ u32 old_seen = old_data[off].seen;
10482
+ u32 prog_len;
572710483 int i;
572810484
10485
+ /* aux info at OFF always needs adjustment, no matter fast path
10486
+ * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
10487
+ * original insn at old prog.
10488
+ */
10489
+ old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
10490
+
572910491 if (cnt == 1)
5730
- return 0;
5731
- new_data = vzalloc(array_size(prog_len,
5732
- sizeof(struct bpf_insn_aux_data)));
5733
- if (!new_data)
5734
- return -ENOMEM;
10492
+ return;
10493
+ prog_len = new_prog->len;
10494
+
573510495 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
573610496 memcpy(new_data + off + cnt - 1, old_data + off,
573710497 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
573810498 for (i = off; i < off + cnt - 1; i++) {
573910499 /* Expand insni[off]'s seen count to the patched range. */
574010500 new_data[i].seen = old_seen;
10501
+ new_data[i].zext_dst = insn_has_def32(env, insn + i);
574110502 }
574210503 env->insn_aux_data = new_data;
574310504 vfree(old_data);
5744
- return 0;
574510505 }
574610506
574710507 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
....@@ -5758,18 +10518,193 @@
575810518 }
575910519 }
576010520
10521
+static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
10522
+{
10523
+ struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
10524
+ int i, sz = prog->aux->size_poke_tab;
10525
+ struct bpf_jit_poke_descriptor *desc;
10526
+
10527
+ for (i = 0; i < sz; i++) {
10528
+ desc = &tab[i];
10529
+ if (desc->insn_idx <= off)
10530
+ continue;
10531
+ desc->insn_idx += len - 1;
10532
+ }
10533
+}
10534
+
576110535 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
576210536 const struct bpf_insn *patch, u32 len)
576310537 {
576410538 struct bpf_prog *new_prog;
10539
+ struct bpf_insn_aux_data *new_data = NULL;
10540
+
10541
+ if (len > 1) {
10542
+ new_data = vzalloc(array_size(env->prog->len + len - 1,
10543
+ sizeof(struct bpf_insn_aux_data)));
10544
+ if (!new_data)
10545
+ return NULL;
10546
+ }
576510547
576610548 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
5767
- if (!new_prog)
10549
+ if (IS_ERR(new_prog)) {
10550
+ if (PTR_ERR(new_prog) == -ERANGE)
10551
+ verbose(env,
10552
+ "insn %d cannot be patched due to 16-bit range\n",
10553
+ env->insn_aux_data[off].orig_idx);
10554
+ vfree(new_data);
576810555 return NULL;
5769
- if (adjust_insn_aux_data(env, new_prog->len, off, len))
5770
- return NULL;
10556
+ }
10557
+ adjust_insn_aux_data(env, new_data, new_prog, off, len);
577110558 adjust_subprog_starts(env, off, len);
10559
+ adjust_poke_descs(new_prog, off, len);
577210560 return new_prog;
10561
+}
10562
+
10563
+static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
10564
+ u32 off, u32 cnt)
10565
+{
10566
+ int i, j;
10567
+
10568
+ /* find first prog starting at or after off (first to remove) */
10569
+ for (i = 0; i < env->subprog_cnt; i++)
10570
+ if (env->subprog_info[i].start >= off)
10571
+ break;
10572
+ /* find first prog starting at or after off + cnt (first to stay) */
10573
+ for (j = i; j < env->subprog_cnt; j++)
10574
+ if (env->subprog_info[j].start >= off + cnt)
10575
+ break;
10576
+ /* if j doesn't start exactly at off + cnt, we are just removing
10577
+ * the front of previous prog
10578
+ */
10579
+ if (env->subprog_info[j].start != off + cnt)
10580
+ j--;
10581
+
10582
+ if (j > i) {
10583
+ struct bpf_prog_aux *aux = env->prog->aux;
10584
+ int move;
10585
+
10586
+ /* move fake 'exit' subprog as well */
10587
+ move = env->subprog_cnt + 1 - j;
10588
+
10589
+ memmove(env->subprog_info + i,
10590
+ env->subprog_info + j,
10591
+ sizeof(*env->subprog_info) * move);
10592
+ env->subprog_cnt -= j - i;
10593
+
10594
+ /* remove func_info */
10595
+ if (aux->func_info) {
10596
+ move = aux->func_info_cnt - j;
10597
+
10598
+ memmove(aux->func_info + i,
10599
+ aux->func_info + j,
10600
+ sizeof(*aux->func_info) * move);
10601
+ aux->func_info_cnt -= j - i;
10602
+ /* func_info->insn_off is set after all code rewrites,
10603
+ * in adjust_btf_func() - no need to adjust
10604
+ */
10605
+ }
10606
+ } else {
10607
+ /* convert i from "first prog to remove" to "first to adjust" */
10608
+ if (env->subprog_info[i].start == off)
10609
+ i++;
10610
+ }
10611
+
10612
+ /* update fake 'exit' subprog as well */
10613
+ for (; i <= env->subprog_cnt; i++)
10614
+ env->subprog_info[i].start -= cnt;
10615
+
10616
+ return 0;
10617
+}
10618
+
10619
+static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
10620
+ u32 cnt)
10621
+{
10622
+ struct bpf_prog *prog = env->prog;
10623
+ u32 i, l_off, l_cnt, nr_linfo;
10624
+ struct bpf_line_info *linfo;
10625
+
10626
+ nr_linfo = prog->aux->nr_linfo;
10627
+ if (!nr_linfo)
10628
+ return 0;
10629
+
10630
+ linfo = prog->aux->linfo;
10631
+
10632
+ /* find first line info to remove, count lines to be removed */
10633
+ for (i = 0; i < nr_linfo; i++)
10634
+ if (linfo[i].insn_off >= off)
10635
+ break;
10636
+
10637
+ l_off = i;
10638
+ l_cnt = 0;
10639
+ for (; i < nr_linfo; i++)
10640
+ if (linfo[i].insn_off < off + cnt)
10641
+ l_cnt++;
10642
+ else
10643
+ break;
10644
+
10645
+ /* First live insn doesn't match first live linfo, it needs to "inherit"
10646
+ * last removed linfo. prog is already modified, so prog->len == off
10647
+ * means no live instructions after (tail of the program was removed).
10648
+ */
10649
+ if (prog->len != off && l_cnt &&
10650
+ (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
10651
+ l_cnt--;
10652
+ linfo[--i].insn_off = off + cnt;
10653
+ }
10654
+
10655
+ /* remove the line info which refer to the removed instructions */
10656
+ if (l_cnt) {
10657
+ memmove(linfo + l_off, linfo + i,
10658
+ sizeof(*linfo) * (nr_linfo - i));
10659
+
10660
+ prog->aux->nr_linfo -= l_cnt;
10661
+ nr_linfo = prog->aux->nr_linfo;
10662
+ }
10663
+
10664
+ /* pull all linfo[i].insn_off >= off + cnt in by cnt */
10665
+ for (i = l_off; i < nr_linfo; i++)
10666
+ linfo[i].insn_off -= cnt;
10667
+
10668
+ /* fix up all subprogs (incl. 'exit') which start >= off */
10669
+ for (i = 0; i <= env->subprog_cnt; i++)
10670
+ if (env->subprog_info[i].linfo_idx > l_off) {
10671
+ /* program may have started in the removed region but
10672
+ * may not be fully removed
10673
+ */
10674
+ if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
10675
+ env->subprog_info[i].linfo_idx -= l_cnt;
10676
+ else
10677
+ env->subprog_info[i].linfo_idx = l_off;
10678
+ }
10679
+
10680
+ return 0;
10681
+}
10682
+
10683
+static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
10684
+{
10685
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10686
+ unsigned int orig_prog_len = env->prog->len;
10687
+ int err;
10688
+
10689
+ if (bpf_prog_is_dev_bound(env->prog->aux))
10690
+ bpf_prog_offload_remove_insns(env, off, cnt);
10691
+
10692
+ err = bpf_remove_insns(env->prog, off, cnt);
10693
+ if (err)
10694
+ return err;
10695
+
10696
+ err = adjust_subprog_starts_after_remove(env, off, cnt);
10697
+ if (err)
10698
+ return err;
10699
+
10700
+ err = bpf_adj_linfo_after_remove(env, off, cnt);
10701
+ if (err)
10702
+ return err;
10703
+
10704
+ memmove(aux_data + off, aux_data + off + cnt,
10705
+ sizeof(*aux_data) * (orig_prog_len - off - cnt));
10706
+
10707
+ return 0;
577310708 }
577410709
577510710 /* The verifier does more data flow analysis than llvm and will not
....@@ -5795,11 +10730,177 @@
579510730 if (aux_data[i].seen)
579610731 continue;
579710732 memcpy(insn + i, &trap, sizeof(trap));
10733
+ aux_data[i].zext_dst = false;
579810734 }
579910735 }
580010736
5801
-/* convert load instructions that access fields of 'struct __sk_buff'
5802
- * into sequence of instructions that access fields of 'struct sk_buff'
10737
+static bool insn_is_cond_jump(u8 code)
10738
+{
10739
+ u8 op;
10740
+
10741
+ if (BPF_CLASS(code) == BPF_JMP32)
10742
+ return true;
10743
+
10744
+ if (BPF_CLASS(code) != BPF_JMP)
10745
+ return false;
10746
+
10747
+ op = BPF_OP(code);
10748
+ return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
10749
+}
10750
+
10751
+static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
10752
+{
10753
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10754
+ struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10755
+ struct bpf_insn *insn = env->prog->insnsi;
10756
+ const int insn_cnt = env->prog->len;
10757
+ int i;
10758
+
10759
+ for (i = 0; i < insn_cnt; i++, insn++) {
10760
+ if (!insn_is_cond_jump(insn->code))
10761
+ continue;
10762
+
10763
+ if (!aux_data[i + 1].seen)
10764
+ ja.off = insn->off;
10765
+ else if (!aux_data[i + 1 + insn->off].seen)
10766
+ ja.off = 0;
10767
+ else
10768
+ continue;
10769
+
10770
+ if (bpf_prog_is_dev_bound(env->prog->aux))
10771
+ bpf_prog_offload_replace_insn(env, i, &ja);
10772
+
10773
+ memcpy(insn, &ja, sizeof(ja));
10774
+ }
10775
+}
10776
+
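/* Editor's note -- illustrative only, not part of this patch: when only one
 * side of a conditional jump was ever marked 'seen', the branch above is
 * hard-wired into an unconditional BPF_JA, e.g.
 *
 *   if r1 == 0 goto +5   ->   goto +5   (fall-through insn never seen)
 *   if r1 == 0 goto +5   ->   goto +0   (jump target never seen)
 *
 * so the later dead-code removal passes see a single live path.
 */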
10777
+static int opt_remove_dead_code(struct bpf_verifier_env *env)
10778
+{
10779
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10780
+ int insn_cnt = env->prog->len;
10781
+ int i, err;
10782
+
10783
+ for (i = 0; i < insn_cnt; i++) {
10784
+ int j;
10785
+
10786
+ j = 0;
10787
+ while (i + j < insn_cnt && !aux_data[i + j].seen)
10788
+ j++;
10789
+ if (!j)
10790
+ continue;
10791
+
10792
+ err = verifier_remove_insns(env, i, j);
10793
+ if (err)
10794
+ return err;
10795
+ insn_cnt = env->prog->len;
10796
+ }
10797
+
10798
+ return 0;
10799
+}
10800
+
10801
+static int opt_remove_nops(struct bpf_verifier_env *env)
10802
+{
10803
+ const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10804
+ struct bpf_insn *insn = env->prog->insnsi;
10805
+ int insn_cnt = env->prog->len;
10806
+ int i, err;
10807
+
10808
+ for (i = 0; i < insn_cnt; i++) {
10809
+ if (memcmp(&insn[i], &ja, sizeof(ja)))
10810
+ continue;
10811
+
10812
+ err = verifier_remove_insns(env, i, 1);
10813
+ if (err)
10814
+ return err;
10815
+ insn_cnt--;
10816
+ i--;
10817
+ }
10818
+
10819
+ return 0;
10820
+}
10821
+
10822
+static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
10823
+ const union bpf_attr *attr)
10824
+{
10825
+ struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
10826
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
10827
+ int i, patch_len, delta = 0, len = env->prog->len;
10828
+ struct bpf_insn *insns = env->prog->insnsi;
10829
+ struct bpf_prog *new_prog;
10830
+ bool rnd_hi32;
10831
+
10832
+ rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
10833
+ zext_patch[1] = BPF_ZEXT_REG(0);
10834
+ rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
10835
+ rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
10836
+ rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
10837
+ for (i = 0; i < len; i++) {
10838
+ int adj_idx = i + delta;
10839
+ struct bpf_insn insn;
10840
+
10841
+ insn = insns[adj_idx];
10842
+ if (!aux[adj_idx].zext_dst) {
10843
+ u8 code, class;
10844
+ u32 imm_rnd;
10845
+
10846
+ if (!rnd_hi32)
10847
+ continue;
10848
+
10849
+ code = insn.code;
10850
+ class = BPF_CLASS(code);
10851
+ if (insn_no_def(&insn))
10852
+ continue;
10853
+
10854
+ /* NOTE: arg "reg" (the fourth one) is only used for
10855
+ * BPF_STX which has been ruled out in above
10856
+ * check, it is safe to pass NULL here.
10857
+ */
10858
+ if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
10859
+ if (class == BPF_LD &&
10860
+ BPF_MODE(code) == BPF_IMM)
10861
+ i++;
10862
+ continue;
10863
+ }
10864
+
10865
+ /* ctx load could be transformed into wider load. */
10866
+ if (class == BPF_LDX &&
10867
+ aux[adj_idx].ptr_type == PTR_TO_CTX)
10868
+ continue;
10869
+
10870
+ imm_rnd = get_random_int();
10871
+ rnd_hi32_patch[0] = insn;
10872
+ rnd_hi32_patch[1].imm = imm_rnd;
10873
+ rnd_hi32_patch[3].dst_reg = insn.dst_reg;
10874
+ patch = rnd_hi32_patch;
10875
+ patch_len = 4;
10876
+ goto apply_patch_buffer;
10877
+ }
10878
+
10879
+ if (!bpf_jit_needs_zext())
10880
+ continue;
10881
+
10882
+ zext_patch[0] = insn;
10883
+ zext_patch[1].dst_reg = insn.dst_reg;
10884
+ zext_patch[1].src_reg = insn.dst_reg;
10885
+ patch = zext_patch;
10886
+ patch_len = 2;
10887
+apply_patch_buffer:
10888
+ new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
10889
+ if (!new_prog)
10890
+ return -ENOMEM;
10891
+ env->prog = new_prog;
10892
+ insns = new_prog->insnsi;
10893
+ aux = env->insn_aux_data;
10894
+ delta += patch_len - 1;
10895
+ }
10896
+
10897
+ return 0;
10898
+}
10899
+
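/* Editor's note -- illustrative only, not part of this patch: for an insn
 * that defines a 32-bit subregister the pass above appends an explicit
 * zero-extension, and with BPF_F_TEST_RND_HI32 it poisons the upper half
 * instead so that programs relying on implicit zero-extension fail tests:
 *
 *   zext_patch:      <orig insn>; dst = (u32)dst            (BPF_ZEXT_REG)
 *   rnd_hi32_patch:  <orig insn>; AX = imm_rnd; AX <<= 32; dst |= AX
 */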
10900
+/* convert load instructions that access fields of a context type into a
10901
+ * sequence of instructions that access fields of the underlying structure:
10902
+ * struct __sk_buff -> struct sk_buff
10903
+ * struct bpf_sock_ops -> struct sock
580310904 */
580410905 static int convert_ctx_accesses(struct bpf_verifier_env *env)
580510906 {
....@@ -5812,7 +10913,11 @@
581210913 enum bpf_access_type type;
581310914 bool is_narrower_load;
581410915
5815
- if (ops->gen_prologue) {
10916
+ if (ops->gen_prologue || env->seen_direct_write) {
10917
+ if (!ops->gen_prologue) {
10918
+ verbose(env, "bpf verifier is misconfigured\n");
10919
+ return -EINVAL;
10920
+ }
581610921 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
581710922 env->prog);
581810923 if (cnt >= ARRAY_SIZE(insn_buf)) {
....@@ -5828,12 +10933,13 @@
582810933 }
582910934 }
583010935
5831
- if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
10936
+ if (bpf_prog_is_dev_bound(env->prog->aux))
583210937 return 0;
583310938
583410939 insn = env->prog->insnsi + delta;
583510940
583610941 for (i = 0; i < insn_cnt; i++, insn++) {
10942
+ bpf_convert_ctx_access_t convert_ctx_access;
583710943 bool ctx_access;
583810944
583910945 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
....@@ -5877,8 +10983,35 @@
587710983 if (!ctx_access)
587810984 continue;
587910985
5880
- if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
10986
+ switch (env->insn_aux_data[i + delta].ptr_type) {
10987
+ case PTR_TO_CTX:
10988
+ if (!ops->convert_ctx_access)
10989
+ continue;
10990
+ convert_ctx_access = ops->convert_ctx_access;
10991
+ break;
10992
+ case PTR_TO_SOCKET:
10993
+ case PTR_TO_SOCK_COMMON:
10994
+ convert_ctx_access = bpf_sock_convert_ctx_access;
10995
+ break;
10996
+ case PTR_TO_TCP_SOCK:
10997
+ convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
10998
+ break;
10999
+ case PTR_TO_XDP_SOCK:
11000
+ convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
11001
+ break;
11002
+ case PTR_TO_BTF_ID:
11003
+ if (type == BPF_READ) {
11004
+ insn->code = BPF_LDX | BPF_PROBE_MEM |
11005
+ BPF_SIZE((insn)->code);
11006
+ env->prog->aux->num_exentries++;
11007
+ } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
11008
+ verbose(env, "Writes through BTF pointers are not allowed\n");
11009
+ return -EINVAL;
11010
+ }
588111011 continue;
11012
+ default:
11013
+ continue;
11014
+ }
588211015
588311016 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
588411017 size = BPF_LDST_BYTES(insn);
....@@ -5910,8 +11043,8 @@
591011043 }
591111044
591211045 target_size = 0;
5913
- cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
5914
- &target_size);
11046
+ cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
11047
+ &target_size);
591511048 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
591611049 (ctx_field_size && !target_size)) {
591711050 verbose(env, "bpf verifier is misconfigured\n");
....@@ -5919,8 +11052,12 @@
591911052 }
592011053
592111054 if (is_narrower_load && size < target_size) {
5922
- u8 shift = (off & (size_default - 1)) * 8;
5923
-
11055
+ u8 shift = bpf_ctx_narrow_access_offset(
11056
+ off, size, size_default) * 8;
11057
+ if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
11058
+ verbose(env, "bpf verifier narrow ctx load misconfigured\n");
11059
+ return -EINVAL;
11060
+ }
592411061 if (ctx_field_size <= 4) {
592511062 if (shift)
592611063 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
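/* Worked example, not kernel code: for a narrow load the verifier fetches the
 * full-size field and extracts the requested bytes with the shift and mask
 * emitted above. bpf_ctx_narrow_access_offset() generalizes the removed
 * little-endian-only "off & (size_default - 1)" formula to big-endian hosts;
 * the little-endian case works out like this:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t field = 0xaabbccdd;     /* full 4-byte ctx field, little endian */
	unsigned int off = 2, size = 1, size_default = 4;
	unsigned int shift = (off & (size_default - 1)) * 8;    /* 16 */
	uint32_t narrow = (field >> shift) & ((1u << (size * 8)) - 1);

	printf("%#x\n", (unsigned int)narrow);   /* 0xbb, the byte at offset 2 */
	return 0;
}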
....@@ -5956,9 +11093,10 @@
595611093 {
595711094 struct bpf_prog *prog = env->prog, **func, *tmp;
595811095 int i, j, subprog_start, subprog_end = 0, len, subprog;
11096
+ struct bpf_map *map_ptr;
595911097 struct bpf_insn *insn;
596011098 void *old_bpf_func;
5961
- int err = -ENOMEM;
11099
+ int err, num_exentries;
596211100
596311101 if (env->subprog_cnt <= 1)
596411102 return 0;
....@@ -5989,6 +11127,11 @@
598911127 insn->imm = 1;
599011128 }
599111129
11130
+ err = bpf_prog_alloc_jited_linfo(prog);
11131
+ if (err)
11132
+ goto out_undo_insn;
11133
+
11134
+ err = -ENOMEM;
599211135 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
599311136 if (!func)
599411137 goto out_undo_insn;
....@@ -5998,7 +11141,12 @@
599811141 subprog_end = env->subprog_info[i + 1].start;
599911142
600011143 len = subprog_end - subprog_start;
6001
- func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
11144
+ /* BPF_PROG_RUN doesn't call subprogs directly,
11145
+ * hence main prog stats include the runtime of subprogs.
11146
+ * subprogs don't have IDs and are not reachable via prog_get_next_id, so
11147
+ * func[i]->aux->stats will never be accessed and stays NULL
11148
+ */
11149
+ func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
600211150 if (!func[i])
600311151 goto out_free;
600411152 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
....@@ -6008,12 +11156,53 @@
600811156 if (bpf_prog_calc_tag(func[i]))
600911157 goto out_free;
601011158 func[i]->is_func = 1;
6011
- /* Use bpf_prog_F_tag to indicate functions in stack traces.
6012
- * Long term would need debug info to populate names
6013
- */
11159
+ func[i]->aux->func_idx = i;
11160
+ /* the btf and func_info will be freed only at prog->aux */
11161
+ func[i]->aux->btf = prog->aux->btf;
11162
+ func[i]->aux->func_info = prog->aux->func_info;
11163
+ func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
11164
+
11165
+ for (j = 0; j < prog->aux->size_poke_tab; j++) {
11166
+ u32 insn_idx = prog->aux->poke_tab[j].insn_idx;
11167
+ int ret;
11168
+
11169
+ if (!(insn_idx >= subprog_start &&
11170
+ insn_idx <= subprog_end))
11171
+ continue;
11172
+
11173
+ ret = bpf_jit_add_poke_descriptor(func[i],
11174
+ &prog->aux->poke_tab[j]);
11175
+ if (ret < 0) {
11176
+ verbose(env, "adding tail call poke descriptor failed\n");
11177
+ goto out_free;
11178
+ }
11179
+
11180
+ func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1;
11181
+
11182
+ map_ptr = func[i]->aux->poke_tab[ret].tail_call.map;
11183
+ ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux);
11184
+ if (ret < 0) {
11185
+ verbose(env, "tracking tail call prog failed\n");
11186
+ goto out_free;
11187
+ }
11188
+ }
11189
+
601411190 func[i]->aux->name[0] = 'F';
601511191 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
601611192 func[i]->jit_requested = 1;
11193
+ func[i]->aux->linfo = prog->aux->linfo;
11194
+ func[i]->aux->nr_linfo = prog->aux->nr_linfo;
11195
+ func[i]->aux->jited_linfo = prog->aux->jited_linfo;
11196
+ func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
11197
+ num_exentries = 0;
11198
+ insn = func[i]->insnsi;
11199
+ for (j = 0; j < func[i]->len; j++, insn++) {
11200
+ if (BPF_CLASS(insn->code) == BPF_LDX &&
11201
+ BPF_MODE(insn->code) == BPF_PROBE_MEM)
11202
+ num_exentries++;
11203
+ }
11204
+ func[i]->aux->num_exentries = num_exentries;
11205
+ func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
601711206 func[i] = bpf_int_jit_compile(func[i]);
601811207 if (!func[i]->jited) {
601911208 err = -ENOTSUPP;
....@@ -6021,6 +11210,19 @@
602111210 }
602211211 cond_resched();
602311212 }
11213
+
11214
+ /* Untrack main program's aux structs so that during map_poke_run()
11215
+ * we will not stumble upon the unfilled poke descriptors; each
11216
+ * of the main program's poke descs got distributed across subprogs
11217
+ * and got tracked onto the map, so we are sure that none of them will
11218
+ * be missed after the operation below
11219
+ */
11220
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
11221
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
11222
+
11223
+ map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
11224
+ }
11225
+
602411226 /* at this point all bpf functions were successfully JITed
602511227 * now populate all bpf_calls with correct addresses and
602611228 * run last pass of JIT
....@@ -6032,9 +11234,8 @@
603211234 insn->src_reg != BPF_PSEUDO_CALL)
603311235 continue;
603411236 subprog = insn->off;
6035
- insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
6036
- func[subprog]->bpf_func -
6037
- __bpf_call_base;
11237
+ insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
11238
+ __bpf_call_base;
603811239 }
603911240
604011241 /* we use the aux data to keep a list of the start addresses
....@@ -6087,11 +11288,19 @@
608711288 prog->bpf_func = func[0]->bpf_func;
608811289 prog->aux->func = func;
608911290 prog->aux->func_cnt = env->subprog_cnt;
11291
+ bpf_prog_free_unused_jited_linfo(prog);
609011292 return 0;
609111293 out_free:
6092
- for (i = 0; i < env->subprog_cnt; i++)
6093
- if (func[i])
6094
- bpf_jit_free(func[i]);
11294
+ for (i = 0; i < env->subprog_cnt; i++) {
11295
+ if (!func[i])
11296
+ continue;
11297
+
11298
+ for (j = 0; j < func[i]->aux->size_poke_tab; j++) {
11299
+ map_ptr = func[i]->aux->poke_tab[j].tail_call.map;
11300
+ map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux);
11301
+ }
11302
+ bpf_jit_free(func[i]);
11303
+ }
609511304 kfree(func);
609611305 out_undo_insn:
609711306 /* cleanup main prog to be interpreted */
....@@ -6103,6 +11312,7 @@
610311312 insn->off = 0;
610411313 insn->imm = env->insn_aux_data[i].call_imm;
610511314 }
11315
+ bpf_prog_free_jited_linfo(prog);
610611316 return err;
610711317 }
610811318
....@@ -6113,10 +11323,10 @@
611311323 struct bpf_insn *insn = prog->insnsi;
611411324 int i, depth;
611511325 #endif
6116
- int err;
11326
+ int err = 0;
611711327
6118
- err = 0;
6119
- if (env->prog->jit_requested) {
11328
+ if (env->prog->jit_requested &&
11329
+ !bpf_prog_is_dev_bound(env->prog->aux)) {
612011330 err = jit_subprogs(env);
612111331 if (err == 0)
612211332 return 0;
....@@ -6124,6 +11334,13 @@
612411334 return err;
612511335 }
612611336 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11337
+ if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
11338
+ /* When JIT fails the progs with bpf2bpf calls and tail_calls
11339
+ * have to be rejected, since the interpreter doesn't support them yet.
11340
+ */
11341
+ verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
11342
+ return -EINVAL;
11343
+ }
612711344 for (i = 0; i < prog->len; i++, insn++) {
612811345 if (insn->code != (BPF_JMP | BPF_CALL) ||
612911346 insn->src_reg != BPF_PSEUDO_CALL)
....@@ -6146,6 +11363,7 @@
614611363 static int fixup_bpf_calls(struct bpf_verifier_env *env)
614711364 {
614811365 struct bpf_prog *prog = env->prog;
11366
+ bool expect_blinding = bpf_jit_blinding_enabled(prog);
614911367 struct bpf_insn *insn = prog->insnsi;
615011368 const struct bpf_func_proto *fn;
615111369 const int insn_cnt = prog->len;
....@@ -6154,7 +11372,7 @@
615411372 struct bpf_insn insn_buf[16];
615511373 struct bpf_prog *new_prog;
615611374 struct bpf_map *map_ptr;
6157
- int i, cnt, delta = 0;
11375
+ int i, ret, cnt, delta = 0;
615811376
615911377 for (i = 0; i < insn_cnt; i++, insn++) {
616011378 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
....@@ -6162,31 +11380,30 @@
616211380 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
616311381 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
616411382 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
6165
- struct bpf_insn mask_and_div[] = {
6166
- BPF_MOV_REG(BPF_CLASS(insn->code), BPF_REG_AX, insn->src_reg),
11383
+ bool isdiv = BPF_OP(insn->code) == BPF_DIV;
11384
+ struct bpf_insn *patchlet;
11385
+ struct bpf_insn chk_and_div[] = {
616711386 /* [R,W]x div 0 -> 0 */
6168
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, 2),
6169
- BPF_RAW_REG(*insn, insn->dst_reg, BPF_REG_AX),
11387
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
11388
+ BPF_JNE | BPF_K, insn->src_reg,
11389
+ 0, 2, 0),
11390
+ BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
617011391 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
6171
- BPF_ALU_REG(BPF_CLASS(insn->code), BPF_XOR, insn->dst_reg, insn->dst_reg),
11392
+ *insn,
617211393 };
6173
- struct bpf_insn mask_and_mod[] = {
6174
- BPF_MOV_REG(BPF_CLASS(insn->code), BPF_REG_AX, insn->src_reg),
6175
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, 1 + (is64 ? 0 : 1)),
6176
- BPF_RAW_REG(*insn, insn->dst_reg, BPF_REG_AX),
11394
+ struct bpf_insn chk_and_mod[] = {
11395
+ /* [R,W]x mod 0 -> [R,W]x */
11396
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
11397
+ BPF_JEQ | BPF_K, insn->src_reg,
11398
+ 0, 1 + (is64 ? 0 : 1), 0),
11399
+ *insn,
617711400 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
617811401 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
617911402 };
6180
- struct bpf_insn *patchlet;
618111403
6182
- if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
6183
- insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
6184
- patchlet = mask_and_div;
6185
- cnt = ARRAY_SIZE(mask_and_div);
6186
- } else {
6187
- patchlet = mask_and_mod;
6188
- cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 2 : 0);
6189
- }
11404
+ patchlet = isdiv ? chk_and_div : chk_and_mod;
11405
+ cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
11406
+ ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
619011407
619111408 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
619211409 if (!new_prog)
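/* Worked example, not kernel code: the chk_and_div/chk_and_mod patchlets above
 * enforce the eBPF runtime semantics for a zero divisor without trapping:
 * division by zero yields 0, modulo by zero leaves the dividend unchanged
 * (truncated to 32 bits for 32-bit ops). A plain-C model of the 64-bit case:
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t bpf_div64(uint64_t dst, uint64_t src)
{
	return src ? dst / src : 0;          /* [R,W]x div 0 -> 0      */
}

static uint64_t bpf_mod64(uint64_t dst, uint64_t src)
{
	return src ? dst % src : dst;        /* [R,W]x mod 0 -> [R,W]x */
}

int main(void)
{
	printf("%llu %llu\n",
	       (unsigned long long)bpf_div64(42, 0),     /* 0  */
	       (unsigned long long)bpf_mod64(42, 0));    /* 42 */
	return 0;
}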
....@@ -6288,7 +11505,9 @@
628811505 * the program array.
628911506 */
629011507 prog->cb_access = 1;
6291
- env->prog->aux->stack_depth = MAX_BPF_STACK;
11508
+ if (!allow_tail_call_in_subprogs(env))
11509
+ prog->aux->stack_depth = MAX_BPF_STACK;
11510
+ prog->aux->max_pkt_offset = MAX_PACKET_OFF;
629211511
629311512 /* mark bpf_tail_call as a different opcode to avoid
629411513 * conditional branch in the interpreter for every normal
....@@ -6299,6 +11518,28 @@
629911518 insn->code = BPF_JMP | BPF_TAIL_CALL;
630011519
630111520 aux = &env->insn_aux_data[i + delta];
11521
+ if (env->bpf_capable && !expect_blinding &&
11522
+ prog->jit_requested &&
11523
+ !bpf_map_key_poisoned(aux) &&
11524
+ !bpf_map_ptr_poisoned(aux) &&
11525
+ !bpf_map_ptr_unpriv(aux)) {
11526
+ struct bpf_jit_poke_descriptor desc = {
11527
+ .reason = BPF_POKE_REASON_TAIL_CALL,
11528
+ .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
11529
+ .tail_call.key = bpf_map_key_immediate(aux),
11530
+ .insn_idx = i + delta,
11531
+ };
11532
+
11533
+ ret = bpf_jit_add_poke_descriptor(prog, &desc);
11534
+ if (ret < 0) {
11535
+ verbose(env, "adding tail call poke descriptor failed\n");
11536
+ return ret;
11537
+ }
11538
+
11539
+ insn->imm = ret + 1;
11540
+ continue;
11541
+ }
11542
+
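/* Illustrative BPF C fragment, assuming a libbpf-style build with
 * <bpf/bpf_helpers.h> (program, map and section names are made up): a
 * bpf_tail_call() whose prog-array map and index are compile-time constants is
 * what lets the branch above install a poke descriptor, so the JIT can later
 * patch the tail call into a direct jump instead of the generic
 * bounds-checked lookup.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 2);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

SEC("xdp")
int entry(struct xdp_md *ctx)
{
	bpf_tail_call(ctx, &jmp_table, 1);   /* constant key: direct-jump eligible */
	return XDP_PASS;                     /* reached only if slot 1 is empty    */
}

char LICENSE[] SEC("license") = "GPL";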
630211543 if (!bpf_map_ptr_unpriv(aux))
630311544 continue;
630411545
....@@ -6313,7 +11554,7 @@
631311554 return -EINVAL;
631411555 }
631511556
6316
- map_ptr = BPF_MAP_PTR(aux->map_state);
11557
+ map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
631711558 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
631811559 map_ptr->max_entries, 2);
631911560 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
....@@ -6339,17 +11580,22 @@
633911580 if (prog->jit_requested && BITS_PER_LONG == 64 &&
634011581 (insn->imm == BPF_FUNC_map_lookup_elem ||
634111582 insn->imm == BPF_FUNC_map_update_elem ||
6342
- insn->imm == BPF_FUNC_map_delete_elem)) {
11583
+ insn->imm == BPF_FUNC_map_delete_elem ||
11584
+ insn->imm == BPF_FUNC_map_push_elem ||
11585
+ insn->imm == BPF_FUNC_map_pop_elem ||
11586
+ insn->imm == BPF_FUNC_map_peek_elem)) {
634311587 aux = &env->insn_aux_data[i + delta];
634411588 if (bpf_map_ptr_poisoned(aux))
634511589 goto patch_call_imm;
634611590
6347
- map_ptr = BPF_MAP_PTR(aux->map_state);
11591
+ map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
634811592 ops = map_ptr->ops;
634911593 if (insn->imm == BPF_FUNC_map_lookup_elem &&
635011594 ops->map_gen_lookup) {
635111595 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
6352
- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11596
+ if (cnt == -EOPNOTSUPP)
11597
+ goto patch_map_ops_generic;
11598
+ if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
635311599 verbose(env, "bpf verifier is misconfigured\n");
635411600 return -EINVAL;
635511601 }
....@@ -6372,6 +11618,14 @@
637211618 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
637311619 (int (*)(struct bpf_map *map, void *key, void *value,
637411620 u64 flags))NULL));
11621
+ BUILD_BUG_ON(!__same_type(ops->map_push_elem,
11622
+ (int (*)(struct bpf_map *map, void *value,
11623
+ u64 flags))NULL));
11624
+ BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
11625
+ (int (*)(struct bpf_map *map, void *value))NULL));
11626
+ BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
11627
+ (int (*)(struct bpf_map *map, void *value))NULL));
11628
+patch_map_ops_generic:
637511629 switch (insn->imm) {
637611630 case BPF_FUNC_map_lookup_elem:
637711631 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
....@@ -6385,9 +11639,45 @@
638511639 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
638611640 __bpf_call_base;
638711641 continue;
11642
+ case BPF_FUNC_map_push_elem:
11643
+ insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
11644
+ __bpf_call_base;
11645
+ continue;
11646
+ case BPF_FUNC_map_pop_elem:
11647
+ insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
11648
+ __bpf_call_base;
11649
+ continue;
11650
+ case BPF_FUNC_map_peek_elem:
11651
+ insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
11652
+ __bpf_call_base;
11653
+ continue;
638811654 }
638911655
639011656 goto patch_call_imm;
11657
+ }
11658
+
11659
+ if (prog->jit_requested && BITS_PER_LONG == 64 &&
11660
+ insn->imm == BPF_FUNC_jiffies64) {
11661
+ struct bpf_insn ld_jiffies_addr[2] = {
11662
+ BPF_LD_IMM64(BPF_REG_0,
11663
+ (unsigned long)&jiffies),
11664
+ };
11665
+
11666
+ insn_buf[0] = ld_jiffies_addr[0];
11667
+ insn_buf[1] = ld_jiffies_addr[1];
11668
+ insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
11669
+ BPF_REG_0, 0);
11670
+ cnt = 3;
11671
+
11672
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
11673
+ cnt);
11674
+ if (!new_prog)
11675
+ return -ENOMEM;
11676
+
11677
+ delta += cnt - 1;
11678
+ env->prog = prog = new_prog;
11679
+ insn = new_prog->insnsi + i + delta;
11680
+ continue;
639111681 }
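/* Illustrative sketch, not kernel code: the patch above replaces a call to the
 * bpf_jiffies64() helper with a 64-bit load through the address of the
 * kernel's jiffies counter (LD_IMM64 of the address followed by an 8-byte
 * LDX). A userspace analogue with a made-up counter:
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t counter;                     /* stand-in for jiffies           */

static uint64_t read_via_call(void)          /* what the helper call did       */
{
	return counter;
}

static uint64_t read_inlined(void)           /* what the patched sequence does */
{
	uint64_t *addr = &counter;           /* LD_IMM64  r0 = &counter        */
	return *addr;                        /* LDX_MEM   r0 = *(u64 *)(r0+0)  */
}

int main(void)
{
	counter = 12345;
	printf("%llu %llu\n",
	       (unsigned long long)read_via_call(),
	       (unsigned long long)read_inlined());
	return 0;
}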
639211682
639311683 patch_call_imm:
....@@ -6404,6 +11694,23 @@
640411694 insn->imm = fn->func - __bpf_call_base;
640511695 }
640611696
11697
+ /* Since poke tab is now finalized, publish aux to tracker. */
11698
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
11699
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
11700
+ if (!map_ptr->ops->map_poke_track ||
11701
+ !map_ptr->ops->map_poke_untrack ||
11702
+ !map_ptr->ops->map_poke_run) {
11703
+ verbose(env, "bpf verifier is misconfigured\n");
11704
+ return -EINVAL;
11705
+ }
11706
+
11707
+ ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
11708
+ if (ret < 0) {
11709
+ verbose(env, "tracking tail call prog failed\n");
11710
+ return ret;
11711
+ }
11712
+ }
11713
+
640711714 return 0;
640811715 }
640911716
....@@ -6412,29 +11719,602 @@
641211719 struct bpf_verifier_state_list *sl, *sln;
641311720 int i;
641411721
11722
+ sl = env->free_list;
11723
+ while (sl) {
11724
+ sln = sl->next;
11725
+ free_verifier_state(&sl->state, false);
11726
+ kfree(sl);
11727
+ sl = sln;
11728
+ }
11729
+ env->free_list = NULL;
11730
+
641511731 if (!env->explored_states)
641611732 return;
641711733
6418
- for (i = 0; i < env->prog->len; i++) {
11734
+ for (i = 0; i < state_htab_size(env); i++) {
641911735 sl = env->explored_states[i];
642011736
6421
- if (sl)
6422
- while (sl != STATE_LIST_MARK) {
6423
- sln = sl->next;
6424
- free_verifier_state(&sl->state, false);
6425
- kfree(sl);
6426
- sl = sln;
6427
- }
11737
+ while (sl) {
11738
+ sln = sl->next;
11739
+ free_verifier_state(&sl->state, false);
11740
+ kfree(sl);
11741
+ sl = sln;
11742
+ }
11743
+ env->explored_states[i] = NULL;
642811744 }
6429
-
6430
- kfree(env->explored_states);
643111745 }
643211746
6433
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
11747
+static int do_check_common(struct bpf_verifier_env *env, int subprog)
643411748 {
11749
+ bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11750
+ struct bpf_verifier_state *state;
11751
+ struct bpf_reg_state *regs;
11752
+ int ret, i;
11753
+
11754
+ env->prev_linfo = NULL;
11755
+ env->pass_cnt++;
11756
+
11757
+ state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
11758
+ if (!state)
11759
+ return -ENOMEM;
11760
+ state->curframe = 0;
11761
+ state->speculative = false;
11762
+ state->branches = 1;
11763
+ state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
11764
+ if (!state->frame[0]) {
11765
+ kfree(state);
11766
+ return -ENOMEM;
11767
+ }
11768
+ env->cur_state = state;
11769
+ init_func_state(env, state->frame[0],
11770
+ BPF_MAIN_FUNC /* callsite */,
11771
+ 0 /* frameno */,
11772
+ subprog);
11773
+
11774
+ regs = state->frame[state->curframe]->regs;
11775
+ if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
11776
+ ret = btf_prepare_func_args(env, subprog, regs);
11777
+ if (ret)
11778
+ goto out;
11779
+ for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
11780
+ if (regs[i].type == PTR_TO_CTX)
11781
+ mark_reg_known_zero(env, regs, i);
11782
+ else if (regs[i].type == SCALAR_VALUE)
11783
+ mark_reg_unknown(env, regs, i);
11784
+ }
11785
+ } else {
11786
+ /* 1st arg to a function */
11787
+ regs[BPF_REG_1].type = PTR_TO_CTX;
11788
+ mark_reg_known_zero(env, regs, BPF_REG_1);
11789
+ ret = btf_check_func_arg_match(env, subprog, regs);
11790
+ if (ret == -EFAULT)
11791
+ /* unlikely verifier bug. abort.
11792
+ * ret == 0 and ret < 0 are sadly acceptable for
11793
+ * main() function due to backward compatibility.
11794
+ * E.g. a socket filter program may be written as:
11795
+ * int bpf_prog(struct pt_regs *ctx)
11796
+ * and never dereference that ctx in the program.
11797
+ * 'struct pt_regs' is a type mismatch for socket
11798
+ * filter that should be using 'struct __sk_buff'.
11799
+ */
11800
+ goto out;
11801
+ }
11802
+
11803
+ ret = do_check(env);
11804
+out:
11805
+ /* check for NULL is necessary, since cur_state can be freed inside
11806
+ * do_check() under memory pressure.
11807
+ */
11808
+ if (env->cur_state) {
11809
+ free_verifier_state(env->cur_state, true);
11810
+ env->cur_state = NULL;
11811
+ }
11812
+ while (!pop_stack(env, NULL, NULL, false));
11813
+ if (!ret && pop_log)
11814
+ bpf_vlog_reset(&env->log, 0);
11815
+ free_states(env);
11816
+ return ret;
11817
+}
11818
+
11819
+/* Verify all global functions in a BPF program one by one based on their BTF.
11820
+ * All global functions must pass verification. Otherwise the whole program is rejected.
11821
+ * Consider:
11822
+ * int bar(int);
11823
+ * int foo(int f)
11824
+ * {
11825
+ * return bar(f);
11826
+ * }
11827
+ * int bar(int b)
11828
+ * {
11829
+ * ...
11830
+ * }
11831
+ * foo() will be verified first for R1=any_scalar_value. During verification it
11832
+ * will be assumed that bar() already verified successfully and call to bar()
11833
+ * from foo() will be checked for type match only. Later bar() will be verified
11834
+ * independently to check that it's safe for R1=any_scalar_value.
11835
+ */
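/* Illustrative BPF C fragment matching the foo()/bar() example above (assumes
 * a libbpf-style build and a clang new enough to emit BTF function linkage;
 * names are made up): a non-static function is recorded with BTF_FUNC_GLOBAL
 * linkage and verified once on its own for any scalar argument, while a
 * static helper is re-verified inline at every call site.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

__attribute__((noinline))
int bar(int b)                      /* global: verified independently          */
{
	return b > 0 ? b : 0;
}

static __attribute__((noinline))
int baz(int b)                      /* static: verified inline with the caller */
{
	return b + 1;
}

SEC("socket")
int foo(struct __sk_buff *skb)
{
	return bar(skb->len) + baz(skb->len);
}

char LICENSE[] SEC("license") = "GPL";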
11836
+static int do_check_subprogs(struct bpf_verifier_env *env)
11837
+{
11838
+ struct bpf_prog_aux *aux = env->prog->aux;
11839
+ int i, ret;
11840
+
11841
+ if (!aux->func_info)
11842
+ return 0;
11843
+
11844
+ for (i = 1; i < env->subprog_cnt; i++) {
11845
+ if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
11846
+ continue;
11847
+ env->insn_idx = env->subprog_info[i].start;
11848
+ WARN_ON_ONCE(env->insn_idx == 0);
11849
+ ret = do_check_common(env, i);
11850
+ if (ret) {
11851
+ return ret;
11852
+ } else if (env->log.level & BPF_LOG_LEVEL) {
11853
+ verbose(env,
11854
+ "Func#%d is safe for any args that match its prototype\n",
11855
+ i);
11856
+ }
11857
+ }
11858
+ return 0;
11859
+}
11860
+
11861
+static int do_check_main(struct bpf_verifier_env *env)
11862
+{
11863
+ int ret;
11864
+
11865
+ env->insn_idx = 0;
11866
+ ret = do_check_common(env, 0);
11867
+ if (!ret)
11868
+ env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
11869
+ return ret;
11870
+}
11871
+
11872
+
11873
+static void print_verification_stats(struct bpf_verifier_env *env)
11874
+{
11875
+ int i;
11876
+
11877
+ if (env->log.level & BPF_LOG_STATS) {
11878
+ verbose(env, "verification time %lld usec\n",
11879
+ div_u64(env->verification_time, 1000));
11880
+ verbose(env, "stack depth ");
11881
+ for (i = 0; i < env->subprog_cnt; i++) {
11882
+ u32 depth = env->subprog_info[i].stack_depth;
11883
+
11884
+ verbose(env, "%d", depth);
11885
+ if (i + 1 < env->subprog_cnt)
11886
+ verbose(env, "+");
11887
+ }
11888
+ verbose(env, "\n");
11889
+ }
11890
+ verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
11891
+ "total_states %d peak_states %d mark_read %d\n",
11892
+ env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
11893
+ env->max_states_per_insn, env->total_states,
11894
+ env->peak_states, env->longest_mark_read_walk);
11895
+}
11896
+
11897
+static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
11898
+{
11899
+ const struct btf_type *t, *func_proto;
11900
+ const struct bpf_struct_ops *st_ops;
11901
+ const struct btf_member *member;
11902
+ struct bpf_prog *prog = env->prog;
11903
+ u32 btf_id, member_idx;
11904
+ const char *mname;
11905
+
11906
+ if (!prog->gpl_compatible) {
11907
+ verbose(env, "struct ops programs must have a GPL compatible license\n");
11908
+ return -EINVAL;
11909
+ }
11910
+
11911
+ btf_id = prog->aux->attach_btf_id;
11912
+ st_ops = bpf_struct_ops_find(btf_id);
11913
+ if (!st_ops) {
11914
+ verbose(env, "attach_btf_id %u is not a supported struct\n",
11915
+ btf_id);
11916
+ return -ENOTSUPP;
11917
+ }
11918
+
11919
+ t = st_ops->type;
11920
+ member_idx = prog->expected_attach_type;
11921
+ if (member_idx >= btf_type_vlen(t)) {
11922
+ verbose(env, "attach to invalid member idx %u of struct %s\n",
11923
+ member_idx, st_ops->name);
11924
+ return -EINVAL;
11925
+ }
11926
+
11927
+ member = &btf_type_member(t)[member_idx];
11928
+ mname = btf_name_by_offset(btf_vmlinux, member->name_off);
11929
+ func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
11930
+ NULL);
11931
+ if (!func_proto) {
11932
+ verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
11933
+ mname, member_idx, st_ops->name);
11934
+ return -EINVAL;
11935
+ }
11936
+
11937
+ if (st_ops->check_member) {
11938
+ int err = st_ops->check_member(t, member);
11939
+
11940
+ if (err) {
11941
+ verbose(env, "attach to unsupported member %s of struct %s\n",
11942
+ mname, st_ops->name);
11943
+ return err;
11944
+ }
11945
+ }
11946
+
11947
+ prog->aux->attach_func_proto = func_proto;
11948
+ prog->aux->attach_func_name = mname;
11949
+ env->ops = st_ops->verifier_ops;
11950
+
11951
+ return 0;
11952
+}
11953
+#define SECURITY_PREFIX "security_"
11954
+
11955
+static int check_attach_modify_return(unsigned long addr, const char *func_name)
11956
+{
11957
+ if (within_error_injection_list(addr) ||
11958
+ !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
11959
+ return 0;
11960
+
11961
+ return -EINVAL;
11962
+}
11963
+
11964
+/* non exhaustive list of sleepable bpf_lsm_*() functions */
11965
+BTF_SET_START(btf_sleepable_lsm_hooks)
11966
+#ifdef CONFIG_BPF_LSM
11967
+BTF_ID(func, bpf_lsm_bprm_committed_creds)
11968
+#else
11969
+BTF_ID_UNUSED
11970
+#endif
11971
+BTF_SET_END(btf_sleepable_lsm_hooks)
11972
+
11973
+static int check_sleepable_lsm_hook(u32 btf_id)
11974
+{
11975
+ return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
11976
+}
11977
+
11978
+/* list of non-sleepable functions that are otherwise on
11979
+ * ALLOW_ERROR_INJECTION list
11980
+ */
11981
+BTF_SET_START(btf_non_sleepable_error_inject)
11982
+/* Three functions below can be called from sleepable and non-sleepable context.
11983
+ * Assume non-sleepable from bpf safety point of view.
11984
+ */
11985
+BTF_ID(func, __add_to_page_cache_locked)
11986
+BTF_ID(func, should_fail_alloc_page)
11987
+BTF_ID(func, should_failslab)
11988
+BTF_SET_END(btf_non_sleepable_error_inject)
11989
+
11990
+static int check_non_sleepable_error_inject(u32 btf_id)
11991
+{
11992
+ return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
11993
+}
11994
+
11995
+int bpf_check_attach_target(struct bpf_verifier_log *log,
11996
+ const struct bpf_prog *prog,
11997
+ const struct bpf_prog *tgt_prog,
11998
+ u32 btf_id,
11999
+ struct bpf_attach_target_info *tgt_info)
12000
+{
12001
+ bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
12002
+ const char prefix[] = "btf_trace_";
12003
+ int ret = 0, subprog = -1, i;
12004
+ const struct btf_type *t;
12005
+ bool conservative = true;
12006
+ const char *tname;
12007
+ struct btf *btf;
12008
+ long addr = 0;
12009
+
12010
+ if (!btf_id) {
12011
+ bpf_log(log, "Tracing programs must provide btf_id\n");
12012
+ return -EINVAL;
12013
+ }
12014
+ btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
12015
+ if (!btf) {
12016
+ bpf_log(log,
12017
+ "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
12018
+ return -EINVAL;
12019
+ }
12020
+ t = btf_type_by_id(btf, btf_id);
12021
+ if (!t) {
12022
+ bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
12023
+ return -EINVAL;
12024
+ }
12025
+ tname = btf_name_by_offset(btf, t->name_off);
12026
+ if (!tname) {
12027
+ bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
12028
+ return -EINVAL;
12029
+ }
12030
+ if (tgt_prog) {
12031
+ struct bpf_prog_aux *aux = tgt_prog->aux;
12032
+
12033
+ for (i = 0; i < aux->func_info_cnt; i++)
12034
+ if (aux->func_info[i].type_id == btf_id) {
12035
+ subprog = i;
12036
+ break;
12037
+ }
12038
+ if (subprog == -1) {
12039
+ bpf_log(log, "Subprog %s doesn't exist\n", tname);
12040
+ return -EINVAL;
12041
+ }
12042
+ conservative = aux->func_info_aux[subprog].unreliable;
12043
+ if (prog_extension) {
12044
+ if (conservative) {
12045
+ bpf_log(log,
12046
+ "Cannot replace static functions\n");
12047
+ return -EINVAL;
12048
+ }
12049
+ if (!prog->jit_requested) {
12050
+ bpf_log(log,
12051
+ "Extension programs should be JITed\n");
12052
+ return -EINVAL;
12053
+ }
12054
+ }
12055
+ if (!tgt_prog->jited) {
12056
+ bpf_log(log, "Can attach to only JITed progs\n");
12057
+ return -EINVAL;
12058
+ }
12059
+ if (tgt_prog->type == prog->type) {
12060
+ /* Cannot fentry/fexit another fentry/fexit program.
12061
+ * Cannot attach program extension to another extension.
12062
+ * It's ok to attach fentry/fexit to extension program.
12063
+ */
12064
+ bpf_log(log, "Cannot recursively attach\n");
12065
+ return -EINVAL;
12066
+ }
12067
+ if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
12068
+ prog_extension &&
12069
+ (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
12070
+ tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
12071
+ /* Program extensions can extend all program types
12072
+ * except fentry/fexit. The reason is the following.
12073
+ * The fentry/fexit programs are used for performance
12074
+ * analysis, stats and can be attached to any program
12075
+ * type except themselves. When an extension program
12076
+ * replaces an XDP function it is necessary to allow
12077
+ * performance analysis of all functions: both the
12078
+ * original XDP program and its program extension. Hence
12079
+ * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
12080
+ * allowed. If extending of fentry/fexit was allowed it
12081
+ * would be possible to create long call chain
12082
+ * fentry->extension->fentry->extension beyond
12083
+ * reasonable stack size. Hence extending fentry is not
12084
+ * allowed.
12085
+ */
12086
+ bpf_log(log, "Cannot extend fentry/fexit\n");
12087
+ return -EINVAL;
12088
+ }
12089
+ } else {
12090
+ if (prog_extension) {
12091
+ bpf_log(log, "Cannot replace kernel functions\n");
12092
+ return -EINVAL;
12093
+ }
12094
+ }
12095
+
12096
+ switch (prog->expected_attach_type) {
12097
+ case BPF_TRACE_RAW_TP:
12098
+ if (tgt_prog) {
12099
+ bpf_log(log,
12100
+ "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
12101
+ return -EINVAL;
12102
+ }
12103
+ if (!btf_type_is_typedef(t)) {
12104
+ bpf_log(log, "attach_btf_id %u is not a typedef\n",
12105
+ btf_id);
12106
+ return -EINVAL;
12107
+ }
12108
+ if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
12109
+ bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
12110
+ btf_id, tname);
12111
+ return -EINVAL;
12112
+ }
12113
+ tname += sizeof(prefix) - 1;
12114
+ t = btf_type_by_id(btf, t->type);
12115
+ if (!btf_type_is_ptr(t))
12116
+ /* should never happen in valid vmlinux build */
12117
+ return -EINVAL;
12118
+ t = btf_type_by_id(btf, t->type);
12119
+ if (!btf_type_is_func_proto(t))
12120
+ /* should never happen in valid vmlinux build */
12121
+ return -EINVAL;
12122
+
12123
+ break;
12124
+ case BPF_TRACE_ITER:
12125
+ if (!btf_type_is_func(t)) {
12126
+ bpf_log(log, "attach_btf_id %u is not a function\n",
12127
+ btf_id);
12128
+ return -EINVAL;
12129
+ }
12130
+ t = btf_type_by_id(btf, t->type);
12131
+ if (!btf_type_is_func_proto(t))
12132
+ return -EINVAL;
12133
+ ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
12134
+ if (ret)
12135
+ return ret;
12136
+ break;
12137
+ default:
12138
+ if (!prog_extension)
12139
+ return -EINVAL;
12140
+ fallthrough;
12141
+ case BPF_MODIFY_RETURN:
12142
+ case BPF_LSM_MAC:
12143
+ case BPF_TRACE_FENTRY:
12144
+ case BPF_TRACE_FEXIT:
12145
+ if (!btf_type_is_func(t)) {
12146
+ bpf_log(log, "attach_btf_id %u is not a function\n",
12147
+ btf_id);
12148
+ return -EINVAL;
12149
+ }
12150
+ if (prog_extension &&
12151
+ btf_check_type_match(log, prog, btf, t))
12152
+ return -EINVAL;
12153
+ t = btf_type_by_id(btf, t->type);
12154
+ if (!btf_type_is_func_proto(t))
12155
+ return -EINVAL;
12156
+
12157
+ if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
12158
+ (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
12159
+ prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
12160
+ return -EINVAL;
12161
+
12162
+ if (tgt_prog && conservative)
12163
+ t = NULL;
12164
+
12165
+ ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
12166
+ if (ret < 0)
12167
+ return ret;
12168
+
12169
+ if (tgt_prog) {
12170
+ if (subprog == 0)
12171
+ addr = (long) tgt_prog->bpf_func;
12172
+ else
12173
+ addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
12174
+ } else {
12175
+ addr = kallsyms_lookup_name(tname);
12176
+ if (!addr) {
12177
+ bpf_log(log,
12178
+ "The address of function %s cannot be found\n",
12179
+ tname);
12180
+ return -ENOENT;
12181
+ }
12182
+ }
12183
+
12184
+ if (prog->aux->sleepable) {
12185
+ ret = -EINVAL;
12186
+ switch (prog->type) {
12187
+ case BPF_PROG_TYPE_TRACING:
12188
+ /* fentry/fexit/fmod_ret progs can be sleepable only if they are
12189
+ * attached to ALLOW_ERROR_INJECTION and are not in the denylist.
12190
+ */
12191
+ if (!check_non_sleepable_error_inject(btf_id) &&
12192
+ within_error_injection_list(addr))
12193
+ ret = 0;
12194
+ break;
12195
+ case BPF_PROG_TYPE_LSM:
12196
+ /* LSM progs check that they are attached to bpf_lsm_*() funcs.
12197
+ * Only some of them are sleepable.
12198
+ */
12199
+ if (check_sleepable_lsm_hook(btf_id))
12200
+ ret = 0;
12201
+ break;
12202
+ default:
12203
+ break;
12204
+ }
12205
+ if (ret) {
12206
+ bpf_log(log, "%s is not sleepable\n", tname);
12207
+ return ret;
12208
+ }
12209
+ } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
12210
+ if (tgt_prog) {
12211
+ bpf_log(log, "can't modify return codes of BPF programs\n");
12212
+ return -EINVAL;
12213
+ }
12214
+ ret = check_attach_modify_return(addr, tname);
12215
+ if (ret) {
12216
+ bpf_log(log, "%s() is not modifiable\n", tname);
12217
+ return ret;
12218
+ }
12219
+ }
12220
+
12221
+ break;
12222
+ }
12223
+ tgt_info->tgt_addr = addr;
12224
+ tgt_info->tgt_name = tname;
12225
+ tgt_info->tgt_type = t;
12226
+ return 0;
12227
+}
12228
+
12229
+static int check_attach_btf_id(struct bpf_verifier_env *env)
12230
+{
12231
+ struct bpf_prog *prog = env->prog;
12232
+ struct bpf_prog *tgt_prog = prog->aux->dst_prog;
12233
+ struct bpf_attach_target_info tgt_info = {};
12234
+ u32 btf_id = prog->aux->attach_btf_id;
12235
+ struct bpf_trampoline *tr;
12236
+ int ret;
12237
+ u64 key;
12238
+
12239
+ if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
12240
+ prog->type != BPF_PROG_TYPE_LSM) {
12241
+ verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
12242
+ return -EINVAL;
12243
+ }
12244
+
12245
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
12246
+ return check_struct_ops_btf_id(env);
12247
+
12248
+ if (prog->type != BPF_PROG_TYPE_TRACING &&
12249
+ prog->type != BPF_PROG_TYPE_LSM &&
12250
+ prog->type != BPF_PROG_TYPE_EXT)
12251
+ return 0;
12252
+
12253
+ ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
12254
+ if (ret)
12255
+ return ret;
12256
+
12257
+ if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
12258
+ /* to make freplace equivalent to their targets, they need to
12259
+ * inherit env->ops and expected_attach_type for the rest of the
12260
+ * verification
12261
+ */
12262
+ env->ops = bpf_verifier_ops[tgt_prog->type];
12263
+ prog->expected_attach_type = tgt_prog->expected_attach_type;
12264
+ }
12265
+
12266
+ /* store info about the attachment target that will be used later */
12267
+ prog->aux->attach_func_proto = tgt_info.tgt_type;
12268
+ prog->aux->attach_func_name = tgt_info.tgt_name;
12269
+
12270
+ if (tgt_prog) {
12271
+ prog->aux->saved_dst_prog_type = tgt_prog->type;
12272
+ prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
12273
+ }
12274
+
12275
+ if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
12276
+ prog->aux->attach_btf_trace = true;
12277
+ return 0;
12278
+ } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
12279
+ if (!bpf_iter_prog_supported(prog))
12280
+ return -EINVAL;
12281
+ return 0;
12282
+ }
12283
+
12284
+ if (prog->type == BPF_PROG_TYPE_LSM) {
12285
+ ret = bpf_lsm_verify_prog(&env->log, prog);
12286
+ if (ret < 0)
12287
+ return ret;
12288
+ }
12289
+
12290
+ key = bpf_trampoline_compute_key(tgt_prog, btf_id);
12291
+ tr = bpf_trampoline_get(key, &tgt_info);
12292
+ if (!tr)
12293
+ return -ENOMEM;
12294
+
12295
+ prog->aux->dst_trampoline = tr;
12296
+ return 0;
12297
+}
12298
+
12299
+struct btf *bpf_get_btf_vmlinux(void)
12300
+{
12301
+ if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
12302
+ mutex_lock(&bpf_verifier_lock);
12303
+ if (!btf_vmlinux)
12304
+ btf_vmlinux = btf_parse_vmlinux();
12305
+ mutex_unlock(&bpf_verifier_lock);
12306
+ }
12307
+ return btf_vmlinux;
12308
+}
12309
+
12310
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
12311
+ union bpf_attr __user *uattr)
12312
+{
12313
+ u64 start_time = ktime_get_ns();
643512314 struct bpf_verifier_env *env;
643612315 struct bpf_verifier_log *log;
6437
- int ret = -EINVAL;
12316
+ int i, len, ret = -EINVAL;
12317
+ bool is_priv;
643812318
643912319 /* no program is valid */
644012320 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
....@@ -6448,17 +12328,23 @@
644812328 return -ENOMEM;
644912329 log = &env->log;
645012330
12331
+ len = (*prog)->len;
645112332 env->insn_aux_data =
6452
- vzalloc(array_size(sizeof(struct bpf_insn_aux_data),
6453
- (*prog)->len));
12333
+ vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
645412334 ret = -ENOMEM;
645512335 if (!env->insn_aux_data)
645612336 goto err_free_env;
12337
+ for (i = 0; i < len; i++)
12338
+ env->insn_aux_data[i].orig_idx = i;
645712339 env->prog = *prog;
645812340 env->ops = bpf_verifier_ops[env->prog->type];
12341
+ is_priv = bpf_capable();
12342
+
12343
+ bpf_get_btf_vmlinux();
645912344
646012345 /* grab the mutex to protect few globals used by verifier */
6461
- mutex_lock(&bpf_verifier_lock);
12346
+ if (!is_priv)
12347
+ mutex_lock(&bpf_verifier_lock);
646212348
646312349 if (attr->log_level || attr->log_buf || attr->log_size) {
646412350 /* user requested verbose verifier output
....@@ -6468,58 +12354,93 @@
646812354 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
646912355 log->len_total = attr->log_size;
647012356
6471
- ret = -EINVAL;
647212357 /* log attributes have to be sane */
6473
- if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
6474
- !log->level || !log->ubuf)
12358
+ if (!bpf_verifier_log_attr_valid(log)) {
12359
+ ret = -EINVAL;
647512360 goto err_unlock;
12361
+ }
12362
+ }
12363
+
12364
+ if (IS_ERR(btf_vmlinux)) {
12365
+ /* Either gcc, pahole, or the kernel is broken. */
12366
+ verbose(env, "in-kernel BTF is malformed\n");
12367
+ ret = PTR_ERR(btf_vmlinux);
12368
+ goto skip_full_check;
647612369 }
647712370
647812371 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
647912372 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
648012373 env->strict_alignment = true;
6481
-
648212374 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
648312375 env->strict_alignment = false;
648412376
6485
- ret = replace_map_fd_with_map_ptr(env);
6486
- if (ret < 0)
6487
- goto skip_full_check;
12377
+ env->allow_ptr_leaks = bpf_allow_ptr_leaks();
12378
+ env->allow_uninit_stack = bpf_allow_uninit_stack();
12379
+ env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
12380
+ env->bypass_spec_v1 = bpf_bypass_spec_v1();
12381
+ env->bypass_spec_v4 = bpf_bypass_spec_v4();
12382
+ env->bpf_capable = bpf_capable();
648812383
6489
- if (bpf_prog_is_dev_bound(env->prog->aux)) {
6490
- ret = bpf_prog_offload_verifier_prep(env);
6491
- if (ret)
6492
- goto skip_full_check;
6493
- }
12384
+ if (is_priv)
12385
+ env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
649412386
6495
- env->explored_states = kcalloc(env->prog->len,
12387
+ env->explored_states = kvcalloc(state_htab_size(env),
649612388 sizeof(struct bpf_verifier_state_list *),
649712389 GFP_USER);
649812390 ret = -ENOMEM;
649912391 if (!env->explored_states)
650012392 goto skip_full_check;
650112393
6502
- env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
12394
+ ret = check_subprogs(env);
12395
+ if (ret < 0)
12396
+ goto skip_full_check;
12397
+
12398
+ ret = check_btf_info(env, attr, uattr);
12399
+ if (ret < 0)
12400
+ goto skip_full_check;
12401
+
12402
+ ret = check_attach_btf_id(env);
12403
+ if (ret)
12404
+ goto skip_full_check;
12405
+
12406
+ ret = resolve_pseudo_ldimm64(env);
12407
+ if (ret < 0)
12408
+ goto skip_full_check;
12409
+
12410
+ if (bpf_prog_is_dev_bound(env->prog->aux)) {
12411
+ ret = bpf_prog_offload_verifier_prep(env->prog);
12412
+ if (ret)
12413
+ goto skip_full_check;
12414
+ }
650312415
650412416 ret = check_cfg(env);
650512417 if (ret < 0)
650612418 goto skip_full_check;
650712419
6508
- ret = do_check(env);
6509
- if (env->cur_state) {
6510
- free_verifier_state(env->cur_state, true);
6511
- env->cur_state = NULL;
6512
- }
12420
+ ret = do_check_subprogs(env);
12421
+ ret = ret ?: do_check_main(env);
12422
+
12423
+ if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
12424
+ ret = bpf_prog_offload_finalize(env);
651312425
651412426 skip_full_check:
6515
- while (!pop_stack(env, NULL, NULL));
6516
- free_states(env);
6517
-
6518
- if (ret == 0)
6519
- sanitize_dead_code(env);
12427
+ kvfree(env->explored_states);
652012428
652112429 if (ret == 0)
652212430 ret = check_max_stack_depth(env);
12431
+
12432
+ /* instruction rewrites happen after this point */
12433
+ if (is_priv) {
12434
+ if (ret == 0)
12435
+ opt_hard_wire_dead_code_branches(env);
12436
+ if (ret == 0)
12437
+ ret = opt_remove_dead_code(env);
12438
+ if (ret == 0)
12439
+ ret = opt_remove_nops(env);
12440
+ } else {
12441
+ if (ret == 0)
12442
+ sanitize_dead_code(env);
12443
+ }
652312444
652412445 if (ret == 0)
652512446 /* program is valid, convert *(u32*)(ctx + off) accesses */
....@@ -6528,8 +12449,20 @@
652812449 if (ret == 0)
652912450 ret = fixup_bpf_calls(env);
653012451
12452
+ /* do 32-bit optimization after insn patching is done so those patched
12453
+ * insns can be handled correctly.
12454
+ */
12455
+ if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
12456
+ ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
12457
+ env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
12458
+ : false;
12459
+ }
12460
+
653112461 if (ret == 0)
653212462 ret = fixup_call_args(env);
12463
+
12464
+ env->verification_time = ktime_get_ns() - start_time;
12465
+ print_verification_stats(env);
653312466
653412467 if (log->level && bpf_verifier_log_full(log))
653512468 ret = -ENOSPC;
....@@ -6559,15 +12492,26 @@
655912492 convert_pseudo_ld_imm64(env);
656012493 }
656112494
12495
+ if (ret == 0)
12496
+ adjust_btf_func(env);
12497
+
656212498 err_release_maps:
656312499 if (!env->prog->aux->used_maps)
656412500 /* if we didn't copy map pointers into bpf_prog_info, release
656512501 * them now. Otherwise free_used_maps() will release them.
656612502 */
656712503 release_maps(env);
12504
+
12505
+ /* extension progs temporarily inherit the attach_type of their targets
12506
+ for verification purposes, so set it back to zero before returning
12507
+ */
12508
+ if (env->prog->type == BPF_PROG_TYPE_EXT)
12509
+ env->prog->expected_attach_type = 0;
12510
+
656812511 *prog = env->prog;
656912512 err_unlock:
6570
- mutex_unlock(&bpf_verifier_lock);
12513
+ if (!is_priv)
12514
+ mutex_unlock(&bpf_verifier_lock);
657112515 vfree(env->insn_aux_data);
657212516 err_free_env:
657312517 kfree(env);