2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/kernel/bpf/core.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Linux Socket Filter - Kernel level socket filtering
34 *
....@@ -12,29 +13,31 @@
1213 * Alexei Starovoitov <ast@plumgrid.com>
1314 * Daniel Borkmann <dborkman@redhat.com>
1415 *
15
- * This program is free software; you can redistribute it and/or
16
- * modify it under the terms of the GNU General Public License
17
- * as published by the Free Software Foundation; either version
18
- * 2 of the License, or (at your option) any later version.
19
- *
2016 * Andi Kleen - Fix a few bad bugs and races.
2117 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
2218 */
2319
20
+#include <uapi/linux/btf.h>
2421 #include <linux/filter.h>
2522 #include <linux/skbuff.h>
2623 #include <linux/vmalloc.h>
2724 #include <linux/random.h>
2825 #include <linux/moduleloader.h>
2926 #include <linux/bpf.h>
30
-#include <linux/frame.h>
27
+#include <linux/btf.h>
28
+#include <linux/objtool.h>
3129 #include <linux/rbtree_latch.h>
3230 #include <linux/kallsyms.h>
3331 #include <linux/rcupdate.h>
3432 #include <linux/perf_event.h>
33
+#include <linux/extable.h>
34
+#include <linux/log2.h>
35
+#include <linux/nospec.h>
3536
3637 #include <asm/barrier.h>
3738 #include <asm/unaligned.h>
39
+
40
+#include <trace/hooks/memory.h>
3841
3942 /* Registers */
4043 #define BPF_R0 regs[BPF_REG_0]
....@@ -66,25 +69,27 @@
6669 {
6770 u8 *ptr = NULL;
6871
69
- if (k >= SKF_NET_OFF)
72
+ if (k >= SKF_NET_OFF) {
7073 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
71
- else if (k >= SKF_LL_OFF)
74
+ } else if (k >= SKF_LL_OFF) {
75
+ if (unlikely(!skb_mac_header_was_set(skb)))
76
+ return NULL;
7277 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
73
-
78
+ }
7479 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
7580 return ptr;
7681
7782 return NULL;
7883 }
7984
80
-struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
85
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
8186 {
8287 gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
8388 struct bpf_prog_aux *aux;
8489 struct bpf_prog *fp;
8590
8691 size = round_up(size, PAGE_SIZE);
87
- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
92
+ fp = __vmalloc(size, gfp_flags);
8893 if (fp == NULL)
8994 return NULL;
9095
....@@ -99,11 +104,124 @@
99104 fp->aux->prog = fp;
100105 fp->jit_requested = ebpf_jit_enabled();
101106
102
- INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
107
+ INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
108
+ mutex_init(&fp->aux->used_maps_mutex);
109
+ mutex_init(&fp->aux->dst_mutex);
103110
104111 return fp;
105112 }
113
+
114
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
115
+{
116
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
117
+ struct bpf_prog *prog;
118
+ int cpu;
119
+
120
+ prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
121
+ if (!prog)
122
+ return NULL;
123
+
124
+ prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
125
+ if (!prog->aux->stats) {
126
+ kfree(prog->aux);
127
+ vfree(prog);
128
+ return NULL;
129
+ }
130
+
131
+ for_each_possible_cpu(cpu) {
132
+ struct bpf_prog_stats *pstats;
133
+
134
+ pstats = per_cpu_ptr(prog->aux->stats, cpu);
135
+ u64_stats_init(&pstats->syncp);
136
+ }
137
+ return prog;
138
+}
106139 EXPORT_SYMBOL_GPL(bpf_prog_alloc);
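
The stats allocated above are per-CPU counters protected by a u64_stats_sync sequence counter. A minimal sketch of how such counters are typically summed into a single total, assuming the cnt/nsecs field names of struct bpf_prog_stats from this kernel generation (those fields are not shown in this hunk):

static void bpf_prog_stats_sum(const struct bpf_prog *prog, u64 *cnt, u64 *nsecs)
{
	unsigned int start;
	int cpu;

	*cnt = *nsecs = 0;
	for_each_possible_cpu(cpu) {
		const struct bpf_prog_stats *st = per_cpu_ptr(prog->aux->stats, cpu);
		u64 c, n;

		do {	/* retry if a writer on this CPU raced with us */
			start = u64_stats_fetch_begin_irq(&st->syncp);
			c = st->cnt;
			n = st->nsecs;
		} while (u64_stats_fetch_retry_irq(&st->syncp, start));
		*cnt += c;
		*nsecs += n;
	}
}
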
140
+
141
+int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
142
+{
143
+ if (!prog->aux->nr_linfo || !prog->jit_requested)
144
+ return 0;
145
+
146
+ prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
147
+ sizeof(*prog->aux->jited_linfo),
148
+ GFP_KERNEL | __GFP_NOWARN);
149
+ if (!prog->aux->jited_linfo)
150
+ return -ENOMEM;
151
+
152
+ return 0;
153
+}
154
+
155
+void bpf_prog_free_jited_linfo(struct bpf_prog *prog)
156
+{
157
+ kfree(prog->aux->jited_linfo);
158
+ prog->aux->jited_linfo = NULL;
159
+}
160
+
161
+void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog)
162
+{
163
+ if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0])
164
+ bpf_prog_free_jited_linfo(prog);
165
+}
166
+
167
+/* The jit engine is responsible to provide an array
168
+ * for insn_off to the jited_off mapping (insn_to_jit_off).
169
+ *
170
+ * The idx to this array is the insn_off. Hence, the insn_off
171
+ * here is relative to the prog itself instead of the main prog.
172
+ * This array has one entry for each xlated bpf insn.
173
+ *
174
+ * jited_off is the byte off to the last byte of the jited insn.
175
+ *
176
+ * Hence, with
177
+ * insn_start:
178
+ * The first bpf insn off of the prog. The insn off
179
+ * here is relative to the main prog.
180
+ * e.g. if prog is a subprog, insn_start > 0
181
+ * linfo_idx:
182
+ * The prog's idx to prog->aux->linfo and jited_linfo
183
+ *
184
+ * jited_linfo[linfo_idx] = prog->bpf_func
185
+ *
186
+ * For i > linfo_idx,
187
+ *
188
+ * jited_linfo[i] = prog->bpf_func +
189
+ * insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
190
+ */
191
+void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
192
+ const u32 *insn_to_jit_off)
193
+{
194
+ u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
195
+ const struct bpf_line_info *linfo;
196
+ void **jited_linfo;
197
+
198
+ if (!prog->aux->jited_linfo)
199
+ /* Userspace did not provide linfo */
200
+ return;
201
+
202
+ linfo_idx = prog->aux->linfo_idx;
203
+ linfo = &prog->aux->linfo[linfo_idx];
204
+ insn_start = linfo[0].insn_off;
205
+ insn_end = insn_start + prog->len;
206
+
207
+ jited_linfo = &prog->aux->jited_linfo[linfo_idx];
208
+ jited_linfo[0] = prog->bpf_func;
209
+
210
+ nr_linfo = prog->aux->nr_linfo - linfo_idx;
211
+
212
+ for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
213
+ /* The verifier ensures that linfo[i].insn_off is
214
+ * strictly increasing
215
+ */
216
+ jited_linfo[i] = prog->bpf_func +
217
+ insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
218
+}
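
A short worked example of the mapping described above (offsets invented): for a subprog whose first line-info entry has linfo[0].insn_off = 10, insn_start is 10 and jited_linfo[0] = prog->bpf_func. If linfo[1].insn_off = 14, then jited_linfo[1] = prog->bpf_func + insn_to_jit_off[14 - 10 - 1], i.e. the address just past the JITed image of the subprog's xlated instruction at relative offset 3, which is where the instruction at relative offset 4 begins.
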
219
+
220
+void bpf_prog_free_linfo(struct bpf_prog *prog)
221
+{
222
+ bpf_prog_free_jited_linfo(prog);
223
+ kvfree(prog->aux->linfo);
224
+}
107225
108226 struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
109227 gfp_t gfp_extra_flags)
....@@ -112,8 +230,6 @@
112230 struct bpf_prog *fp;
113231 u32 pages, delta;
114232 int ret;
115
-
116
- BUG_ON(fp_old == NULL);
117233
118234 size = round_up(size, PAGE_SIZE);
119235 pages = size / PAGE_SIZE;
....@@ -125,7 +241,7 @@
125241 if (ret)
126242 return NULL;
127243
128
- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
244
+ fp = __vmalloc(size, gfp_flags);
129245 if (fp == NULL) {
130246 __bpf_prog_uncharge(fp_old->aux->user, delta);
131247 } else {
....@@ -145,16 +261,22 @@
145261
146262 void __bpf_prog_free(struct bpf_prog *fp)
147263 {
148
- kfree(fp->aux);
264
+ if (fp->aux) {
265
+ mutex_destroy(&fp->aux->used_maps_mutex);
266
+ mutex_destroy(&fp->aux->dst_mutex);
267
+ free_percpu(fp->aux->stats);
268
+ kfree(fp->aux->poke_tab);
269
+ kfree(fp->aux);
270
+ }
149271 vfree(fp);
150272 }
151273
152274 int bpf_prog_calc_tag(struct bpf_prog *fp)
153275 {
154
- const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
276
+ const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
155277 u32 raw_size = bpf_prog_tag_scratch_size(fp);
156
- u32 digest[SHA_DIGEST_WORDS];
157
- u32 ws[SHA_WORKSPACE_WORDS];
278
+ u32 digest[SHA1_DIGEST_WORDS];
279
+ u32 ws[SHA1_WORKSPACE_WORDS];
158280 u32 i, bsize, psize, blocks;
159281 struct bpf_insn *dst;
160282 bool was_ld_map;
....@@ -166,7 +288,7 @@
166288 if (!raw)
167289 return -ENOMEM;
168290
169
- sha_init(digest);
291
+ sha1_init(digest);
170292 memset(ws, 0, sizeof(ws));
171293
172294 /* We need to take out the map fd for the digest calculation
....@@ -177,7 +299,8 @@
177299 dst[i] = fp->insnsi[i];
178300 if (!was_ld_map &&
179301 dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
180
- dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
302
+ (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
303
+ dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
181304 was_ld_map = true;
182305 dst[i].imm = 0;
183306 } else if (was_ld_map &&
....@@ -196,8 +319,8 @@
196319 memset(&raw[psize], 0, raw_size - psize);
197320 raw[psize++] = 0x80;
198321
199
- bsize = round_up(psize, SHA_MESSAGE_BYTES);
200
- blocks = bsize / SHA_MESSAGE_BYTES;
322
+ bsize = round_up(psize, SHA1_BLOCK_SIZE);
323
+ blocks = bsize / SHA1_BLOCK_SIZE;
201324 todo = raw;
202325 if (bsize - psize >= sizeof(__be64)) {
203326 bits = (__be64 *)(todo + bsize - sizeof(__be64));
....@@ -208,12 +331,12 @@
208331 *bits = cpu_to_be64((psize - 1) << 3);
209332
210333 while (blocks--) {
211
- sha_transform(digest, todo, ws);
212
- todo += SHA_MESSAGE_BYTES;
334
+ sha1_transform(digest, todo, ws);
335
+ todo += SHA1_BLOCK_SIZE;
213336 }
214337
215338 result = (__force __be32 *)digest;
216
- for (i = 0; i < SHA_DIGEST_WORDS; i++)
339
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++)
217340 result[i] = cpu_to_be32(digest[i]);
218341 memcpy(fp->tag, result, sizeof(fp->tag));
219342
....@@ -221,15 +344,16 @@
221344 return 0;
222345 }
223346
224
-static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta,
225
- u32 curr, const bool probe_pass)
347
+static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
348
+ s32 end_new, s32 curr, const bool probe_pass)
226349 {
227350 const s64 imm_min = S32_MIN, imm_max = S32_MAX;
351
+ s32 delta = end_new - end_old;
228352 s64 imm = insn->imm;
229353
230
- if (curr < pos && curr + imm + 1 > pos)
354
+ if (curr < pos && curr + imm + 1 >= end_old)
231355 imm += delta;
232
- else if (curr > pos + delta && curr + imm + 1 <= pos + delta)
356
+ else if (curr >= end_new && curr + imm + 1 < end_new)
233357 imm -= delta;
234358 if (imm < imm_min || imm > imm_max)
235359 return -ERANGE;
....@@ -238,15 +362,16 @@
238362 return 0;
239363 }
240364
241
-static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
242
- u32 curr, const bool probe_pass)
365
+static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
366
+ s32 end_new, s32 curr, const bool probe_pass)
243367 {
244368 const s32 off_min = S16_MIN, off_max = S16_MAX;
369
+ s32 delta = end_new - end_old;
245370 s32 off = insn->off;
246371
247
- if (curr < pos && curr + off + 1 > pos)
372
+ if (curr < pos && curr + off + 1 >= end_old)
248373 off += delta;
249
- else if (curr > pos + delta && curr + off + 1 <= pos + delta)
374
+ else if (curr >= end_new && curr + off + 1 < end_new)
250375 off -= delta;
251376 if (off < off_min || off > off_max)
252377 return -ERANGE;
....@@ -255,10 +380,10 @@
255380 return 0;
256381 }
257382
258
-static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
259
- const bool probe_pass)
383
+static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
384
+ s32 end_new, const bool probe_pass)
260385 {
261
- u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0);
386
+ u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0);
262387 struct bpf_insn *insn = prog->insnsi;
263388 int ret = 0;
264389
....@@ -270,22 +395,23 @@
270395 * do any other adjustments. Therefore skip the patchlet.
271396 */
272397 if (probe_pass && i == pos) {
273
- i += delta + 1;
274
- insn++;
398
+ i = end_new;
399
+ insn = prog->insnsi + end_old;
275400 }
276401 code = insn->code;
277
- if (BPF_CLASS(code) != BPF_JMP ||
402
+ if ((BPF_CLASS(code) != BPF_JMP &&
403
+ BPF_CLASS(code) != BPF_JMP32) ||
278404 BPF_OP(code) == BPF_EXIT)
279405 continue;
280406 /* Adjust offset of jmps if we cross patch boundaries. */
281407 if (BPF_OP(code) == BPF_CALL) {
282408 if (insn->src_reg != BPF_PSEUDO_CALL)
283409 continue;
284
- ret = bpf_adj_delta_to_imm(insn, pos, delta, i,
285
- probe_pass);
410
+ ret = bpf_adj_delta_to_imm(insn, pos, end_old,
411
+ end_new, i, probe_pass);
286412 } else {
287
- ret = bpf_adj_delta_to_off(insn, pos, delta, i,
288
- probe_pass);
413
+ ret = bpf_adj_delta_to_off(insn, pos, end_old,
414
+ end_new, i, probe_pass);
289415 }
290416 if (ret)
291417 break;
....@@ -294,12 +420,33 @@
294420 return ret;
295421 }
296422
423
+static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
424
+{
425
+ struct bpf_line_info *linfo;
426
+ u32 i, nr_linfo;
427
+
428
+ nr_linfo = prog->aux->nr_linfo;
429
+ if (!nr_linfo || !delta)
430
+ return;
431
+
432
+ linfo = prog->aux->linfo;
433
+
434
+ for (i = 0; i < nr_linfo; i++)
435
+ if (off < linfo[i].insn_off)
436
+ break;
437
+
438
+ /* Push all off < linfo[i].insn_off by delta */
439
+ for (; i < nr_linfo; i++)
440
+ linfo[i].insn_off += delta;
441
+}
442
+
297443 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
298444 const struct bpf_insn *patch, u32 len)
299445 {
300446 u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
301447 const u32 cnt_max = S16_MAX;
302448 struct bpf_prog *prog_adj;
449
+ int err;
303450
304451 /* Since our patchlet doesn't expand the image, we're done. */
305452 if (insn_delta == 0) {
....@@ -315,8 +462,8 @@
315462 * we afterwards may not fail anymore.
316463 */
317464 if (insn_adj_cnt > cnt_max &&
318
- bpf_adj_branches(prog, off, insn_delta, true))
319
- return NULL;
465
+ (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
466
+ return ERR_PTR(err);
320467
321468 /* Several new instructions need to be inserted. Make room
322469 * for them. Likely, there's no need for a new allocation as
....@@ -325,7 +472,7 @@
325472 prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
326473 GFP_USER);
327474 if (!prog_adj)
328
- return NULL;
475
+ return ERR_PTR(-ENOMEM);
329476
330477 prog_adj->len = insn_adj_cnt;
331478
....@@ -347,12 +494,26 @@
347494 * the ship has sailed to reverse to the original state. An
348495 * overflow cannot happen at this point.
349496 */
350
- BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
497
+ BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));
498
+
499
+ bpf_adj_linfo(prog_adj, off, insn_delta);
351500
352501 return prog_adj;
353502 }
354503
355
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
504
+int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
505
+{
506
+ /* Branch offsets can't overflow when program is shrinking, no need
507
+ * to call bpf_adj_branches(..., true) here
508
+ */
509
+ memmove(prog->insnsi + off, prog->insnsi + off + cnt,
510
+ sizeof(struct bpf_insn) * (prog->len - off - cnt));
511
+ prog->len -= cnt;
512
+
513
+ return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
514
+}
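
A worked example of the offset fix-up (indices invented): removing cnt = 2 instructions at off = 5 calls bpf_adj_branches(prog, 5, 7, 5, false), so delta = -2. A forward jump at insn 2 that targeted old insn 9 (off = 6) crosses the removed region and becomes off = 4, still landing on the same instruction at its new index 7; a backward jump crossing the region in the other direction has its offset made less negative by 2 via the second branch of bpf_adj_delta_to_off().
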
515
+
516
+static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
356517 {
357518 int i;
358519
....@@ -368,29 +529,31 @@
368529
369530 #ifdef CONFIG_BPF_JIT
370531 /* All BPF JIT sysctl knobs here. */
371
-int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
532
+int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
533
+int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
372534 int bpf_jit_harden __read_mostly;
373
-int bpf_jit_kallsyms __read_mostly;
374535 long bpf_jit_limit __read_mostly;
375536 long bpf_jit_limit_max __read_mostly;
376537
377
-static __always_inline void
378
-bpf_get_prog_addr_region(const struct bpf_prog *prog,
379
- unsigned long *symbol_start,
380
- unsigned long *symbol_end)
538
+static void
539
+bpf_prog_ksym_set_addr(struct bpf_prog *prog)
381540 {
382541 const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
383542 unsigned long addr = (unsigned long)hdr;
384543
385544 WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
386545
387
- *symbol_start = addr;
388
- *symbol_end = addr + hdr->pages * PAGE_SIZE;
546
+ prog->aux->ksym.start = (unsigned long) prog->bpf_func;
547
+ prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE;
389548 }
390549
391
-static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
550
+static void
551
+bpf_prog_ksym_set_name(struct bpf_prog *prog)
392552 {
553
+ char *sym = prog->aux->ksym.name;
393554 const char *end = sym + KSYM_NAME_LEN;
555
+ const struct btf_type *type;
556
+ const char *func_name;
394557
395558 BUILD_BUG_ON(sizeof("bpf_prog_") +
396559 sizeof(prog->tag) * 2 +
....@@ -405,42 +568,43 @@
405568
406569 sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
407570 sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
571
+
572
+ /* prog->aux->name will be ignored if full btf name is available */
573
+ if (prog->aux->func_info_cnt) {
574
+ type = btf_type_by_id(prog->aux->btf,
575
+ prog->aux->func_info[prog->aux->func_idx].type_id);
576
+ func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
577
+ snprintf(sym, (size_t)(end - sym), "_%s", func_name);
578
+ return;
579
+ }
580
+
408581 if (prog->aux->name[0])
409582 snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
410583 else
411584 *sym = 0;
412585 }
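
For example (tag invented), a JITed program whose tag is 1c2d3e4f5a6b7c8d and whose BTF func_info resolves to the name do_filter shows up in /proc/kallsyms as bpf_prog_1c2d3e4f5a6b7c8d_do_filter; without BTF the fallback is prog->aux->name, or just bpf_prog_<tag> when no name was supplied.
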
413586
414
-static __always_inline unsigned long
415
-bpf_get_prog_addr_start(struct latch_tree_node *n)
587
+static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
416588 {
417
- unsigned long symbol_start, symbol_end;
418
- const struct bpf_prog_aux *aux;
419
-
420
- aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
421
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
422
-
423
- return symbol_start;
589
+ return container_of(n, struct bpf_ksym, tnode)->start;
424590 }
425591
426592 static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
427593 struct latch_tree_node *b)
428594 {
429
- return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
595
+ return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
430596 }
431597
432598 static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
433599 {
434600 unsigned long val = (unsigned long)key;
435
- unsigned long symbol_start, symbol_end;
436
- const struct bpf_prog_aux *aux;
601
+ const struct bpf_ksym *ksym;
437602
438
- aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
439
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
603
+ ksym = container_of(n, struct bpf_ksym, tnode);
440604
441
- if (val < symbol_start)
605
+ if (val < ksym->start)
442606 return -1;
443
- if (val >= symbol_end)
607
+ if (val >= ksym->end)
444608 return 1;
445609
446610 return 0;
....@@ -455,20 +619,29 @@
455619 static LIST_HEAD(bpf_kallsyms);
456620 static struct latch_tree_root bpf_tree __cacheline_aligned;
457621
458
-static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
622
+void bpf_ksym_add(struct bpf_ksym *ksym)
459623 {
460
- WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
461
- list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
462
- latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
624
+ spin_lock_bh(&bpf_lock);
625
+ WARN_ON_ONCE(!list_empty(&ksym->lnode));
626
+ list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
627
+ latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
628
+ spin_unlock_bh(&bpf_lock);
463629 }
464630
465
-static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
631
+static void __bpf_ksym_del(struct bpf_ksym *ksym)
466632 {
467
- if (list_empty(&aux->ksym_lnode))
633
+ if (list_empty(&ksym->lnode))
468634 return;
469635
470
- latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
471
- list_del_rcu(&aux->ksym_lnode);
636
+ latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
637
+ list_del_rcu(&ksym->lnode);
638
+}
639
+
640
+void bpf_ksym_del(struct bpf_ksym *ksym)
641
+{
642
+ spin_lock_bh(&bpf_lock);
643
+ __bpf_ksym_del(ksym);
644
+ spin_unlock_bh(&bpf_lock);
472645 }
473646
474647 static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
....@@ -478,19 +651,21 @@
478651
479652 static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
480653 {
481
- return list_empty(&fp->aux->ksym_lnode) ||
482
- fp->aux->ksym_lnode.prev == LIST_POISON2;
654
+ return list_empty(&fp->aux->ksym.lnode) ||
655
+ fp->aux->ksym.lnode.prev == LIST_POISON2;
483656 }
484657
485658 void bpf_prog_kallsyms_add(struct bpf_prog *fp)
486659 {
487660 if (!bpf_prog_kallsyms_candidate(fp) ||
488
- !capable(CAP_SYS_ADMIN))
661
+ !bpf_capable())
489662 return;
490663
491
- spin_lock_bh(&bpf_lock);
492
- bpf_prog_ksym_node_add(fp->aux);
493
- spin_unlock_bh(&bpf_lock);
664
+ bpf_prog_ksym_set_addr(fp);
665
+ bpf_prog_ksym_set_name(fp);
666
+ fp->aux->ksym.prog = true;
667
+
668
+ bpf_ksym_add(&fp->aux->ksym);
494669 }
495670
496671 void bpf_prog_kallsyms_del(struct bpf_prog *fp)
....@@ -498,36 +673,30 @@
498673 if (!bpf_prog_kallsyms_candidate(fp))
499674 return;
500675
501
- spin_lock_bh(&bpf_lock);
502
- bpf_prog_ksym_node_del(fp->aux);
503
- spin_unlock_bh(&bpf_lock);
676
+ bpf_ksym_del(&fp->aux->ksym);
504677 }
505678
506
-static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
679
+static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
507680 {
508681 struct latch_tree_node *n;
509682
510
- if (!bpf_jit_kallsyms_enabled())
511
- return NULL;
512
-
513683 n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
514
- return n ?
515
- container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
516
- NULL;
684
+ return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
517685 }
518686
519687 const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
520688 unsigned long *off, char *sym)
521689 {
522
- unsigned long symbol_start, symbol_end;
523
- struct bpf_prog *prog;
690
+ struct bpf_ksym *ksym;
524691 char *ret = NULL;
525692
526693 rcu_read_lock();
527
- prog = bpf_prog_kallsyms_find(addr);
528
- if (prog) {
529
- bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
530
- bpf_get_prog_name(prog, sym);
694
+ ksym = bpf_ksym_find(addr);
695
+ if (ksym) {
696
+ unsigned long symbol_start = ksym->start;
697
+ unsigned long symbol_end = ksym->end;
698
+
699
+ strncpy(sym, ksym->name, KSYM_NAME_LEN);
531700
532701 ret = sym;
533702 if (size)
....@@ -545,17 +714,43 @@
545714 bool ret;
546715
547716 rcu_read_lock();
548
- ret = bpf_prog_kallsyms_find(addr) != NULL;
717
+ ret = bpf_ksym_find(addr) != NULL;
549718 rcu_read_unlock();
550719
551720 return ret;
552721 }
553722
723
+static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
724
+{
725
+ struct bpf_ksym *ksym = bpf_ksym_find(addr);
726
+
727
+ return ksym && ksym->prog ?
728
+ container_of(ksym, struct bpf_prog_aux, ksym)->prog :
729
+ NULL;
730
+}
731
+
732
+const struct exception_table_entry *search_bpf_extables(unsigned long addr)
733
+{
734
+ const struct exception_table_entry *e = NULL;
735
+ struct bpf_prog *prog;
736
+
737
+ rcu_read_lock();
738
+ prog = bpf_prog_ksym_find(addr);
739
+ if (!prog)
740
+ goto out;
741
+ if (!prog->aux->num_exentries)
742
+ goto out;
743
+
744
+ e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr);
745
+out:
746
+ rcu_read_unlock();
747
+ return e;
748
+}
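
search_bpf_extables() is intended to be called from the generic fault-fixup path when an exception hits a JITed program. A simplified sketch of that caller (the kernel/extable.c side of this kernel generation, not part of this patch):

const struct exception_table_entry *search_exception_tables(unsigned long addr)
{
	const struct exception_table_entry *e;

	e = search_kernel_exception_table(addr);	/* core kernel text */
	if (!e)
		e = search_module_extables(addr);	/* loadable modules */
	if (!e)
		e = search_bpf_extables(addr);		/* JITed BPF programs */
	return e;
}
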
749
+
554750 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
555751 char *sym)
556752 {
557
- unsigned long symbol_start, symbol_end;
558
- struct bpf_prog_aux *aux;
753
+ struct bpf_ksym *ksym;
559754 unsigned int it = 0;
560755 int ret = -ERANGE;
561756
....@@ -563,14 +758,13 @@
563758 return ret;
564759
565760 rcu_read_lock();
566
- list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
761
+ list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
567762 if (it++ != symnum)
568763 continue;
569764
570
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
571
- bpf_get_prog_name(aux->prog, sym);
765
+ strncpy(sym, ksym->name, KSYM_NAME_LEN);
572766
573
- *value = symbol_start;
767
+ *value = ksym->start;
574768 *type = BPF_SYM_ELF_TYPE;
575769
576770 ret = 0;
....@@ -579,6 +773,40 @@
579773 rcu_read_unlock();
580774
581775 return ret;
776
+}
777
+
778
+int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
779
+ struct bpf_jit_poke_descriptor *poke)
780
+{
781
+ struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
782
+ static const u32 poke_tab_max = 1024;
783
+ u32 slot = prog->aux->size_poke_tab;
784
+ u32 size = slot + 1;
785
+
786
+ if (size > poke_tab_max)
787
+ return -ENOSPC;
788
+ if (poke->tailcall_target || poke->tailcall_target_stable ||
789
+ poke->tailcall_bypass || poke->adj_off || poke->bypass_addr)
790
+ return -EINVAL;
791
+
792
+ switch (poke->reason) {
793
+ case BPF_POKE_REASON_TAIL_CALL:
794
+ if (!poke->tail_call.map)
795
+ return -EINVAL;
796
+ break;
797
+ default:
798
+ return -EINVAL;
799
+ }
800
+
801
+ tab = krealloc(tab, size * sizeof(*poke), GFP_KERNEL);
802
+ if (!tab)
803
+ return -ENOMEM;
804
+
805
+ memcpy(&tab[slot], poke, sizeof(*poke));
806
+ prog->aux->size_poke_tab = size;
807
+ prog->aux->poke_tab = tab;
808
+
809
+ return slot;
582810 }
583811
584812 static atomic_long_t bpf_jit_current;
....@@ -600,17 +828,17 @@
600828 {
601829 /* Only used as heuristic here to derive limit. */
602830 bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
603
- bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
831
+ bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 1,
604832 PAGE_SIZE), LONG_MAX);
605833 return 0;
606834 }
607835 pure_initcall(bpf_jit_charge_init);
608836
609
-static int bpf_jit_charge_modmem(u32 pages)
837
+int bpf_jit_charge_modmem(u32 pages)
610838 {
611839 if (atomic_long_add_return(pages, &bpf_jit_current) >
612840 (bpf_jit_limit >> PAGE_SHIFT)) {
613
- if (!capable(CAP_SYS_ADMIN)) {
841
+ if (!bpf_capable()) {
614842 atomic_long_sub(pages, &bpf_jit_current);
615843 return -EPERM;
616844 }
....@@ -619,18 +847,20 @@
619847 return 0;
620848 }
621849
622
-static void bpf_jit_uncharge_modmem(u32 pages)
850
+void bpf_jit_uncharge_modmem(u32 pages)
623851 {
624852 atomic_long_sub(pages, &bpf_jit_current);
625853 }
626854
627
-#if IS_ENABLED(CONFIG_BPF_JIT) && IS_ENABLED(CONFIG_CFI_CLANG)
628
-bool __weak arch_bpf_jit_check_func(const struct bpf_prog *prog)
855
+void *__weak bpf_jit_alloc_exec(unsigned long size)
629856 {
630
- return true;
857
+ return module_alloc(size);
631858 }
632
-EXPORT_SYMBOL_GPL(arch_bpf_jit_check_func);
633
-#endif
859
+
860
+void __weak bpf_jit_free_exec(void *addr)
861
+{
862
+ module_memfree(addr);
863
+}
634864
635865 struct bpf_binary_header *
636866 bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
....@@ -639,6 +869,9 @@
639869 {
640870 struct bpf_binary_header *hdr;
641871 u32 size, hole, start, pages;
872
+
873
+ WARN_ON_ONCE(!is_power_of_2(alignment) ||
874
+ alignment > BPF_IMAGE_ALIGNMENT);
642875
643876 /* Most of BPF filters are really small, but if some of them
644877 * fill a page, allow at least 128 extra bytes to insert a
....@@ -649,7 +882,7 @@
649882
650883 if (bpf_jit_charge_modmem(pages))
651884 return NULL;
652
- hdr = module_alloc(size);
885
+ hdr = bpf_jit_alloc_exec(size);
653886 if (!hdr) {
654887 bpf_jit_uncharge_modmem(pages);
655888 return NULL;
....@@ -658,7 +891,6 @@
658891 /* Fill space with illegal/arch-dep instructions. */
659892 bpf_fill_ill_insns(hdr, size);
660893
661
- bpf_jit_set_header_magic(hdr);
662894 hdr->pages = pages;
663895 hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
664896 PAGE_SIZE - sizeof(*hdr));
....@@ -674,7 +906,9 @@
674906 {
675907 u32 pages = hdr->pages;
676908
677
- module_memfree(hdr);
909
+ trace_android_vh_set_memory_rw((unsigned long)hdr, pages);
910
+ trace_android_vh_set_memory_nx((unsigned long)hdr, pages);
911
+ bpf_jit_free_exec(hdr);
678912 bpf_jit_uncharge_modmem(pages);
679913 }
680914
....@@ -687,7 +921,6 @@
687921 if (fp->jited) {
688922 struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
689923
690
- bpf_jit_binary_unlock_ro(hdr);
691924 bpf_jit_binary_free(hdr);
692925
693926 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
....@@ -696,9 +929,44 @@
696929 bpf_prog_unlock_free(fp);
697930 }
698931
932
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
933
+ const struct bpf_insn *insn, bool extra_pass,
934
+ u64 *func_addr, bool *func_addr_fixed)
935
+{
936
+ s16 off = insn->off;
937
+ s32 imm = insn->imm;
938
+ u8 *addr;
939
+
940
+ *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
941
+ if (!*func_addr_fixed) {
942
+ /* Place-holder address till the last pass has collected
943
+ * all addresses for JITed subprograms in which case we
944
+ * can pick them up from prog->aux.
945
+ */
946
+ if (!extra_pass)
947
+ addr = NULL;
948
+ else if (prog->aux->func &&
949
+ off >= 0 && off < prog->aux->func_cnt)
950
+ addr = (u8 *)prog->aux->func[off]->bpf_func;
951
+ else
952
+ return -EINVAL;
953
+ } else {
954
+ /* Address of a BPF helper call. Since part of the core
955
+ * kernel, it's always at a fixed location. __bpf_call_base
956
+ * and the helper with imm relative to it are both in core
957
+ * kernel.
958
+ */
959
+ addr = (u8 *)__bpf_call_base + imm;
960
+ }
961
+
962
+ *func_addr = (unsigned long)addr;
963
+ return 0;
964
+}
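
A sketch of how an arch JIT typically consumes this helper when emitting a BPF_CALL; emit_call() and image are stand-ins for the arch-specific emitter state, not real interfaces:

	case BPF_JMP | BPF_CALL: {
		bool func_addr_fixed;
		u64 func_addr;
		int err;

		err = bpf_jit_get_func_addr(prog, insn, extra_pass,
					    &func_addr, &func_addr_fixed);
		if (err)
			return err;
		/* func_addr is NULL on the first pass for subprog calls and
		 * only becomes final on the extra pass.
		 */
		emit_call(&image, (void *)(unsigned long)func_addr, func_addr_fixed);
		break;
	}
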
965
+
699966 static int bpf_jit_blind_insn(const struct bpf_insn *from,
700967 const struct bpf_insn *aux,
701
- struct bpf_insn *to_buff)
968
+ struct bpf_insn *to_buff,
969
+ bool emit_zext)
702970 {
703971 struct bpf_insn *to = to_buff;
704972 u32 imm_rnd = get_random_int();
....@@ -717,6 +985,9 @@
717985 * below.
718986 *
719987 * Constant blinding is only used by JITs, not in the interpreter.
988
+ * The interpreter uses AX in some occasions as a local temporary
989
+ * register e.g. in DIV or MOD instructions.
990
+ *
720991 * In restricted circumstances, the verifier can also use the AX
721992 * register for rewrites as long as they do not interfere with
722993 * the above cases!
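
A concrete example of the rewrite bpf_jit_blind_insn() performs (register and immediate invented): with a per-program random value imm_rnd, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 42) is replaced by

	BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ 42);
	BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_AX);

so the user-supplied constant 42 never appears verbatim in the JITed image.
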
....@@ -780,6 +1051,27 @@
7801051 *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
7811052 break;
7821053
1054
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
1055
+ case BPF_JMP32 | BPF_JNE | BPF_K:
1056
+ case BPF_JMP32 | BPF_JGT | BPF_K:
1057
+ case BPF_JMP32 | BPF_JLT | BPF_K:
1058
+ case BPF_JMP32 | BPF_JGE | BPF_K:
1059
+ case BPF_JMP32 | BPF_JLE | BPF_K:
1060
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
1061
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
1062
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
1063
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
1064
+ case BPF_JMP32 | BPF_JSET | BPF_K:
1065
+ /* Accommodate for extra offset in case of a backjump. */
1066
+ off = from->off;
1067
+ if (off < 0)
1068
+ off -= 2;
1069
+ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1070
+ *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1071
+ *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
1072
+ off);
1073
+ break;
1074
+
7831075 case BPF_LD | BPF_IMM | BPF_DW:
7841076 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
7851077 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
....@@ -789,6 +1081,8 @@
7891081 case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
7901082 *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
7911083 *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1084
+ if (emit_zext)
1085
+ *to++ = BPF_ZEXT_REG(BPF_REG_AX);
7921086 *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX);
7931087 break;
7941088
....@@ -811,7 +1105,7 @@
8111105 gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
8121106 struct bpf_prog *fp;
8131107
814
- fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
1108
+ fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
8151109 if (fp != NULL) {
8161110 /* aux->prog still points to the fp_other one, so
8171111 * when promoting the clone to the real program,
....@@ -872,18 +1166,19 @@
8721166 insn[1].code == 0)
8731167 memcpy(aux, insn, sizeof(aux));
8741168
875
- rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
1169
+ rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
1170
+ clone->aux->verifier_zext);
8761171 if (!rewritten)
8771172 continue;
8781173
8791174 tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
880
- if (!tmp) {
1175
+ if (IS_ERR(tmp)) {
8811176 /* Patching may have repointed aux->prog during
8821177 * realloc from the original one, so we need to
8831178 * fix it up here on error.
8841179 */
8851180 bpf_jit_prog_release_other(prog, clone);
886
- return ERR_PTR(-ENOMEM);
1181
+ return tmp;
8871182 }
8881183
8891184 clone = tmp;
....@@ -916,32 +1211,34 @@
9161211 #define BPF_INSN_MAP(INSN_2, INSN_3) \
9171212 /* 32 bit ALU operations. */ \
9181213 /* Register based. */ \
919
- INSN_3(ALU, ADD, X), \
920
- INSN_3(ALU, SUB, X), \
921
- INSN_3(ALU, AND, X), \
922
- INSN_3(ALU, OR, X), \
923
- INSN_3(ALU, LSH, X), \
924
- INSN_3(ALU, RSH, X), \
925
- INSN_3(ALU, XOR, X), \
926
- INSN_3(ALU, MUL, X), \
927
- INSN_3(ALU, MOV, X), \
928
- INSN_3(ALU, DIV, X), \
929
- INSN_3(ALU, MOD, X), \
1214
+ INSN_3(ALU, ADD, X), \
1215
+ INSN_3(ALU, SUB, X), \
1216
+ INSN_3(ALU, AND, X), \
1217
+ INSN_3(ALU, OR, X), \
1218
+ INSN_3(ALU, LSH, X), \
1219
+ INSN_3(ALU, RSH, X), \
1220
+ INSN_3(ALU, XOR, X), \
1221
+ INSN_3(ALU, MUL, X), \
1222
+ INSN_3(ALU, MOV, X), \
1223
+ INSN_3(ALU, ARSH, X), \
1224
+ INSN_3(ALU, DIV, X), \
1225
+ INSN_3(ALU, MOD, X), \
9301226 INSN_2(ALU, NEG), \
9311227 INSN_3(ALU, END, TO_BE), \
9321228 INSN_3(ALU, END, TO_LE), \
9331229 /* Immediate based. */ \
934
- INSN_3(ALU, ADD, K), \
935
- INSN_3(ALU, SUB, K), \
936
- INSN_3(ALU, AND, K), \
937
- INSN_3(ALU, OR, K), \
938
- INSN_3(ALU, LSH, K), \
939
- INSN_3(ALU, RSH, K), \
940
- INSN_3(ALU, XOR, K), \
941
- INSN_3(ALU, MUL, K), \
942
- INSN_3(ALU, MOV, K), \
943
- INSN_3(ALU, DIV, K), \
944
- INSN_3(ALU, MOD, K), \
1230
+ INSN_3(ALU, ADD, K), \
1231
+ INSN_3(ALU, SUB, K), \
1232
+ INSN_3(ALU, AND, K), \
1233
+ INSN_3(ALU, OR, K), \
1234
+ INSN_3(ALU, LSH, K), \
1235
+ INSN_3(ALU, RSH, K), \
1236
+ INSN_3(ALU, XOR, K), \
1237
+ INSN_3(ALU, MUL, K), \
1238
+ INSN_3(ALU, MOV, K), \
1239
+ INSN_3(ALU, ARSH, K), \
1240
+ INSN_3(ALU, DIV, K), \
1241
+ INSN_3(ALU, MOD, K), \
9451242 /* 64 bit ALU operations. */ \
9461243 /* Register based. */ \
9471244 INSN_3(ALU64, ADD, X), \
....@@ -974,6 +1271,31 @@
9741271 INSN_2(JMP, CALL), \
9751272 /* Exit instruction. */ \
9761273 INSN_2(JMP, EXIT), \
1274
+ /* 32-bit Jump instructions. */ \
1275
+ /* Register based. */ \
1276
+ INSN_3(JMP32, JEQ, X), \
1277
+ INSN_3(JMP32, JNE, X), \
1278
+ INSN_3(JMP32, JGT, X), \
1279
+ INSN_3(JMP32, JLT, X), \
1280
+ INSN_3(JMP32, JGE, X), \
1281
+ INSN_3(JMP32, JLE, X), \
1282
+ INSN_3(JMP32, JSGT, X), \
1283
+ INSN_3(JMP32, JSLT, X), \
1284
+ INSN_3(JMP32, JSGE, X), \
1285
+ INSN_3(JMP32, JSLE, X), \
1286
+ INSN_3(JMP32, JSET, X), \
1287
+ /* Immediate based. */ \
1288
+ INSN_3(JMP32, JEQ, K), \
1289
+ INSN_3(JMP32, JNE, K), \
1290
+ INSN_3(JMP32, JGT, K), \
1291
+ INSN_3(JMP32, JLT, K), \
1292
+ INSN_3(JMP32, JGE, K), \
1293
+ INSN_3(JMP32, JLE, K), \
1294
+ INSN_3(JMP32, JSGT, K), \
1295
+ INSN_3(JMP32, JSLT, K), \
1296
+ INSN_3(JMP32, JSGE, K), \
1297
+ INSN_3(JMP32, JSLE, K), \
1298
+ INSN_3(JMP32, JSET, K), \
9771299 /* Jump instructions. */ \
9781300 /* Register based. */ \
9791301 INSN_3(JMP, JEQ, X), \
....@@ -1044,10 +1366,17 @@
10441366 }
10451367
10461368 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1369
+u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
1370
+{
1371
+ memset(dst, 0, size);
1372
+ return -EFAULT;
1373
+}
1374
+
10471375 /**
10481376 * __bpf_prog_run - run eBPF program on a given context
1049
- * @ctx: is the data we are operating on
1377
+ * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
10501378 * @insn: is the array of eBPF instructions
1379
+ * @stack: is the eBPF storage stack
10511380 *
10521381 * Decode and execute eBPF instructions.
10531382 */
....@@ -1055,7 +1384,7 @@
10551384 {
10561385 #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
10571386 #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
1058
- static const void *jumptable[256] = {
1387
+ static const void * const jumptable[256] __annotate_jump_table = {
10591388 [0 ... 255] = &&default_label,
10601389 /* Now overwrite non-defaults ... */
10611390 BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
....@@ -1063,11 +1392,14 @@
10631392 [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
10641393 [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
10651394 [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC,
1395
+ [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
1396
+ [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
1397
+ [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
1398
+ [BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW,
10661399 };
10671400 #undef BPF_INSN_3_LBL
10681401 #undef BPF_INSN_2_LBL
10691402 u32 tail_call_cnt = 0;
1070
- u64 tmp;
10711403
10721404 #define CONT ({ insn++; goto select_insn; })
10731405 #define CONT_JMP ({ insn++; goto select_insn; })
....@@ -1075,29 +1407,54 @@
10751407 select_insn:
10761408 goto *jumptable[insn->code];
10771409
1078
- /* ALU */
1079
-#define ALU(OPCODE, OP) \
1080
- ALU64_##OPCODE##_X: \
1081
- DST = DST OP SRC; \
1082
- CONT; \
1083
- ALU_##OPCODE##_X: \
1084
- DST = (u32) DST OP (u32) SRC; \
1085
- CONT; \
1086
- ALU64_##OPCODE##_K: \
1087
- DST = DST OP IMM; \
1088
- CONT; \
1089
- ALU_##OPCODE##_K: \
1090
- DST = (u32) DST OP (u32) IMM; \
1410
+ /* Explicitly mask the register-based shift amounts with 63 or 31
1411
+ * to avoid undefined behavior. Normally this won't affect the
1412
+ * generated code, for example, in case of native 64 bit archs such
1413
+ * as x86-64 or arm64, the compiler is optimizing the AND away for
1414
+ * the interpreter. In case of JITs, each of the JIT backends compiles
1415
+ * the BPF shift operations to machine instructions which produce
1416
+ * implementation-defined results in such a case; the resulting
1417
+ * contents of the register may be arbitrary, but program behaviour
1418
+ * as a whole remains defined. In other words, in case of JIT backends,
1419
+ * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
1420
+ */
1421
+ /* ALU (shifts) */
1422
+#define SHT(OPCODE, OP) \
1423
+ ALU64_##OPCODE##_X: \
1424
+ DST = DST OP (SRC & 63); \
1425
+ CONT; \
1426
+ ALU_##OPCODE##_X: \
1427
+ DST = (u32) DST OP ((u32) SRC & 31); \
1428
+ CONT; \
1429
+ ALU64_##OPCODE##_K: \
1430
+ DST = DST OP IMM; \
1431
+ CONT; \
1432
+ ALU_##OPCODE##_K: \
1433
+ DST = (u32) DST OP (u32) IMM; \
10911434 CONT;
1092
-
1435
+ /* ALU (rest) */
1436
+#define ALU(OPCODE, OP) \
1437
+ ALU64_##OPCODE##_X: \
1438
+ DST = DST OP SRC; \
1439
+ CONT; \
1440
+ ALU_##OPCODE##_X: \
1441
+ DST = (u32) DST OP (u32) SRC; \
1442
+ CONT; \
1443
+ ALU64_##OPCODE##_K: \
1444
+ DST = DST OP IMM; \
1445
+ CONT; \
1446
+ ALU_##OPCODE##_K: \
1447
+ DST = (u32) DST OP (u32) IMM; \
1448
+ CONT;
10931449 ALU(ADD, +)
10941450 ALU(SUB, -)
10951451 ALU(AND, &)
10961452 ALU(OR, |)
1097
- ALU(LSH, <<)
1098
- ALU(RSH, >>)
10991453 ALU(XOR, ^)
11001454 ALU(MUL, *)
1455
+ SHT(LSH, <<)
1456
+ SHT(RSH, >>)
1457
+#undef SHT
11011458 #undef ALU
11021459 ALU_NEG:
11031460 DST = (u32) -DST;
....@@ -1121,43 +1478,49 @@
11211478 DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
11221479 insn++;
11231480 CONT;
1481
+ ALU_ARSH_X:
1482
+ DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
1483
+ CONT;
1484
+ ALU_ARSH_K:
1485
+ DST = (u64) (u32) (((s32) DST) >> IMM);
1486
+ CONT;
11241487 ALU64_ARSH_X:
1125
- (*(s64 *) &DST) >>= SRC;
1488
+ (*(s64 *) &DST) >>= (SRC & 63);
11261489 CONT;
11271490 ALU64_ARSH_K:
11281491 (*(s64 *) &DST) >>= IMM;
11291492 CONT;
11301493 ALU64_MOD_X:
1131
- div64_u64_rem(DST, SRC, &tmp);
1132
- DST = tmp;
1494
+ div64_u64_rem(DST, SRC, &AX);
1495
+ DST = AX;
11331496 CONT;
11341497 ALU_MOD_X:
1135
- tmp = (u32) DST;
1136
- DST = do_div(tmp, (u32) SRC);
1498
+ AX = (u32) DST;
1499
+ DST = do_div(AX, (u32) SRC);
11371500 CONT;
11381501 ALU64_MOD_K:
1139
- div64_u64_rem(DST, IMM, &tmp);
1140
- DST = tmp;
1502
+ div64_u64_rem(DST, IMM, &AX);
1503
+ DST = AX;
11411504 CONT;
11421505 ALU_MOD_K:
1143
- tmp = (u32) DST;
1144
- DST = do_div(tmp, (u32) IMM);
1506
+ AX = (u32) DST;
1507
+ DST = do_div(AX, (u32) IMM);
11451508 CONT;
11461509 ALU64_DIV_X:
11471510 DST = div64_u64(DST, SRC);
11481511 CONT;
11491512 ALU_DIV_X:
1150
- tmp = (u32) DST;
1151
- do_div(tmp, (u32) SRC);
1152
- DST = (u32) tmp;
1513
+ AX = (u32) DST;
1514
+ do_div(AX, (u32) SRC);
1515
+ DST = (u32) AX;
11531516 CONT;
11541517 ALU64_DIV_K:
11551518 DST = div64_u64(DST, IMM);
11561519 CONT;
11571520 ALU_DIV_K:
1158
- tmp = (u32) DST;
1159
- do_div(tmp, (u32) IMM);
1160
- DST = (u32) tmp;
1521
+ AX = (u32) DST;
1522
+ do_div(AX, (u32) IMM);
1523
+ DST = (u32) AX;
11611524 CONT;
11621525 ALU_END_TO_BE:
11631526 switch (IMM) {
....@@ -1222,7 +1585,7 @@
12221585
12231586 /* ARG1 at this point is guaranteed to point to CTX from
12241587 * the verifier side due to the fact that the tail call is
1225
- * handeled like a helper, that is, bpf_tail_call_proto,
1588
+ * handled like a helper, that is, bpf_tail_call_proto,
12261589 * where arg1_type is ARG_PTR_TO_CTX.
12271590 */
12281591 insn = prog->insnsi;
....@@ -1230,145 +1593,49 @@
12301593 out:
12311594 CONT;
12321595 }
1233
- /* JMP */
12341596 JMP_JA:
12351597 insn += insn->off;
12361598 CONT;
1237
- JMP_JEQ_X:
1238
- if (DST == SRC) {
1239
- insn += insn->off;
1240
- CONT_JMP;
1241
- }
1242
- CONT;
1243
- JMP_JEQ_K:
1244
- if (DST == IMM) {
1245
- insn += insn->off;
1246
- CONT_JMP;
1247
- }
1248
- CONT;
1249
- JMP_JNE_X:
1250
- if (DST != SRC) {
1251
- insn += insn->off;
1252
- CONT_JMP;
1253
- }
1254
- CONT;
1255
- JMP_JNE_K:
1256
- if (DST != IMM) {
1257
- insn += insn->off;
1258
- CONT_JMP;
1259
- }
1260
- CONT;
1261
- JMP_JGT_X:
1262
- if (DST > SRC) {
1263
- insn += insn->off;
1264
- CONT_JMP;
1265
- }
1266
- CONT;
1267
- JMP_JGT_K:
1268
- if (DST > IMM) {
1269
- insn += insn->off;
1270
- CONT_JMP;
1271
- }
1272
- CONT;
1273
- JMP_JLT_X:
1274
- if (DST < SRC) {
1275
- insn += insn->off;
1276
- CONT_JMP;
1277
- }
1278
- CONT;
1279
- JMP_JLT_K:
1280
- if (DST < IMM) {
1281
- insn += insn->off;
1282
- CONT_JMP;
1283
- }
1284
- CONT;
1285
- JMP_JGE_X:
1286
- if (DST >= SRC) {
1287
- insn += insn->off;
1288
- CONT_JMP;
1289
- }
1290
- CONT;
1291
- JMP_JGE_K:
1292
- if (DST >= IMM) {
1293
- insn += insn->off;
1294
- CONT_JMP;
1295
- }
1296
- CONT;
1297
- JMP_JLE_X:
1298
- if (DST <= SRC) {
1299
- insn += insn->off;
1300
- CONT_JMP;
1301
- }
1302
- CONT;
1303
- JMP_JLE_K:
1304
- if (DST <= IMM) {
1305
- insn += insn->off;
1306
- CONT_JMP;
1307
- }
1308
- CONT;
1309
- JMP_JSGT_X:
1310
- if (((s64) DST) > ((s64) SRC)) {
1311
- insn += insn->off;
1312
- CONT_JMP;
1313
- }
1314
- CONT;
1315
- JMP_JSGT_K:
1316
- if (((s64) DST) > ((s64) IMM)) {
1317
- insn += insn->off;
1318
- CONT_JMP;
1319
- }
1320
- CONT;
1321
- JMP_JSLT_X:
1322
- if (((s64) DST) < ((s64) SRC)) {
1323
- insn += insn->off;
1324
- CONT_JMP;
1325
- }
1326
- CONT;
1327
- JMP_JSLT_K:
1328
- if (((s64) DST) < ((s64) IMM)) {
1329
- insn += insn->off;
1330
- CONT_JMP;
1331
- }
1332
- CONT;
1333
- JMP_JSGE_X:
1334
- if (((s64) DST) >= ((s64) SRC)) {
1335
- insn += insn->off;
1336
- CONT_JMP;
1337
- }
1338
- CONT;
1339
- JMP_JSGE_K:
1340
- if (((s64) DST) >= ((s64) IMM)) {
1341
- insn += insn->off;
1342
- CONT_JMP;
1343
- }
1344
- CONT;
1345
- JMP_JSLE_X:
1346
- if (((s64) DST) <= ((s64) SRC)) {
1347
- insn += insn->off;
1348
- CONT_JMP;
1349
- }
1350
- CONT;
1351
- JMP_JSLE_K:
1352
- if (((s64) DST) <= ((s64) IMM)) {
1353
- insn += insn->off;
1354
- CONT_JMP;
1355
- }
1356
- CONT;
1357
- JMP_JSET_X:
1358
- if (DST & SRC) {
1359
- insn += insn->off;
1360
- CONT_JMP;
1361
- }
1362
- CONT;
1363
- JMP_JSET_K:
1364
- if (DST & IMM) {
1365
- insn += insn->off;
1366
- CONT_JMP;
1367
- }
1368
- CONT;
13691599 JMP_EXIT:
13701600 return BPF_R0;
1371
-
1601
+ /* JMP */
1602
+#define COND_JMP(SIGN, OPCODE, CMP_OP) \
1603
+ JMP_##OPCODE##_X: \
1604
+ if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) { \
1605
+ insn += insn->off; \
1606
+ CONT_JMP; \
1607
+ } \
1608
+ CONT; \
1609
+ JMP32_##OPCODE##_X: \
1610
+ if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) { \
1611
+ insn += insn->off; \
1612
+ CONT_JMP; \
1613
+ } \
1614
+ CONT; \
1615
+ JMP_##OPCODE##_K: \
1616
+ if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) { \
1617
+ insn += insn->off; \
1618
+ CONT_JMP; \
1619
+ } \
1620
+ CONT; \
1621
+ JMP32_##OPCODE##_K: \
1622
+ if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) { \
1623
+ insn += insn->off; \
1624
+ CONT_JMP; \
1625
+ } \
1626
+ CONT;
1627
+ COND_JMP(u, JEQ, ==)
1628
+ COND_JMP(u, JNE, !=)
1629
+ COND_JMP(u, JGT, >)
1630
+ COND_JMP(u, JLT, <)
1631
+ COND_JMP(u, JGE, >=)
1632
+ COND_JMP(u, JLE, <=)
1633
+ COND_JMP(u, JSET, &)
1634
+ COND_JMP(s, JSGT, >)
1635
+ COND_JMP(s, JSLT, <)
1636
+ COND_JMP(s, JSGE, >=)
1637
+ COND_JMP(s, JSLE, <=)
1638
+#undef COND_JMP
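
For reference, one instantiation of the macro above, COND_JMP(s, JSGT, >), expands to four handlers: the two 64-bit ones that replace the old open-coded JMP_JSGT_X/JMP_JSGT_K plus the new 32-bit variants:

	JMP_JSGT_X:
		if ((s64) DST > (s64) SRC) { insn += insn->off; CONT_JMP; }
		CONT;
	JMP32_JSGT_X:
		if ((s32) DST > (s32) SRC) { insn += insn->off; CONT_JMP; }
		CONT;
	JMP_JSGT_K:
		if ((s64) DST > (s64) IMM) { insn += insn->off; CONT_JMP; }
		CONT;
	JMP32_JSGT_K:
		if ((s32) DST > (s32) IMM) { insn += insn->off; CONT_JMP; }
		CONT;
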
13721639 /* ST, STX and LDX*/
13731640 ST_NOSPEC:
13741641 /* Speculation barrier for mitigating Speculative Store Bypass.
....@@ -1380,9 +1647,7 @@
13801647 * reuse preexisting logic from Spectre v1 mitigation that
13811648 * happens to produce the required code on x86 for v4 as well.
13821649 */
1383
-#ifdef CONFIG_X86
13841650 barrier_nospec();
1385
-#endif
13861651 CONT;
13871652 #define LDST(SIZEOP, SIZE) \
13881653 STX_MEM_##SIZEOP: \
....@@ -1393,6 +1658,11 @@
13931658 CONT; \
13941659 LDX_MEM_##SIZEOP: \
13951660 DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
1661
+ CONT; \
1662
+ LDX_PROBE_MEM_##SIZEOP: \
1663
+ bpf_probe_read_kernel(&DST, sizeof(SIZE), \
1664
+ (const void *)(long) (SRC + insn->off)); \
1665
+ DST = *((SIZE *)&DST); \
13961666 CONT;
13971667
13981668 LDST(B, u8)
....@@ -1400,6 +1670,7 @@
14001670 LDST(W, u32)
14011671 LDST(DW, u64)
14021672 #undef LDST
1673
+
14031674 STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
14041675 atomic_add((u32) SRC, (atomic_t *)(unsigned long)
14051676 (DST + insn->off));
....@@ -1420,7 +1691,6 @@
14201691 BUG_ON(1);
14211692 return 0;
14221693 }
1423
-STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
14241694
14251695 #define PROG_NAME(stack_size) __bpf_prog_run##stack_size
14261696 #define DEFINE_BPF_PROG_RUN(stack_size) \
....@@ -1508,28 +1778,34 @@
15081778 bool bpf_prog_array_compatible(struct bpf_array *array,
15091779 const struct bpf_prog *fp)
15101780 {
1781
+ bool ret;
1782
+
15111783 if (fp->kprobe_override)
15121784 return false;
15131785
1514
- if (!array->owner_prog_type) {
1786
+ spin_lock(&array->aux->owner.lock);
1787
+
1788
+ if (!array->aux->owner.type) {
15151789 /* There's no owner yet where we could check for
15161790 * compatibility.
15171791 */
1518
- array->owner_prog_type = fp->type;
1519
- array->owner_jited = fp->jited;
1520
-
1521
- return true;
1792
+ array->aux->owner.type = fp->type;
1793
+ array->aux->owner.jited = fp->jited;
1794
+ ret = true;
1795
+ } else {
1796
+ ret = array->aux->owner.type == fp->type &&
1797
+ array->aux->owner.jited == fp->jited;
15221798 }
1523
-
1524
- return array->owner_prog_type == fp->type &&
1525
- array->owner_jited == fp->jited;
1799
+ spin_unlock(&array->aux->owner.lock);
1800
+ return ret;
15261801 }
15271802
15281803 static int bpf_check_tail_call(const struct bpf_prog *fp)
15291804 {
15301805 struct bpf_prog_aux *aux = fp->aux;
1531
- int i;
1806
+ int i, ret = 0;
15321807
1808
+ mutex_lock(&aux->used_maps_mutex);
15331809 for (i = 0; i < aux->used_map_cnt; i++) {
15341810 struct bpf_map *map = aux->used_maps[i];
15351811 struct bpf_array *array;
....@@ -1538,11 +1814,15 @@
15381814 continue;
15391815
15401816 array = container_of(map, struct bpf_array, map);
1541
- if (!bpf_prog_array_compatible(array, fp))
1542
- return -EINVAL;
1817
+ if (!bpf_prog_array_compatible(array, fp)) {
1818
+ ret = -EINVAL;
1819
+ goto out;
1820
+ }
15431821 }
15441822
1545
- return 0;
1823
+out:
1824
+ mutex_unlock(&aux->used_maps_mutex);
1825
+ return ret;
15461826 }
15471827
15481828 static void bpf_prog_select_func(struct bpf_prog *fp)
....@@ -1581,13 +1861,20 @@
15811861 * be JITed, but falls back to the interpreter.
15821862 */
15831863 if (!bpf_prog_is_dev_bound(fp->aux)) {
1864
+ *err = bpf_prog_alloc_jited_linfo(fp);
1865
+ if (*err)
1866
+ return fp;
1867
+
15841868 fp = bpf_int_jit_compile(fp);
1585
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
15861869 if (!fp->jited) {
1870
+ bpf_prog_free_jited_linfo(fp);
1871
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
15871872 *err = -ENOTSUPP;
15881873 return fp;
1589
- }
15901874 #endif
1875
+ } else {
1876
+ bpf_prog_free_unused_jited_linfo(fp);
1877
+ }
15911878 } else {
15921879 *err = bpf_prog_offload_compile(fp);
15931880 if (*err)
....@@ -1646,38 +1933,42 @@
16461933 return &empty_prog_array.hdr;
16471934 }
16481935
1649
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
1936
+void bpf_prog_array_free(struct bpf_prog_array *progs)
16501937 {
1651
- if (!progs ||
1652
- progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
1938
+ if (!progs || progs == &empty_prog_array.hdr)
16531939 return;
16541940 kfree_rcu(progs, rcu);
16551941 }
16561942
1657
-int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
1943
+int bpf_prog_array_length(struct bpf_prog_array *array)
16581944 {
16591945 struct bpf_prog_array_item *item;
16601946 u32 cnt = 0;
16611947
1662
- rcu_read_lock();
1663
- item = rcu_dereference(array)->items;
1664
- for (; item->prog; item++)
1948
+ for (item = array->items; item->prog; item++)
16651949 if (item->prog != &dummy_bpf_prog.prog)
16661950 cnt++;
1667
- rcu_read_unlock();
16681951 return cnt;
16691952 }
16701953
1954
+bool bpf_prog_array_is_empty(struct bpf_prog_array *array)
1955
+{
1956
+ struct bpf_prog_array_item *item;
16711957
1672
-static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
1958
+ for (item = array->items; item->prog; item++)
1959
+ if (item->prog != &dummy_bpf_prog.prog)
1960
+ return false;
1961
+ return true;
1962
+}
1963
+
1964
+static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
16731965 u32 *prog_ids,
16741966 u32 request_cnt)
16751967 {
16761968 struct bpf_prog_array_item *item;
16771969 int i = 0;
16781970
1679
- item = rcu_dereference_check(array, 1)->items;
1680
- for (; item->prog; item++) {
1971
+ for (item = array->items; item->prog; item++) {
16811972 if (item->prog == &dummy_bpf_prog.prog)
16821973 continue;
16831974 prog_ids[i] = item->prog->aux->id;
....@@ -1690,7 +1981,7 @@
16901981 return !!(item->prog);
16911982 }
16921983
1693
-int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
1984
+int bpf_prog_array_copy_to_user(struct bpf_prog_array *array,
16941985 __u32 __user *prog_ids, u32 cnt)
16951986 {
16961987 unsigned long err = 0;
....@@ -1701,18 +1992,12 @@
17011992 * cnt = bpf_prog_array_length();
17021993 * if (cnt > 0)
17031994 * bpf_prog_array_copy_to_user(..., cnt);
1704
- * so below kcalloc doesn't need extra cnt > 0 check, but
1705
- * bpf_prog_array_length() releases rcu lock and
1706
- * prog array could have been swapped with empty or larger array,
1707
- * so always copy 'cnt' prog_ids to the user.
1708
- * In a rare race the user will see zero prog_ids
1995
+ * so below kcalloc doesn't need extra cnt > 0 check.
17091996 */
17101997 ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
17111998 if (!ids)
17121999 return -ENOMEM;
1713
- rcu_read_lock();
17142000 nospc = bpf_prog_array_copy_core(array, ids, cnt);
1715
- rcu_read_unlock();
17162001 err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
17172002 kfree(ids);
17182003 if (err)
....@@ -1722,19 +2007,74 @@
17222007 return 0;
17232008 }
17242009
1725
-void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
2010
+void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
17262011 struct bpf_prog *old_prog)
17272012 {
1728
- struct bpf_prog_array_item *item = array->items;
2013
+ struct bpf_prog_array_item *item;
17292014
1730
- for (; item->prog; item++)
2015
+ for (item = array->items; item->prog; item++)
17312016 if (item->prog == old_prog) {
17322017 WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
17332018 break;
17342019 }
17352020 }
17362021
1737
-int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
2022
+/**
2023
+ * bpf_prog_array_delete_safe_at() - Replaces the program at the given
2024
+ * index into the program array with
2025
+ * a dummy no-op program.
2026
+ * @array: a bpf_prog_array
2027
+ * @index: the index of the program to replace
2028
+ *
2029
+ * Skips over dummy programs, by not counting them, when calculating
2030
+ * the position of the program to replace.
2031
+ *
2032
+ * Return:
2033
+ * * 0 - Success
2034
+ * * -EINVAL - Invalid index value. Must be a non-negative integer.
2035
+ * * -ENOENT - Index out of range
2036
+ */
2037
+int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
2038
+{
2039
+ return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
2040
+}
2041
+
2042
+/**
2043
+ * bpf_prog_array_update_at() - Updates the program at the given index
2044
+ * into the program array.
2045
+ * @array: a bpf_prog_array
2046
+ * @index: the index of the program to update
2047
+ * @prog: the program to insert into the array
2048
+ *
2049
+ * Skips over dummy programs, by not counting them, when calculating
2050
+ * the position of the program to update.
2051
+ *
2052
+ * Return:
2053
+ * * 0 - Success
2054
+ * * -EINVAL - Invalid index value. Must be a non-negative integer.
2055
+ * * -ENOENT - Index out of range
2056
+ */
2057
+int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
2058
+ struct bpf_prog *prog)
2059
+{
2060
+ struct bpf_prog_array_item *item;
2061
+
2062
+ if (unlikely(index < 0))
2063
+ return -EINVAL;
2064
+
2065
+ for (item = array->items; item->prog; item++) {
2066
+ if (item->prog == &dummy_bpf_prog.prog)
2067
+ continue;
2068
+ if (!index) {
2069
+ WRITE_ONCE(item->prog, prog);
2070
+ return 0;
2071
+ }
2072
+ index--;
2073
+ }
2074
+ return -ENOENT;
2075
+}
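
A short example of the index semantics (programs invented): for an array whose items are [progA, dummy, progB, progC], index 0 refers to progA, index 1 to progB and index 2 to progC; a dummy entry left behind by an earlier bpf_prog_array_delete_safe() is skipped and never counted.
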
2076
+
2077
+int bpf_prog_array_copy(struct bpf_prog_array *old_array,
17382078 struct bpf_prog *exclude_prog,
17392079 struct bpf_prog *include_prog,
17402080 struct bpf_prog_array **new_array)
....@@ -1798,7 +2138,7 @@
17982138 return 0;
17992139 }
18002140
1801
-int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
2141
+int bpf_prog_array_copy_info(struct bpf_prog_array *array,
18022142 u32 *prog_ids, u32 request_cnt,
18032143 u32 *prog_cnt)
18042144 {
....@@ -1818,18 +2158,41 @@
18182158 : 0;
18192159 }
18202160
2161
+void __bpf_free_used_maps(struct bpf_prog_aux *aux,
2162
+ struct bpf_map **used_maps, u32 len)
2163
+{
2164
+ struct bpf_map *map;
2165
+ u32 i;
2166
+
2167
+ for (i = 0; i < len; i++) {
2168
+ map = used_maps[i];
2169
+ if (map->ops->map_poke_untrack)
2170
+ map->ops->map_poke_untrack(map, aux);
2171
+ bpf_map_put(map);
2172
+ }
2173
+}
2174
+
2175
+static void bpf_free_used_maps(struct bpf_prog_aux *aux)
2176
+{
2177
+ __bpf_free_used_maps(aux, aux->used_maps, aux->used_map_cnt);
2178
+ kfree(aux->used_maps);
2179
+}
2180
+
18212181 static void bpf_prog_free_deferred(struct work_struct *work)
18222182 {
18232183 struct bpf_prog_aux *aux;
18242184 int i;
18252185
18262186 aux = container_of(work, struct bpf_prog_aux, work);
2187
+ bpf_free_used_maps(aux);
18272188 if (bpf_prog_is_dev_bound(aux))
18282189 bpf_prog_offload_destroy(aux->prog);
18292190 #ifdef CONFIG_PERF_EVENTS
18302191 if (aux->prog->has_callchain_buf)
18312192 put_callchain_buffers();
18322193 #endif
2194
+ if (aux->dst_trampoline)
2195
+ bpf_trampoline_put(aux->dst_trampoline);
18332196 for (i = 0; i < aux->func_cnt; i++)
18342197 bpf_jit_free(aux->func[i]);
18352198 if (aux->func_cnt) {
....@@ -1845,6 +2208,8 @@
18452208 {
18462209 struct bpf_prog_aux *aux = fp->aux;
18472210
2211
+ if (aux->dst_prog)
2212
+ bpf_prog_put(aux->dst_prog);
18482213 INIT_WORK(&aux->work, bpf_prog_free_deferred);
18492214 schedule_work(&aux->work);
18502215 }
....@@ -1876,10 +2241,21 @@
18762241 return res;
18772242 }
18782243
2244
+BPF_CALL_0(bpf_get_raw_cpu_id)
2245
+{
2246
+ return raw_smp_processor_id();
2247
+}
2248
+
18792249 /* Weak definitions of helper functions in case we don't have bpf syscall. */
18802250 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
18812251 const struct bpf_func_proto bpf_map_update_elem_proto __weak;
18822252 const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
2253
+const struct bpf_func_proto bpf_map_push_elem_proto __weak;
2254
+const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
2255
+const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
2256
+const struct bpf_func_proto bpf_spin_lock_proto __weak;
2257
+const struct bpf_func_proto bpf_spin_unlock_proto __weak;
2258
+const struct bpf_func_proto bpf_jiffies64_proto __weak;
18832259
18842260 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
18852261 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
....@@ -1890,10 +2266,12 @@
18902266 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
18912267 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
18922268 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
1893
-const struct bpf_func_proto bpf_sock_map_update_proto __weak;
1894
-const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
18952269 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
2270
+const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
18962271 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
2272
+const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
2273
+const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
2274
+const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
18972275
18982276 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
18992277 {
....@@ -1939,6 +2317,15 @@
19392317 return false;
19402318 }
19412319
2320
+/* Return TRUE if the JIT backend wants verifier to enable sub-register usage
2321
+ * analysis code and wants explicit zero extension inserted by verifier.
2322
+ * Otherwise, return FALSE.
2323
+ */
2324
+bool __weak bpf_jit_needs_zext(void)
2325
+{
2326
+ return false;
2327
+}
2328
+
19422329 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
19432330 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
19442331 */
....@@ -1948,8 +2335,19 @@
19482335 return -EFAULT;
19492336 }
19502337
2338
+int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
2339
+ void *addr1, void *addr2)
2340
+{
2341
+ return -ENOTSUPP;
2342
+}
2343
+
2344
+DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
2345
+EXPORT_SYMBOL(bpf_stats_enabled_key);
2346
+
19512347 /* All definitions of tracepoints related to BPF. */
2348
+#undef TRACE_INCLUDE_PATH
19522349 #define CREATE_TRACE_POINTS
19532350 #include <linux/bpf_trace.h>
19542351
19552352 EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
2353
+EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);