2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/bpf/core.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Linux Socket Filter - Kernel level socket filtering
34 *
....@@ -12,29 +13,30 @@
1213 * Alexei Starovoitov <ast@plumgrid.com>
1314 * Daniel Borkmann <dborkman@redhat.com>
1415 *
15
- * This program is free software; you can redistribute it and/or
16
- * modify it under the terms of the GNU General Public License
17
- * as published by the Free Software Foundation; either version
18
- * 2 of the License, or (at your option) any later version.
19
- *
2016 * Andi Kleen - Fix a few bad bugs and races.
2117 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
2218 */
2319
20
+#include <uapi/linux/btf.h>
2421 #include <linux/filter.h>
2522 #include <linux/skbuff.h>
2623 #include <linux/vmalloc.h>
2724 #include <linux/random.h>
2825 #include <linux/moduleloader.h>
2926 #include <linux/bpf.h>
30
-#include <linux/frame.h>
27
+#include <linux/btf.h>
28
+#include <linux/objtool.h>
3129 #include <linux/rbtree_latch.h>
3230 #include <linux/kallsyms.h>
3331 #include <linux/rcupdate.h>
3432 #include <linux/perf_event.h>
33
+#include <linux/extable.h>
34
+#include <linux/log2.h>
3535
3636 #include <asm/barrier.h>
3737 #include <asm/unaligned.h>
38
+
39
+#include <trace/hooks/memory.h>
3840
3941 /* Registers */
4042 #define BPF_R0 regs[BPF_REG_0]
....@@ -66,25 +68,27 @@
6668 {
6769 u8 *ptr = NULL;
6870
69
- if (k >= SKF_NET_OFF)
71
+ if (k >= SKF_NET_OFF) {
7072 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
71
- else if (k >= SKF_LL_OFF)
73
+ } else if (k >= SKF_LL_OFF) {
74
+ if (unlikely(!skb_mac_header_was_set(skb)))
75
+ return NULL;
7276 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
73
-
77
+ }
7478 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
7579 return ptr;
7680
7781 return NULL;
7882 }
7983
80
-struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
84
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
8185 {
8286 gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
8387 struct bpf_prog_aux *aux;
8488 struct bpf_prog *fp;
8589
8690 size = round_up(size, PAGE_SIZE);
87
- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
91
+ fp = __vmalloc(size, gfp_flags);
8892 if (fp == NULL)
8993 return NULL;
9094
....@@ -99,11 +103,124 @@
99103 fp->aux->prog = fp;
100104 fp->jit_requested = ebpf_jit_enabled();
101105
102
- INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
106
+ INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
107
+ mutex_init(&fp->aux->used_maps_mutex);
108
+ mutex_init(&fp->aux->dst_mutex);
103109
104110 return fp;
105111 }
112
+
113
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
114
+{
115
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
116
+ struct bpf_prog *prog;
117
+ int cpu;
118
+
119
+ prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
120
+ if (!prog)
121
+ return NULL;
122
+
123
+ prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
124
+ if (!prog->aux->stats) {
125
+ kfree(prog->aux);
126
+ vfree(prog);
127
+ return NULL;
128
+ }
129
+
130
+ for_each_possible_cpu(cpu) {
131
+ struct bpf_prog_stats *pstats;
132
+
133
+ pstats = per_cpu_ptr(prog->aux->stats, cpu);
134
+ u64_stats_init(&pstats->syncp);
135
+ }
136
+ return prog;
137
+}
106138 EXPORT_SYMBOL_GPL(bpf_prog_alloc);
139
+
140
+int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
141
+{
142
+ if (!prog->aux->nr_linfo || !prog->jit_requested)
143
+ return 0;
144
+
145
+ prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
146
+ sizeof(*prog->aux->jited_linfo),
147
+ GFP_KERNEL | __GFP_NOWARN);
148
+ if (!prog->aux->jited_linfo)
149
+ return -ENOMEM;
150
+
151
+ return 0;
152
+}
153
+
154
+void bpf_prog_free_jited_linfo(struct bpf_prog *prog)
155
+{
156
+ kfree(prog->aux->jited_linfo);
157
+ prog->aux->jited_linfo = NULL;
158
+}
159
+
160
+void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog)
161
+{
162
+ if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0])
163
+ bpf_prog_free_jited_linfo(prog);
164
+}
165
+
166
+/* The jit engine is responsible to provide an array
167
+ * for insn_off to the jited_off mapping (insn_to_jit_off).
168
+ *
169
+ * The idx to this array is the insn_off. Hence, the insn_off
170
+ * here is relative to the prog itself instead of the main prog.
171
+ * This array has one entry for each xlated bpf insn.
172
+ *
173
+ * jited_off is the byte off to the last byte of the jited insn.
174
+ *
175
+ * Hence, with
176
+ * insn_start:
177
+ * The first bpf insn off of the prog. The insn off
178
+ * here is relative to the main prog.
179
+ * e.g. if prog is a subprog, insn_start > 0
180
+ * linfo_idx:
181
+ * The prog's idx to prog->aux->linfo and jited_linfo
182
+ *
183
+ * jited_linfo[linfo_idx] = prog->bpf_func
184
+ *
185
+ * For i > linfo_idx,
186
+ *
187
+ * jited_linfo[i] = prog->bpf_func +
188
+ * insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
189
+ */
190
+void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
191
+ const u32 *insn_to_jit_off)
192
+{
193
+ u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
194
+ const struct bpf_line_info *linfo;
195
+ void **jited_linfo;
196
+
197
+ if (!prog->aux->jited_linfo)
198
+ /* Userspace did not provide linfo */
199
+ return;
200
+
201
+ linfo_idx = prog->aux->linfo_idx;
202
+ linfo = &prog->aux->linfo[linfo_idx];
203
+ insn_start = linfo[0].insn_off;
204
+ insn_end = insn_start + prog->len;
205
+
206
+ jited_linfo = &prog->aux->jited_linfo[linfo_idx];
207
+ jited_linfo[0] = prog->bpf_func;
208
+
209
+ nr_linfo = prog->aux->nr_linfo - linfo_idx;
210
+
211
+ for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
212
+ /* The verifier ensures that linfo[i].insn_off is
213
+ * strictly increasing
214
+ */
215
+ jited_linfo[i] = prog->bpf_func +
216
+ insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
217
+}
218
+
219
+void bpf_prog_free_linfo(struct bpf_prog *prog)
220
+{
221
+ bpf_prog_free_jited_linfo(prog);
222
+ kvfree(prog->aux->linfo);
223
+}
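[Editor's note: to make the insn_off -> jited_off arithmetic described in the comment above (and implemented by bpf_prog_fill_jited_linfo) concrete, here is a minimal userspace sketch. All offsets and addresses are invented for illustration and are not taken from this patch.]

#include <stdio.h>

int main(void)
{
	unsigned long bpf_func = 0x1000;                 /* start of the jited image (hypothetical) */
	unsigned int linfo_insn_off[] = { 10, 12, 15 };  /* line-info offsets, relative to main prog */
	unsigned int insn_to_jit_off[] = { 4, 8, 12, 16, 20, 24 }; /* end offset of each jited insn */
	unsigned int insn_start = linfo_insn_off[0];     /* first insn of this (sub)prog == 10      */
	unsigned long jited_linfo[3];
	unsigned int i;

	jited_linfo[0] = bpf_func;                       /* first record maps to the prog entry     */
	for (i = 1; i < 3; i++)
		jited_linfo[i] = bpf_func +
			insn_to_jit_off[linfo_insn_off[i] - insn_start - 1];

	for (i = 0; i < 3; i++)
		printf("linfo[%u] insn_off=%u -> jited addr 0x%lx\n",
		       i, linfo_insn_off[i], jited_linfo[i]);
	return 0;
}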
107224
108225 struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
109226 gfp_t gfp_extra_flags)
....@@ -112,8 +229,6 @@
112229 struct bpf_prog *fp;
113230 u32 pages, delta;
114231 int ret;
115
-
116
- BUG_ON(fp_old == NULL);
117232
118233 size = round_up(size, PAGE_SIZE);
119234 pages = size / PAGE_SIZE;
....@@ -125,7 +240,7 @@
125240 if (ret)
126241 return NULL;
127242
128
- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
243
+ fp = __vmalloc(size, gfp_flags);
129244 if (fp == NULL) {
130245 __bpf_prog_uncharge(fp_old->aux->user, delta);
131246 } else {
....@@ -145,16 +260,22 @@
145260
146261 void __bpf_prog_free(struct bpf_prog *fp)
147262 {
148
- kfree(fp->aux);
263
+ if (fp->aux) {
264
+ mutex_destroy(&fp->aux->used_maps_mutex);
265
+ mutex_destroy(&fp->aux->dst_mutex);
266
+ free_percpu(fp->aux->stats);
267
+ kfree(fp->aux->poke_tab);
268
+ kfree(fp->aux);
269
+ }
149270 vfree(fp);
150271 }
151272
152273 int bpf_prog_calc_tag(struct bpf_prog *fp)
153274 {
154
- const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
275
+ const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
155276 u32 raw_size = bpf_prog_tag_scratch_size(fp);
156
- u32 digest[SHA_DIGEST_WORDS];
157
- u32 ws[SHA_WORKSPACE_WORDS];
277
+ u32 digest[SHA1_DIGEST_WORDS];
278
+ u32 ws[SHA1_WORKSPACE_WORDS];
158279 u32 i, bsize, psize, blocks;
159280 struct bpf_insn *dst;
160281 bool was_ld_map;
....@@ -166,7 +287,7 @@
166287 if (!raw)
167288 return -ENOMEM;
168289
169
- sha_init(digest);
290
+ sha1_init(digest);
170291 memset(ws, 0, sizeof(ws));
171292
172293 /* We need to take out the map fd for the digest calculation
....@@ -177,7 +298,8 @@
177298 dst[i] = fp->insnsi[i];
178299 if (!was_ld_map &&
179300 dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
180
- dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
301
+ (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
302
+ dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
181303 was_ld_map = true;
182304 dst[i].imm = 0;
183305 } else if (was_ld_map &&
....@@ -196,8 +318,8 @@
196318 memset(&raw[psize], 0, raw_size - psize);
197319 raw[psize++] = 0x80;
198320
199
- bsize = round_up(psize, SHA_MESSAGE_BYTES);
200
- blocks = bsize / SHA_MESSAGE_BYTES;
321
+ bsize = round_up(psize, SHA1_BLOCK_SIZE);
322
+ blocks = bsize / SHA1_BLOCK_SIZE;
201323 todo = raw;
202324 if (bsize - psize >= sizeof(__be64)) {
203325 bits = (__be64 *)(todo + bsize - sizeof(__be64));
....@@ -208,12 +330,12 @@
208330 *bits = cpu_to_be64((psize - 1) << 3);
209331
210332 while (blocks--) {
211
- sha_transform(digest, todo, ws);
212
- todo += SHA_MESSAGE_BYTES;
333
+ sha1_transform(digest, todo, ws);
334
+ todo += SHA1_BLOCK_SIZE;
213335 }
214336
215337 result = (__force __be32 *)digest;
216
- for (i = 0; i < SHA_DIGEST_WORDS; i++)
338
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++)
217339 result[i] = cpu_to_be32(digest[i]);
218340 memcpy(fp->tag, result, sizeof(fp->tag));
219341
....@@ -221,15 +343,16 @@
221343 return 0;
222344 }
223345
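[Editor's note: bpf_prog_calc_tag above produces the program tag as a truncated SHA-1 over the map-fd-scrubbed instruction image, padded the standard SHA-1 way: append 0x80, round up to a 64-byte block, and store the bit length in the final 8 bytes (adding one extra block if it does not fit). A small sketch of just that padding arithmetic; the instruction count is made up.]

#include <stdio.h>

#define SHA1_BLOCK_SIZE 64

int main(void)
{
	unsigned int insn_cnt = 5;                  /* hypothetical program size          */
	unsigned int psize = insn_cnt * 8;          /* 8 bytes per BPF instruction        */
	unsigned int bsize, blocks;
	unsigned long long bits;

	psize += 1;                                 /* the 0x80 terminator byte           */
	bsize = (psize + SHA1_BLOCK_SIZE - 1) / SHA1_BLOCK_SIZE * SHA1_BLOCK_SIZE;
	blocks = bsize / SHA1_BLOCK_SIZE;
	if (bsize - psize < sizeof(unsigned long long))
		blocks++;                           /* no room for the length, add a block */
	bits = (unsigned long long)(psize - 1) << 3; /* message length in bits            */

	printf("payload=%u padded=%u blocks=%u bitlen=%llu\n",
	       psize - 1, blocks * SHA1_BLOCK_SIZE, blocks, bits);
	return 0;
}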
224
-static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta,
225
- u32 curr, const bool probe_pass)
346
+static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
347
+ s32 end_new, s32 curr, const bool probe_pass)
226348 {
227349 const s64 imm_min = S32_MIN, imm_max = S32_MAX;
350
+ s32 delta = end_new - end_old;
228351 s64 imm = insn->imm;
229352
230
- if (curr < pos && curr + imm + 1 > pos)
353
+ if (curr < pos && curr + imm + 1 >= end_old)
231354 imm += delta;
232
- else if (curr > pos + delta && curr + imm + 1 <= pos + delta)
355
+ else if (curr >= end_new && curr + imm + 1 < end_new)
233356 imm -= delta;
234357 if (imm < imm_min || imm > imm_max)
235358 return -ERANGE;
....@@ -238,15 +361,16 @@
238361 return 0;
239362 }
240363
241
-static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
242
- u32 curr, const bool probe_pass)
364
+static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
365
+ s32 end_new, s32 curr, const bool probe_pass)
243366 {
244367 const s32 off_min = S16_MIN, off_max = S16_MAX;
368
+ s32 delta = end_new - end_old;
245369 s32 off = insn->off;
246370
247
- if (curr < pos && curr + off + 1 > pos)
371
+ if (curr < pos && curr + off + 1 >= end_old)
248372 off += delta;
249
- else if (curr > pos + delta && curr + off + 1 <= pos + delta)
373
+ else if (curr >= end_new && curr + off + 1 < end_new)
250374 off -= delta;
251375 if (off < off_min || off > off_max)
252376 return -ERANGE;
....@@ -255,10 +379,10 @@
255379 return 0;
256380 }
257381
258
-static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
259
- const bool probe_pass)
382
+static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
383
+ s32 end_new, const bool probe_pass)
260384 {
261
- u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0);
385
+ u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0);
262386 struct bpf_insn *insn = prog->insnsi;
263387 int ret = 0;
264388
....@@ -270,22 +394,23 @@
270394 * do any other adjustments. Therefore skip the patchlet.
271395 */
272396 if (probe_pass && i == pos) {
273
- i += delta + 1;
274
- insn++;
397
+ i = end_new;
398
+ insn = prog->insnsi + end_old;
275399 }
276400 code = insn->code;
277
- if (BPF_CLASS(code) != BPF_JMP ||
401
+ if ((BPF_CLASS(code) != BPF_JMP &&
402
+ BPF_CLASS(code) != BPF_JMP32) ||
278403 BPF_OP(code) == BPF_EXIT)
279404 continue;
280405 /* Adjust offset of jmps if we cross patch boundaries. */
281406 if (BPF_OP(code) == BPF_CALL) {
282407 if (insn->src_reg != BPF_PSEUDO_CALL)
283408 continue;
284
- ret = bpf_adj_delta_to_imm(insn, pos, delta, i,
285
- probe_pass);
409
+ ret = bpf_adj_delta_to_imm(insn, pos, end_old,
410
+ end_new, i, probe_pass);
286411 } else {
287
- ret = bpf_adj_delta_to_off(insn, pos, delta, i,
288
- probe_pass);
412
+ ret = bpf_adj_delta_to_off(insn, pos, end_old,
413
+ end_new, i, probe_pass);
289414 }
290415 if (ret)
291416 break;
....@@ -294,12 +419,33 @@
294419 return ret;
295420 }
296421
422
+static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
423
+{
424
+ struct bpf_line_info *linfo;
425
+ u32 i, nr_linfo;
426
+
427
+ nr_linfo = prog->aux->nr_linfo;
428
+ if (!nr_linfo || !delta)
429
+ return;
430
+
431
+ linfo = prog->aux->linfo;
432
+
433
+ for (i = 0; i < nr_linfo; i++)
434
+ if (off < linfo[i].insn_off)
435
+ break;
436
+
437
+ /* Push all off < linfo[i].insn_off by delta */
438
+ for (; i < nr_linfo; i++)
439
+ linfo[i].insn_off += delta;
440
+}
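[Editor's note: as a quick illustration of what bpf_adj_linfo above does when a patchlet grows the program: every line-info record that points past the patch site is pushed down by delta. A userspace sketch with invented offsets, mirroring the same comparison as the kernel code.]

#include <stdio.h>

int main(void)
{
	/* Hypothetical insn_off values before patching insn 3 with a
	 * 4-insn patchlet, i.e. delta = 4 - 1 = 3.
	 */
	unsigned int insn_off[] = { 0, 3, 5, 9 };
	unsigned int off = 3, delta = 3, i;

	for (i = 0; i < 4; i++)
		if (off < insn_off[i])
			insn_off[i] += delta;

	for (i = 0; i < 4; i++)
		printf("linfo[%u].insn_off = %u\n", i, insn_off[i]);
	/* 0 and 3 stay put; 5 -> 8 and 9 -> 12 follow the patched body. */
	return 0;
}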
441
+
297442 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
298443 const struct bpf_insn *patch, u32 len)
299444 {
300445 u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
301446 const u32 cnt_max = S16_MAX;
302447 struct bpf_prog *prog_adj;
448
+ int err;
303449
304450 /* Since our patchlet doesn't expand the image, we're done. */
305451 if (insn_delta == 0) {
....@@ -315,8 +461,8 @@
315461 * we afterwards may not fail anymore.
316462 */
317463 if (insn_adj_cnt > cnt_max &&
318
- bpf_adj_branches(prog, off, insn_delta, true))
319
- return NULL;
464
+ (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
465
+ return ERR_PTR(err);
320466
321467 /* Several new instructions need to be inserted. Make room
322468 * for them. Likely, there's no need for a new allocation as
....@@ -325,7 +471,7 @@
325471 prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
326472 GFP_USER);
327473 if (!prog_adj)
328
- return NULL;
474
+ return ERR_PTR(-ENOMEM);
329475
330476 prog_adj->len = insn_adj_cnt;
331477
....@@ -347,12 +493,26 @@
347493 * the ship has sailed to reverse to the original state. An
348494 * overflow cannot happen at this point.
349495 */
350
- BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
496
+ BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));
497
+
498
+ bpf_adj_linfo(prog_adj, off, insn_delta);
351499
352500 return prog_adj;
353501 }
354502
355
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
503
+int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
504
+{
505
+ /* Branch offsets can't overflow when program is shrinking, no need
506
+ * to call bpf_adj_branches(..., true) here
507
+ */
508
+ memmove(prog->insnsi + off, prog->insnsi + off + cnt,
509
+ sizeof(struct bpf_insn) * (prog->len - off - cnt));
510
+ prog->len -= cnt;
511
+
512
+ return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
513
+}
514
+
515
+static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
356516 {
357517 int i;
358518
....@@ -368,29 +528,31 @@
368528
369529 #ifdef CONFIG_BPF_JIT
370530 /* All BPF JIT sysctl knobs here. */
371
-int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
531
+int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
532
+int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
372533 int bpf_jit_harden __read_mostly;
373
-int bpf_jit_kallsyms __read_mostly;
374534 long bpf_jit_limit __read_mostly;
375535 long bpf_jit_limit_max __read_mostly;
376536
377
-static __always_inline void
378
-bpf_get_prog_addr_region(const struct bpf_prog *prog,
379
- unsigned long *symbol_start,
380
- unsigned long *symbol_end)
537
+static void
538
+bpf_prog_ksym_set_addr(struct bpf_prog *prog)
381539 {
382540 const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
383541 unsigned long addr = (unsigned long)hdr;
384542
385543 WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
386544
387
- *symbol_start = addr;
388
- *symbol_end = addr + hdr->pages * PAGE_SIZE;
545
+ prog->aux->ksym.start = (unsigned long) prog->bpf_func;
546
+ prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE;
389547 }
390548
391
-static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
549
+static void
550
+bpf_prog_ksym_set_name(struct bpf_prog *prog)
392551 {
552
+ char *sym = prog->aux->ksym.name;
393553 const char *end = sym + KSYM_NAME_LEN;
554
+ const struct btf_type *type;
555
+ const char *func_name;
394556
395557 BUILD_BUG_ON(sizeof("bpf_prog_") +
396558 sizeof(prog->tag) * 2 +
....@@ -405,42 +567,43 @@
405567
406568 sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
407569 sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
570
+
571
+ /* prog->aux->name will be ignored if full btf name is available */
572
+ if (prog->aux->func_info_cnt) {
573
+ type = btf_type_by_id(prog->aux->btf,
574
+ prog->aux->func_info[prog->aux->func_idx].type_id);
575
+ func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
576
+ snprintf(sym, (size_t)(end - sym), "_%s", func_name);
577
+ return;
578
+ }
579
+
408580 if (prog->aux->name[0])
409581 snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
410582 else
411583 *sym = 0;
412584 }
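[Editor's note: the kallsyms entry built above is "bpf_prog_" + the 8-byte tag in hex, optionally followed by "_" and the BTF function name (or prog->aux->name). A rough userspace sketch of that formatting; the tag bytes and function name are invented.]

#include <stdio.h>
#include <string.h>

#define KSYM_NAME_LEN 128

int main(void)
{
	unsigned char tag[8] = { 0xde, 0xad, 0xbe, 0xef, 0x01, 0x02, 0x03, 0x04 };
	const char *func_name = "my_prog";   /* would come from BTF or aux->name */
	char sym[KSYM_NAME_LEN];
	char *p = sym, *end = sym + KSYM_NAME_LEN;
	int i;

	p += snprintf(p, end - p, "bpf_prog_");
	for (i = 0; i < 8; i++)              /* poor man's bin2hex()             */
		p += snprintf(p, end - p, "%02x", tag[i]);
	snprintf(p, end - p, "_%s", func_name);

	printf("%s\n", sym);                 /* bpf_prog_deadbeef01020304_my_prog */
	return 0;
}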
413585
414
-static __always_inline unsigned long
415
-bpf_get_prog_addr_start(struct latch_tree_node *n)
586
+static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
416587 {
417
- unsigned long symbol_start, symbol_end;
418
- const struct bpf_prog_aux *aux;
419
-
420
- aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
421
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
422
-
423
- return symbol_start;
588
+ return container_of(n, struct bpf_ksym, tnode)->start;
424589 }
425590
426591 static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
427592 struct latch_tree_node *b)
428593 {
429
- return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
594
+ return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
430595 }
431596
432597 static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
433598 {
434599 unsigned long val = (unsigned long)key;
435
- unsigned long symbol_start, symbol_end;
436
- const struct bpf_prog_aux *aux;
600
+ const struct bpf_ksym *ksym;
437601
438
- aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
439
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
602
+ ksym = container_of(n, struct bpf_ksym, tnode);
440603
441
- if (val < symbol_start)
604
+ if (val < ksym->start)
442605 return -1;
443
- if (val >= symbol_end)
606
+ if (val >= ksym->end)
444607 return 1;
445608
446609 return 0;
....@@ -455,20 +618,29 @@
455618 static LIST_HEAD(bpf_kallsyms);
456619 static struct latch_tree_root bpf_tree __cacheline_aligned;
457620
458
-static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
621
+void bpf_ksym_add(struct bpf_ksym *ksym)
459622 {
460
- WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
461
- list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
462
- latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
623
+ spin_lock_bh(&bpf_lock);
624
+ WARN_ON_ONCE(!list_empty(&ksym->lnode));
625
+ list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
626
+ latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
627
+ spin_unlock_bh(&bpf_lock);
463628 }
464629
465
-static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
630
+static void __bpf_ksym_del(struct bpf_ksym *ksym)
466631 {
467
- if (list_empty(&aux->ksym_lnode))
632
+ if (list_empty(&ksym->lnode))
468633 return;
469634
470
- latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
471
- list_del_rcu(&aux->ksym_lnode);
635
+ latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
636
+ list_del_rcu(&ksym->lnode);
637
+}
638
+
639
+void bpf_ksym_del(struct bpf_ksym *ksym)
640
+{
641
+ spin_lock_bh(&bpf_lock);
642
+ __bpf_ksym_del(ksym);
643
+ spin_unlock_bh(&bpf_lock);
472644 }
473645
474646 static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
....@@ -478,19 +650,21 @@
478650
479651 static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
480652 {
481
- return list_empty(&fp->aux->ksym_lnode) ||
482
- fp->aux->ksym_lnode.prev == LIST_POISON2;
653
+ return list_empty(&fp->aux->ksym.lnode) ||
654
+ fp->aux->ksym.lnode.prev == LIST_POISON2;
483655 }
484656
485657 void bpf_prog_kallsyms_add(struct bpf_prog *fp)
486658 {
487659 if (!bpf_prog_kallsyms_candidate(fp) ||
488
- !capable(CAP_SYS_ADMIN))
660
+ !bpf_capable())
489661 return;
490662
491
- spin_lock_bh(&bpf_lock);
492
- bpf_prog_ksym_node_add(fp->aux);
493
- spin_unlock_bh(&bpf_lock);
663
+ bpf_prog_ksym_set_addr(fp);
664
+ bpf_prog_ksym_set_name(fp);
665
+ fp->aux->ksym.prog = true;
666
+
667
+ bpf_ksym_add(&fp->aux->ksym);
494668 }
495669
496670 void bpf_prog_kallsyms_del(struct bpf_prog *fp)
....@@ -498,36 +672,30 @@
498672 if (!bpf_prog_kallsyms_candidate(fp))
499673 return;
500674
501
- spin_lock_bh(&bpf_lock);
502
- bpf_prog_ksym_node_del(fp->aux);
503
- spin_unlock_bh(&bpf_lock);
675
+ bpf_ksym_del(&fp->aux->ksym);
504676 }
505677
506
-static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
678
+static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
507679 {
508680 struct latch_tree_node *n;
509681
510
- if (!bpf_jit_kallsyms_enabled())
511
- return NULL;
512
-
513682 n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
514
- return n ?
515
- container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
516
- NULL;
683
+ return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
517684 }
518685
519686 const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
520687 unsigned long *off, char *sym)
521688 {
522
- unsigned long symbol_start, symbol_end;
523
- struct bpf_prog *prog;
689
+ struct bpf_ksym *ksym;
524690 char *ret = NULL;
525691
526692 rcu_read_lock();
527
- prog = bpf_prog_kallsyms_find(addr);
528
- if (prog) {
529
- bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
530
- bpf_get_prog_name(prog, sym);
693
+ ksym = bpf_ksym_find(addr);
694
+ if (ksym) {
695
+ unsigned long symbol_start = ksym->start;
696
+ unsigned long symbol_end = ksym->end;
697
+
698
+ strncpy(sym, ksym->name, KSYM_NAME_LEN);
531699
532700 ret = sym;
533701 if (size)
....@@ -545,17 +713,43 @@
545713 bool ret;
546714
547715 rcu_read_lock();
548
- ret = bpf_prog_kallsyms_find(addr) != NULL;
716
+ ret = bpf_ksym_find(addr) != NULL;
549717 rcu_read_unlock();
550718
551719 return ret;
552720 }
553721
722
+static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
723
+{
724
+ struct bpf_ksym *ksym = bpf_ksym_find(addr);
725
+
726
+ return ksym && ksym->prog ?
727
+ container_of(ksym, struct bpf_prog_aux, ksym)->prog :
728
+ NULL;
729
+}
730
+
731
+const struct exception_table_entry *search_bpf_extables(unsigned long addr)
732
+{
733
+ const struct exception_table_entry *e = NULL;
734
+ struct bpf_prog *prog;
735
+
736
+ rcu_read_lock();
737
+ prog = bpf_prog_ksym_find(addr);
738
+ if (!prog)
739
+ goto out;
740
+ if (!prog->aux->num_exentries)
741
+ goto out;
742
+
743
+ e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr);
744
+out:
745
+ rcu_read_unlock();
746
+ return e;
747
+}
748
+
554749 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
555750 char *sym)
556751 {
557
- unsigned long symbol_start, symbol_end;
558
- struct bpf_prog_aux *aux;
752
+ struct bpf_ksym *ksym;
559753 unsigned int it = 0;
560754 int ret = -ERANGE;
561755
....@@ -563,14 +757,13 @@
563757 return ret;
564758
565759 rcu_read_lock();
566
- list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
760
+ list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
567761 if (it++ != symnum)
568762 continue;
569763
570
- bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
571
- bpf_get_prog_name(aux->prog, sym);
764
+ strncpy(sym, ksym->name, KSYM_NAME_LEN);
572765
573
- *value = symbol_start;
766
+ *value = ksym->start;
574767 *type = BPF_SYM_ELF_TYPE;
575768
576769 ret = 0;
....@@ -579,6 +772,40 @@
579772 rcu_read_unlock();
580773
581774 return ret;
775
+}
776
+
777
+int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
778
+ struct bpf_jit_poke_descriptor *poke)
779
+{
780
+ struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
781
+ static const u32 poke_tab_max = 1024;
782
+ u32 slot = prog->aux->size_poke_tab;
783
+ u32 size = slot + 1;
784
+
785
+ if (size > poke_tab_max)
786
+ return -ENOSPC;
787
+ if (poke->tailcall_target || poke->tailcall_target_stable ||
788
+ poke->tailcall_bypass || poke->adj_off || poke->bypass_addr)
789
+ return -EINVAL;
790
+
791
+ switch (poke->reason) {
792
+ case BPF_POKE_REASON_TAIL_CALL:
793
+ if (!poke->tail_call.map)
794
+ return -EINVAL;
795
+ break;
796
+ default:
797
+ return -EINVAL;
798
+ }
799
+
800
+ tab = krealloc(tab, size * sizeof(*poke), GFP_KERNEL);
801
+ if (!tab)
802
+ return -ENOMEM;
803
+
804
+ memcpy(&tab[slot], poke, sizeof(*poke));
805
+ prog->aux->size_poke_tab = size;
806
+ prog->aux->poke_tab = tab;
807
+
808
+ return slot;
582809 }
583810
584811 static atomic_long_t bpf_jit_current;
....@@ -606,11 +833,11 @@
606833 }
607834 pure_initcall(bpf_jit_charge_init);
608835
609
-static int bpf_jit_charge_modmem(u32 pages)
836
+int bpf_jit_charge_modmem(u32 pages)
610837 {
611838 if (atomic_long_add_return(pages, &bpf_jit_current) >
612839 (bpf_jit_limit >> PAGE_SHIFT)) {
613
- if (!capable(CAP_SYS_ADMIN)) {
840
+ if (!bpf_capable()) {
614841 atomic_long_sub(pages, &bpf_jit_current);
615842 return -EPERM;
616843 }
....@@ -619,18 +846,20 @@
619846 return 0;
620847 }
621848
622
-static void bpf_jit_uncharge_modmem(u32 pages)
849
+void bpf_jit_uncharge_modmem(u32 pages)
623850 {
624851 atomic_long_sub(pages, &bpf_jit_current);
625852 }
626853
627
-#if IS_ENABLED(CONFIG_BPF_JIT) && IS_ENABLED(CONFIG_CFI_CLANG)
628
-bool __weak arch_bpf_jit_check_func(const struct bpf_prog *prog)
854
+void *__weak bpf_jit_alloc_exec(unsigned long size)
629855 {
630
- return true;
856
+ return module_alloc(size);
631857 }
632
-EXPORT_SYMBOL_GPL(arch_bpf_jit_check_func);
633
-#endif
858
+
859
+void __weak bpf_jit_free_exec(void *addr)
860
+{
861
+ module_memfree(addr);
862
+}
634863
635864 struct bpf_binary_header *
636865 bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
....@@ -639,6 +868,9 @@
639868 {
640869 struct bpf_binary_header *hdr;
641870 u32 size, hole, start, pages;
871
+
872
+ WARN_ON_ONCE(!is_power_of_2(alignment) ||
873
+ alignment > BPF_IMAGE_ALIGNMENT);
642874
643875 /* Most of BPF filters are really small, but if some of them
644876 * fill a page, allow at least 128 extra bytes to insert a
....@@ -649,7 +881,7 @@
649881
650882 if (bpf_jit_charge_modmem(pages))
651883 return NULL;
652
- hdr = module_alloc(size);
884
+ hdr = bpf_jit_alloc_exec(size);
653885 if (!hdr) {
654886 bpf_jit_uncharge_modmem(pages);
655887 return NULL;
....@@ -658,7 +890,6 @@
658890 /* Fill space with illegal/arch-dep instructions. */
659891 bpf_fill_ill_insns(hdr, size);
660892
661
- bpf_jit_set_header_magic(hdr);
662893 hdr->pages = pages;
663894 hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
664895 PAGE_SIZE - sizeof(*hdr));
....@@ -674,7 +905,9 @@
674905 {
675906 u32 pages = hdr->pages;
676907
677
- module_memfree(hdr);
908
+ trace_android_vh_set_memory_rw((unsigned long)hdr, pages);
909
+ trace_android_vh_set_memory_nx((unsigned long)hdr, pages);
910
+ bpf_jit_free_exec(hdr);
678911 bpf_jit_uncharge_modmem(pages);
679912 }
680913
....@@ -687,7 +920,6 @@
687920 if (fp->jited) {
688921 struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
689922
690
- bpf_jit_binary_unlock_ro(hdr);
691923 bpf_jit_binary_free(hdr);
692924
693925 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
....@@ -696,9 +928,44 @@
696928 bpf_prog_unlock_free(fp);
697929 }
698930
931
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
932
+ const struct bpf_insn *insn, bool extra_pass,
933
+ u64 *func_addr, bool *func_addr_fixed)
934
+{
935
+ s16 off = insn->off;
936
+ s32 imm = insn->imm;
937
+ u8 *addr;
938
+
939
+ *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
940
+ if (!*func_addr_fixed) {
941
+ /* Place-holder address till the last pass has collected
942
+ * all addresses for JITed subprograms in which case we
943
+ * can pick them up from prog->aux.
944
+ */
945
+ if (!extra_pass)
946
+ addr = NULL;
947
+ else if (prog->aux->func &&
948
+ off >= 0 && off < prog->aux->func_cnt)
949
+ addr = (u8 *)prog->aux->func[off]->bpf_func;
950
+ else
951
+ return -EINVAL;
952
+ } else {
953
+ /* Address of a BPF helper call. Since part of the core
954
+ * kernel, it's always at a fixed location. __bpf_call_base
955
+ * and the helper with imm relative to it are both in core
956
+ * kernel.
957
+ */
958
+ addr = (u8 *)__bpf_call_base + imm;
959
+ }
960
+
961
+ *func_addr = (unsigned long)addr;
962
+ return 0;
963
+}
964
+
699965 static int bpf_jit_blind_insn(const struct bpf_insn *from,
700966 const struct bpf_insn *aux,
701
- struct bpf_insn *to_buff)
967
+ struct bpf_insn *to_buff,
968
+ bool emit_zext)
702969 {
703970 struct bpf_insn *to = to_buff;
704971 u32 imm_rnd = get_random_int();
....@@ -717,6 +984,9 @@
717984 * below.
718985 *
719986 * Constant blinding is only used by JITs, not in the interpreter.
987
+ * The interpreter uses AX in some occasions as a local temporary
988
+ * register e.g. in DIV or MOD instructions.
989
+ *
720990 * In restricted circumstances, the verifier can also use the AX
721991 * register for rewrites as long as they do not interfere with
722992 * the above cases!
....@@ -780,6 +1050,27 @@
7801050 *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
7811051 break;
7821052
1053
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
1054
+ case BPF_JMP32 | BPF_JNE | BPF_K:
1055
+ case BPF_JMP32 | BPF_JGT | BPF_K:
1056
+ case BPF_JMP32 | BPF_JLT | BPF_K:
1057
+ case BPF_JMP32 | BPF_JGE | BPF_K:
1058
+ case BPF_JMP32 | BPF_JLE | BPF_K:
1059
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
1060
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
1061
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
1062
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
1063
+ case BPF_JMP32 | BPF_JSET | BPF_K:
1064
+ /* Accommodate for extra offset in case of a backjump. */
1065
+ off = from->off;
1066
+ if (off < 0)
1067
+ off -= 2;
1068
+ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1069
+ *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1070
+ *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
1071
+ off);
1072
+ break;
1073
+
7831074 case BPF_LD | BPF_IMM | BPF_DW:
7841075 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
7851076 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
....@@ -789,6 +1080,8 @@
7891080 case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
7901081 *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
7911082 *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1083
+ if (emit_zext)
1084
+ *to++ = BPF_ZEXT_REG(BPF_REG_AX);
7921085 *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX);
7931086 break;
7941087
....@@ -811,7 +1104,7 @@
8111104 gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
8121105 struct bpf_prog *fp;
8131106
814
- fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
1107
+ fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
8151108 if (fp != NULL) {
8161109 /* aux->prog still points to the fp_other one, so
8171110 * when promoting the clone to the real program,
....@@ -872,18 +1165,19 @@
8721165 insn[1].code == 0)
8731166 memcpy(aux, insn, sizeof(aux));
8741167
875
- rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
1168
+ rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
1169
+ clone->aux->verifier_zext);
8761170 if (!rewritten)
8771171 continue;
8781172
8791173 tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
880
- if (!tmp) {
1174
+ if (IS_ERR(tmp)) {
8811175 /* Patching may have repointed aux->prog during
8821176 * realloc from the original one, so we need to
8831177 * fix it up here on error.
8841178 */
8851179 bpf_jit_prog_release_other(prog, clone);
886
- return ERR_PTR(-ENOMEM);
1180
+ return tmp;
8871181 }
8881182
8891183 clone = tmp;
....@@ -916,32 +1210,34 @@
9161210 #define BPF_INSN_MAP(INSN_2, INSN_3) \
9171211 /* 32 bit ALU operations. */ \
9181212 /* Register based. */ \
919
- INSN_3(ALU, ADD, X), \
920
- INSN_3(ALU, SUB, X), \
921
- INSN_3(ALU, AND, X), \
922
- INSN_3(ALU, OR, X), \
923
- INSN_3(ALU, LSH, X), \
924
- INSN_3(ALU, RSH, X), \
925
- INSN_3(ALU, XOR, X), \
926
- INSN_3(ALU, MUL, X), \
927
- INSN_3(ALU, MOV, X), \
928
- INSN_3(ALU, DIV, X), \
929
- INSN_3(ALU, MOD, X), \
1213
+ INSN_3(ALU, ADD, X), \
1214
+ INSN_3(ALU, SUB, X), \
1215
+ INSN_3(ALU, AND, X), \
1216
+ INSN_3(ALU, OR, X), \
1217
+ INSN_3(ALU, LSH, X), \
1218
+ INSN_3(ALU, RSH, X), \
1219
+ INSN_3(ALU, XOR, X), \
1220
+ INSN_3(ALU, MUL, X), \
1221
+ INSN_3(ALU, MOV, X), \
1222
+ INSN_3(ALU, ARSH, X), \
1223
+ INSN_3(ALU, DIV, X), \
1224
+ INSN_3(ALU, MOD, X), \
9301225 INSN_2(ALU, NEG), \
9311226 INSN_3(ALU, END, TO_BE), \
9321227 INSN_3(ALU, END, TO_LE), \
9331228 /* Immediate based. */ \
934
- INSN_3(ALU, ADD, K), \
935
- INSN_3(ALU, SUB, K), \
936
- INSN_3(ALU, AND, K), \
937
- INSN_3(ALU, OR, K), \
938
- INSN_3(ALU, LSH, K), \
939
- INSN_3(ALU, RSH, K), \
940
- INSN_3(ALU, XOR, K), \
941
- INSN_3(ALU, MUL, K), \
942
- INSN_3(ALU, MOV, K), \
943
- INSN_3(ALU, DIV, K), \
944
- INSN_3(ALU, MOD, K), \
1229
+ INSN_3(ALU, ADD, K), \
1230
+ INSN_3(ALU, SUB, K), \
1231
+ INSN_3(ALU, AND, K), \
1232
+ INSN_3(ALU, OR, K), \
1233
+ INSN_3(ALU, LSH, K), \
1234
+ INSN_3(ALU, RSH, K), \
1235
+ INSN_3(ALU, XOR, K), \
1236
+ INSN_3(ALU, MUL, K), \
1237
+ INSN_3(ALU, MOV, K), \
1238
+ INSN_3(ALU, ARSH, K), \
1239
+ INSN_3(ALU, DIV, K), \
1240
+ INSN_3(ALU, MOD, K), \
9451241 /* 64 bit ALU operations. */ \
9461242 /* Register based. */ \
9471243 INSN_3(ALU64, ADD, X), \
....@@ -974,6 +1270,31 @@
9741270 INSN_2(JMP, CALL), \
9751271 /* Exit instruction. */ \
9761272 INSN_2(JMP, EXIT), \
1273
+ /* 32-bit Jump instructions. */ \
1274
+ /* Register based. */ \
1275
+ INSN_3(JMP32, JEQ, X), \
1276
+ INSN_3(JMP32, JNE, X), \
1277
+ INSN_3(JMP32, JGT, X), \
1278
+ INSN_3(JMP32, JLT, X), \
1279
+ INSN_3(JMP32, JGE, X), \
1280
+ INSN_3(JMP32, JLE, X), \
1281
+ INSN_3(JMP32, JSGT, X), \
1282
+ INSN_3(JMP32, JSLT, X), \
1283
+ INSN_3(JMP32, JSGE, X), \
1284
+ INSN_3(JMP32, JSLE, X), \
1285
+ INSN_3(JMP32, JSET, X), \
1286
+ /* Immediate based. */ \
1287
+ INSN_3(JMP32, JEQ, K), \
1288
+ INSN_3(JMP32, JNE, K), \
1289
+ INSN_3(JMP32, JGT, K), \
1290
+ INSN_3(JMP32, JLT, K), \
1291
+ INSN_3(JMP32, JGE, K), \
1292
+ INSN_3(JMP32, JLE, K), \
1293
+ INSN_3(JMP32, JSGT, K), \
1294
+ INSN_3(JMP32, JSLT, K), \
1295
+ INSN_3(JMP32, JSGE, K), \
1296
+ INSN_3(JMP32, JSLE, K), \
1297
+ INSN_3(JMP32, JSET, K), \
9771298 /* Jump instructions. */ \
9781299 /* Register based. */ \
9791300 INSN_3(JMP, JEQ, X), \
....@@ -1044,10 +1365,17 @@
10441365 }
10451366
10461367 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1368
+u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
1369
+{
1370
+ memset(dst, 0, size);
1371
+ return -EFAULT;
1372
+}
1373
+
10471374 /**
10481375 * __bpf_prog_run - run eBPF program on a given context
1049
- * @ctx: is the data we are operating on
1376
+ * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
10501377 * @insn: is the array of eBPF instructions
1378
+ * @stack: is the eBPF storage stack
10511379 *
10521380 * Decode and execute eBPF instructions.
10531381 */
....@@ -1055,7 +1383,7 @@
10551383 {
10561384 #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
10571385 #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
1058
- static const void *jumptable[256] = {
1386
+ static const void * const jumptable[256] __annotate_jump_table = {
10591387 [0 ... 255] = &&default_label,
10601388 /* Now overwrite non-defaults ... */
10611389 BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
....@@ -1063,11 +1391,14 @@
10631391 [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
10641392 [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
10651393 [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC,
1394
+ [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
1395
+ [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
1396
+ [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
1397
+ [BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW,
10661398 };
10671399 #undef BPF_INSN_3_LBL
10681400 #undef BPF_INSN_2_LBL
10691401 u32 tail_call_cnt = 0;
1070
- u64 tmp;
10711402
10721403 #define CONT ({ insn++; goto select_insn; })
10731404 #define CONT_JMP ({ insn++; goto select_insn; })
....@@ -1075,29 +1406,54 @@
10751406 select_insn:
10761407 goto *jumptable[insn->code];
10771408
1078
- /* ALU */
1079
-#define ALU(OPCODE, OP) \
1080
- ALU64_##OPCODE##_X: \
1081
- DST = DST OP SRC; \
1082
- CONT; \
1083
- ALU_##OPCODE##_X: \
1084
- DST = (u32) DST OP (u32) SRC; \
1085
- CONT; \
1086
- ALU64_##OPCODE##_K: \
1087
- DST = DST OP IMM; \
1088
- CONT; \
1089
- ALU_##OPCODE##_K: \
1090
- DST = (u32) DST OP (u32) IMM; \
1409
+ /* Explicitly mask the register-based shift amounts with 63 or 31
1410
+ * to avoid undefined behavior. Normally this won't affect the
1411
+ * generated code, for example, in case of native 64 bit archs such
1412
+ * as x86-64 or arm64, the compiler is optimizing the AND away for
1413
+ * the interpreter. In case of JITs, each of the JIT backends compiles
1414
+ * the BPF shift operations to machine instructions which produce
1415
+ * implementation-defined results in such a case; the resulting
1416
+ * contents of the register may be arbitrary, but program behaviour
1417
+ * as a whole remains defined. In other words, in case of JIT backends,
1418
+ * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
1419
+ */
1420
+ /* ALU (shifts) */
1421
+#define SHT(OPCODE, OP) \
1422
+ ALU64_##OPCODE##_X: \
1423
+ DST = DST OP (SRC & 63); \
1424
+ CONT; \
1425
+ ALU_##OPCODE##_X: \
1426
+ DST = (u32) DST OP ((u32) SRC & 31); \
1427
+ CONT; \
1428
+ ALU64_##OPCODE##_K: \
1429
+ DST = DST OP IMM; \
1430
+ CONT; \
1431
+ ALU_##OPCODE##_K: \
1432
+ DST = (u32) DST OP (u32) IMM; \
10911433 CONT;
1092
-
1434
+ /* ALU (rest) */
1435
+#define ALU(OPCODE, OP) \
1436
+ ALU64_##OPCODE##_X: \
1437
+ DST = DST OP SRC; \
1438
+ CONT; \
1439
+ ALU_##OPCODE##_X: \
1440
+ DST = (u32) DST OP (u32) SRC; \
1441
+ CONT; \
1442
+ ALU64_##OPCODE##_K: \
1443
+ DST = DST OP IMM; \
1444
+ CONT; \
1445
+ ALU_##OPCODE##_K: \
1446
+ DST = (u32) DST OP (u32) IMM; \
1447
+ CONT;
10931448 ALU(ADD, +)
10941449 ALU(SUB, -)
10951450 ALU(AND, &)
10961451 ALU(OR, |)
1097
- ALU(LSH, <<)
1098
- ALU(RSH, >>)
10991452 ALU(XOR, ^)
11001453 ALU(MUL, *)
1454
+ SHT(LSH, <<)
1455
+ SHT(RSH, >>)
1456
+#undef SHT
11011457 #undef ALU
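[Editor's note: the masking described in the comment above matters because, in C, shifting a 64-bit value by 64 or more (or a 32-bit value by 32 or more) is undefined behaviour; masking the runtime shift count keeps the interpreter well defined while leaving in-range shifts untouched. A standalone illustration, not kernel code.]

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t dst = 0x1234;
	uint32_t src = 64;     /* a shift count a BPF program could supply at runtime */

	/* dst << src would be undefined behaviour for src >= 64;
	 * dst << (src & 63) is always defined (here it shifts by 0).
	 */
	printf("%llx\n", (unsigned long long)(dst << (src & 63)));
	return 0;
}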
11021458 ALU_NEG:
11031459 DST = (u32) -DST;
....@@ -1121,43 +1477,49 @@
11211477 DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
11221478 insn++;
11231479 CONT;
1480
+ ALU_ARSH_X:
1481
+ DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
1482
+ CONT;
1483
+ ALU_ARSH_K:
1484
+ DST = (u64) (u32) (((s32) DST) >> IMM);
1485
+ CONT;
11241486 ALU64_ARSH_X:
1125
- (*(s64 *) &DST) >>= SRC;
1487
+ (*(s64 *) &DST) >>= (SRC & 63);
11261488 CONT;
11271489 ALU64_ARSH_K:
11281490 (*(s64 *) &DST) >>= IMM;
11291491 CONT;
11301492 ALU64_MOD_X:
1131
- div64_u64_rem(DST, SRC, &tmp);
1132
- DST = tmp;
1493
+ div64_u64_rem(DST, SRC, &AX);
1494
+ DST = AX;
11331495 CONT;
11341496 ALU_MOD_X:
1135
- tmp = (u32) DST;
1136
- DST = do_div(tmp, (u32) SRC);
1497
+ AX = (u32) DST;
1498
+ DST = do_div(AX, (u32) SRC);
11371499 CONT;
11381500 ALU64_MOD_K:
1139
- div64_u64_rem(DST, IMM, &tmp);
1140
- DST = tmp;
1501
+ div64_u64_rem(DST, IMM, &AX);
1502
+ DST = AX;
11411503 CONT;
11421504 ALU_MOD_K:
1143
- tmp = (u32) DST;
1144
- DST = do_div(tmp, (u32) IMM);
1505
+ AX = (u32) DST;
1506
+ DST = do_div(AX, (u32) IMM);
11451507 CONT;
11461508 ALU64_DIV_X:
11471509 DST = div64_u64(DST, SRC);
11481510 CONT;
11491511 ALU_DIV_X:
1150
- tmp = (u32) DST;
1151
- do_div(tmp, (u32) SRC);
1152
- DST = (u32) tmp;
1512
+ AX = (u32) DST;
1513
+ do_div(AX, (u32) SRC);
1514
+ DST = (u32) AX;
11531515 CONT;
11541516 ALU64_DIV_K:
11551517 DST = div64_u64(DST, IMM);
11561518 CONT;
11571519 ALU_DIV_K:
1158
- tmp = (u32) DST;
1159
- do_div(tmp, (u32) IMM);
1160
- DST = (u32) tmp;
1520
+ AX = (u32) DST;
1521
+ do_div(AX, (u32) IMM);
1522
+ DST = (u32) AX;
11611523 CONT;
11621524 ALU_END_TO_BE:
11631525 switch (IMM) {
....@@ -1222,7 +1584,7 @@
12221584
12231585 /* ARG1 at this point is guaranteed to point to CTX from
12241586 * the verifier side due to the fact that the tail call is
1225
- * handeled like a helper, that is, bpf_tail_call_proto,
1587
+ * handled like a helper, that is, bpf_tail_call_proto,
12261588 * where arg1_type is ARG_PTR_TO_CTX.
12271589 */
12281590 insn = prog->insnsi;
....@@ -1230,145 +1592,49 @@
12301592 out:
12311593 CONT;
12321594 }
1233
- /* JMP */
12341595 JMP_JA:
12351596 insn += insn->off;
12361597 CONT;
1237
- JMP_JEQ_X:
1238
- if (DST == SRC) {
1239
- insn += insn->off;
1240
- CONT_JMP;
1241
- }
1242
- CONT;
1243
- JMP_JEQ_K:
1244
- if (DST == IMM) {
1245
- insn += insn->off;
1246
- CONT_JMP;
1247
- }
1248
- CONT;
1249
- JMP_JNE_X:
1250
- if (DST != SRC) {
1251
- insn += insn->off;
1252
- CONT_JMP;
1253
- }
1254
- CONT;
1255
- JMP_JNE_K:
1256
- if (DST != IMM) {
1257
- insn += insn->off;
1258
- CONT_JMP;
1259
- }
1260
- CONT;
1261
- JMP_JGT_X:
1262
- if (DST > SRC) {
1263
- insn += insn->off;
1264
- CONT_JMP;
1265
- }
1266
- CONT;
1267
- JMP_JGT_K:
1268
- if (DST > IMM) {
1269
- insn += insn->off;
1270
- CONT_JMP;
1271
- }
1272
- CONT;
1273
- JMP_JLT_X:
1274
- if (DST < SRC) {
1275
- insn += insn->off;
1276
- CONT_JMP;
1277
- }
1278
- CONT;
1279
- JMP_JLT_K:
1280
- if (DST < IMM) {
1281
- insn += insn->off;
1282
- CONT_JMP;
1283
- }
1284
- CONT;
1285
- JMP_JGE_X:
1286
- if (DST >= SRC) {
1287
- insn += insn->off;
1288
- CONT_JMP;
1289
- }
1290
- CONT;
1291
- JMP_JGE_K:
1292
- if (DST >= IMM) {
1293
- insn += insn->off;
1294
- CONT_JMP;
1295
- }
1296
- CONT;
1297
- JMP_JLE_X:
1298
- if (DST <= SRC) {
1299
- insn += insn->off;
1300
- CONT_JMP;
1301
- }
1302
- CONT;
1303
- JMP_JLE_K:
1304
- if (DST <= IMM) {
1305
- insn += insn->off;
1306
- CONT_JMP;
1307
- }
1308
- CONT;
1309
- JMP_JSGT_X:
1310
- if (((s64) DST) > ((s64) SRC)) {
1311
- insn += insn->off;
1312
- CONT_JMP;
1313
- }
1314
- CONT;
1315
- JMP_JSGT_K:
1316
- if (((s64) DST) > ((s64) IMM)) {
1317
- insn += insn->off;
1318
- CONT_JMP;
1319
- }
1320
- CONT;
1321
- JMP_JSLT_X:
1322
- if (((s64) DST) < ((s64) SRC)) {
1323
- insn += insn->off;
1324
- CONT_JMP;
1325
- }
1326
- CONT;
1327
- JMP_JSLT_K:
1328
- if (((s64) DST) < ((s64) IMM)) {
1329
- insn += insn->off;
1330
- CONT_JMP;
1331
- }
1332
- CONT;
1333
- JMP_JSGE_X:
1334
- if (((s64) DST) >= ((s64) SRC)) {
1335
- insn += insn->off;
1336
- CONT_JMP;
1337
- }
1338
- CONT;
1339
- JMP_JSGE_K:
1340
- if (((s64) DST) >= ((s64) IMM)) {
1341
- insn += insn->off;
1342
- CONT_JMP;
1343
- }
1344
- CONT;
1345
- JMP_JSLE_X:
1346
- if (((s64) DST) <= ((s64) SRC)) {
1347
- insn += insn->off;
1348
- CONT_JMP;
1349
- }
1350
- CONT;
1351
- JMP_JSLE_K:
1352
- if (((s64) DST) <= ((s64) IMM)) {
1353
- insn += insn->off;
1354
- CONT_JMP;
1355
- }
1356
- CONT;
1357
- JMP_JSET_X:
1358
- if (DST & SRC) {
1359
- insn += insn->off;
1360
- CONT_JMP;
1361
- }
1362
- CONT;
1363
- JMP_JSET_K:
1364
- if (DST & IMM) {
1365
- insn += insn->off;
1366
- CONT_JMP;
1367
- }
1368
- CONT;
13691598 JMP_EXIT:
13701599 return BPF_R0;
1371
-
1600
+ /* JMP */
1601
+#define COND_JMP(SIGN, OPCODE, CMP_OP) \
1602
+ JMP_##OPCODE##_X: \
1603
+ if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) { \
1604
+ insn += insn->off; \
1605
+ CONT_JMP; \
1606
+ } \
1607
+ CONT; \
1608
+ JMP32_##OPCODE##_X: \
1609
+ if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) { \
1610
+ insn += insn->off; \
1611
+ CONT_JMP; \
1612
+ } \
1613
+ CONT; \
1614
+ JMP_##OPCODE##_K: \
1615
+ if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) { \
1616
+ insn += insn->off; \
1617
+ CONT_JMP; \
1618
+ } \
1619
+ CONT; \
1620
+ JMP32_##OPCODE##_K: \
1621
+ if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) { \
1622
+ insn += insn->off; \
1623
+ CONT_JMP; \
1624
+ } \
1625
+ CONT;
1626
+ COND_JMP(u, JEQ, ==)
1627
+ COND_JMP(u, JNE, !=)
1628
+ COND_JMP(u, JGT, >)
1629
+ COND_JMP(u, JLT, <)
1630
+ COND_JMP(u, JGE, >=)
1631
+ COND_JMP(u, JLE, <=)
1632
+ COND_JMP(u, JSET, &)
1633
+ COND_JMP(s, JSGT, >)
1634
+ COND_JMP(s, JSLT, <)
1635
+ COND_JMP(s, JSGE, >=)
1636
+ COND_JMP(s, JSLE, <=)
1637
+#undef COND_JMP
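[Editor's note: the JMP32 variants generated by the COND_JMP macro above compare only the low 32 bits of the operands, so values whose upper halves differ can still compare equal. A tiny userspace analogue of the two comparison widths.]

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t dst = 0xffffffff00000005ULL;
	uint64_t imm = 5;

	/* BPF_JMP   JEQ: full 64-bit compare -> not taken */
	printf("jmp   jeq: %d\n", dst == imm);
	/* BPF_JMP32 JEQ: low 32 bits only    -> taken     */
	printf("jmp32 jeq: %d\n", (uint32_t)dst == (uint32_t)imm);
	return 0;
}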
13721638 /* ST, STX and LDX*/
13731639 ST_NOSPEC:
13741640 /* Speculation barrier for mitigating Speculative Store Bypass.
....@@ -1393,6 +1659,11 @@
13931659 CONT; \
13941660 LDX_MEM_##SIZEOP: \
13951661 DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
1662
+ CONT; \
1663
+ LDX_PROBE_MEM_##SIZEOP: \
1664
+ bpf_probe_read_kernel(&DST, sizeof(SIZE), \
1665
+ (const void *)(long) (SRC + insn->off)); \
1666
+ DST = *((SIZE *)&DST); \
13961667 CONT;
13971668
13981669 LDST(B, u8)
....@@ -1400,6 +1671,7 @@
14001671 LDST(W, u32)
14011672 LDST(DW, u64)
14021673 #undef LDST
1674
+
14031675 STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
14041676 atomic_add((u32) SRC, (atomic_t *)(unsigned long)
14051677 (DST + insn->off));
....@@ -1420,7 +1692,6 @@
14201692 BUG_ON(1);
14211693 return 0;
14221694 }
1423
-STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
14241695
14251696 #define PROG_NAME(stack_size) __bpf_prog_run##stack_size
14261697 #define DEFINE_BPF_PROG_RUN(stack_size) \
....@@ -1508,28 +1779,34 @@
15081779 bool bpf_prog_array_compatible(struct bpf_array *array,
15091780 const struct bpf_prog *fp)
15101781 {
1782
+ bool ret;
1783
+
15111784 if (fp->kprobe_override)
15121785 return false;
15131786
1514
- if (!array->owner_prog_type) {
1787
+ spin_lock(&array->aux->owner.lock);
1788
+
1789
+ if (!array->aux->owner.type) {
15151790 /* There's no owner yet where we could check for
15161791 * compatibility.
15171792 */
1518
- array->owner_prog_type = fp->type;
1519
- array->owner_jited = fp->jited;
1520
-
1521
- return true;
1793
+ array->aux->owner.type = fp->type;
1794
+ array->aux->owner.jited = fp->jited;
1795
+ ret = true;
1796
+ } else {
1797
+ ret = array->aux->owner.type == fp->type &&
1798
+ array->aux->owner.jited == fp->jited;
15221799 }
1523
-
1524
- return array->owner_prog_type == fp->type &&
1525
- array->owner_jited == fp->jited;
1800
+ spin_unlock(&array->aux->owner.lock);
1801
+ return ret;
15261802 }
15271803
15281804 static int bpf_check_tail_call(const struct bpf_prog *fp)
15291805 {
15301806 struct bpf_prog_aux *aux = fp->aux;
1531
- int i;
1807
+ int i, ret = 0;
15321808
1809
+ mutex_lock(&aux->used_maps_mutex);
15331810 for (i = 0; i < aux->used_map_cnt; i++) {
15341811 struct bpf_map *map = aux->used_maps[i];
15351812 struct bpf_array *array;
....@@ -1538,11 +1815,15 @@
15381815 continue;
15391816
15401817 array = container_of(map, struct bpf_array, map);
1541
- if (!bpf_prog_array_compatible(array, fp))
1542
- return -EINVAL;
1818
+ if (!bpf_prog_array_compatible(array, fp)) {
1819
+ ret = -EINVAL;
1820
+ goto out;
1821
+ }
15431822 }
15441823
1545
- return 0;
1824
+out:
1825
+ mutex_unlock(&aux->used_maps_mutex);
1826
+ return ret;
15461827 }
15471828
15481829 static void bpf_prog_select_func(struct bpf_prog *fp)
....@@ -1581,13 +1862,20 @@
15811862 * be JITed, but falls back to the interpreter.
15821863 */
15831864 if (!bpf_prog_is_dev_bound(fp->aux)) {
1865
+ *err = bpf_prog_alloc_jited_linfo(fp);
1866
+ if (*err)
1867
+ return fp;
1868
+
15841869 fp = bpf_int_jit_compile(fp);
1585
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
15861870 if (!fp->jited) {
1871
+ bpf_prog_free_jited_linfo(fp);
1872
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
15871873 *err = -ENOTSUPP;
15881874 return fp;
1589
- }
15901875 #endif
1876
+ } else {
1877
+ bpf_prog_free_unused_jited_linfo(fp);
1878
+ }
15911879 } else {
15921880 *err = bpf_prog_offload_compile(fp);
15931881 if (*err)
....@@ -1646,38 +1934,42 @@
16461934 return &empty_prog_array.hdr;
16471935 }
16481936
1649
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
1937
+void bpf_prog_array_free(struct bpf_prog_array *progs)
16501938 {
1651
- if (!progs ||
1652
- progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
1939
+ if (!progs || progs == &empty_prog_array.hdr)
16531940 return;
16541941 kfree_rcu(progs, rcu);
16551942 }
16561943
1657
-int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
1944
+int bpf_prog_array_length(struct bpf_prog_array *array)
16581945 {
16591946 struct bpf_prog_array_item *item;
16601947 u32 cnt = 0;
16611948
1662
- rcu_read_lock();
1663
- item = rcu_dereference(array)->items;
1664
- for (; item->prog; item++)
1949
+ for (item = array->items; item->prog; item++)
16651950 if (item->prog != &dummy_bpf_prog.prog)
16661951 cnt++;
1667
- rcu_read_unlock();
16681952 return cnt;
16691953 }
16701954
1955
+bool bpf_prog_array_is_empty(struct bpf_prog_array *array)
1956
+{
1957
+ struct bpf_prog_array_item *item;
16711958
1672
-static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
1959
+ for (item = array->items; item->prog; item++)
1960
+ if (item->prog != &dummy_bpf_prog.prog)
1961
+ return false;
1962
+ return true;
1963
+}
1964
+
1965
+static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
16731966 u32 *prog_ids,
16741967 u32 request_cnt)
16751968 {
16761969 struct bpf_prog_array_item *item;
16771970 int i = 0;
16781971
1679
- item = rcu_dereference_check(array, 1)->items;
1680
- for (; item->prog; item++) {
1972
+ for (item = array->items; item->prog; item++) {
16811973 if (item->prog == &dummy_bpf_prog.prog)
16821974 continue;
16831975 prog_ids[i] = item->prog->aux->id;
....@@ -1690,7 +1982,7 @@
16901982 return !!(item->prog);
16911983 }
16921984
1693
-int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
1985
+int bpf_prog_array_copy_to_user(struct bpf_prog_array *array,
16941986 __u32 __user *prog_ids, u32 cnt)
16951987 {
16961988 unsigned long err = 0;
....@@ -1701,18 +1993,12 @@
17011993 * cnt = bpf_prog_array_length();
17021994 * if (cnt > 0)
17031995 * bpf_prog_array_copy_to_user(..., cnt);
1704
- * so below kcalloc doesn't need extra cnt > 0 check, but
1705
- * bpf_prog_array_length() releases rcu lock and
1706
- * prog array could have been swapped with empty or larger array,
1707
- * so always copy 'cnt' prog_ids to the user.
1708
- * In a rare race the user will see zero prog_ids
1996
+ * so below kcalloc doesn't need extra cnt > 0 check.
17091997 */
17101998 ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
17111999 if (!ids)
17122000 return -ENOMEM;
1713
- rcu_read_lock();
17142001 nospc = bpf_prog_array_copy_core(array, ids, cnt);
1715
- rcu_read_unlock();
17162002 err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
17172003 kfree(ids);
17182004 if (err)
....@@ -1722,19 +2008,74 @@
17222008 return 0;
17232009 }
17242010
1725
-void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
2011
+void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
17262012 struct bpf_prog *old_prog)
17272013 {
1728
- struct bpf_prog_array_item *item = array->items;
2014
+ struct bpf_prog_array_item *item;
17292015
1730
- for (; item->prog; item++)
2016
+ for (item = array->items; item->prog; item++)
17312017 if (item->prog == old_prog) {
17322018 WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
17332019 break;
17342020 }
17352021 }
17362022
1737
-int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
2023
+/**
2024
+ * bpf_prog_array_delete_safe_at() - Replaces the program at the given
2025
+ * index into the program array with
2026
+ * a dummy no-op program.
2027
+ * @array: a bpf_prog_array
2028
+ * @index: the index of the program to replace
2029
+ *
2030
+ * Skips over dummy programs, by not counting them, when calculating
2031
+ * the position of the program to replace.
2032
+ *
2033
+ * Return:
2034
+ * * 0 - Success
2035
+ * * -EINVAL - Invalid index value. Must be a non-negative integer.
2036
+ * * -ENOENT - Index out of range
2037
+ */
2038
+int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
2039
+{
2040
+ return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
2041
+}
2042
+
2043
+/**
2044
+ * bpf_prog_array_update_at() - Updates the program at the given index
2045
+ * into the program array.
2046
+ * @array: a bpf_prog_array
2047
+ * @index: the index of the program to update
2048
+ * @prog: the program to insert into the array
2049
+ *
2050
+ * Skips over dummy programs, by not counting them, when calculating
2051
+ * the position of the program to update.
2052
+ *
2053
+ * Return:
2054
+ * * 0 - Success
2055
+ * * -EINVAL - Invalid index value. Must be a non-negative integer.
2056
+ * * -ENOENT - Index out of range
2057
+ */
2058
+int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
2059
+ struct bpf_prog *prog)
2060
+{
2061
+ struct bpf_prog_array_item *item;
2062
+
2063
+ if (unlikely(index < 0))
2064
+ return -EINVAL;
2065
+
2066
+ for (item = array->items; item->prog; item++) {
2067
+ if (item->prog == &dummy_bpf_prog.prog)
2068
+ continue;
2069
+ if (!index) {
2070
+ WRITE_ONCE(item->prog, prog);
2071
+ return 0;
2072
+ }
2073
+ index--;
2074
+ }
2075
+ return -ENOENT;
2076
+}
2077
+
2078
+int bpf_prog_array_copy(struct bpf_prog_array *old_array,
17382079 struct bpf_prog *exclude_prog,
17392080 struct bpf_prog *include_prog,
17402081 struct bpf_prog_array **new_array)
....@@ -1798,7 +2139,7 @@
17982139 return 0;
17992140 }
18002141
1801
-int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
2142
+int bpf_prog_array_copy_info(struct bpf_prog_array *array,
18022143 u32 *prog_ids, u32 request_cnt,
18032144 u32 *prog_cnt)
18042145 {
....@@ -1818,18 +2159,41 @@
18182159 : 0;
18192160 }
18202161
2162
+void __bpf_free_used_maps(struct bpf_prog_aux *aux,
2163
+ struct bpf_map **used_maps, u32 len)
2164
+{
2165
+ struct bpf_map *map;
2166
+ u32 i;
2167
+
2168
+ for (i = 0; i < len; i++) {
2169
+ map = used_maps[i];
2170
+ if (map->ops->map_poke_untrack)
2171
+ map->ops->map_poke_untrack(map, aux);
2172
+ bpf_map_put(map);
2173
+ }
2174
+}
2175
+
2176
+static void bpf_free_used_maps(struct bpf_prog_aux *aux)
2177
+{
2178
+ __bpf_free_used_maps(aux, aux->used_maps, aux->used_map_cnt);
2179
+ kfree(aux->used_maps);
2180
+}
2181
+
18212182 static void bpf_prog_free_deferred(struct work_struct *work)
18222183 {
18232184 struct bpf_prog_aux *aux;
18242185 int i;
18252186
18262187 aux = container_of(work, struct bpf_prog_aux, work);
2188
+ bpf_free_used_maps(aux);
18272189 if (bpf_prog_is_dev_bound(aux))
18282190 bpf_prog_offload_destroy(aux->prog);
18292191 #ifdef CONFIG_PERF_EVENTS
18302192 if (aux->prog->has_callchain_buf)
18312193 put_callchain_buffers();
18322194 #endif
2195
+ if (aux->dst_trampoline)
2196
+ bpf_trampoline_put(aux->dst_trampoline);
18332197 for (i = 0; i < aux->func_cnt; i++)
18342198 bpf_jit_free(aux->func[i]);
18352199 if (aux->func_cnt) {
....@@ -1845,6 +2209,8 @@
18452209 {
18462210 struct bpf_prog_aux *aux = fp->aux;
18472211
2212
+ if (aux->dst_prog)
2213
+ bpf_prog_put(aux->dst_prog);
18482214 INIT_WORK(&aux->work, bpf_prog_free_deferred);
18492215 schedule_work(&aux->work);
18502216 }
....@@ -1876,10 +2242,21 @@
18762242 return res;
18772243 }
18782244
2245
+BPF_CALL_0(bpf_get_raw_cpu_id)
2246
+{
2247
+ return raw_smp_processor_id();
2248
+}
2249
+
18792250 /* Weak definitions of helper functions in case we don't have bpf syscall. */
18802251 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
18812252 const struct bpf_func_proto bpf_map_update_elem_proto __weak;
18822253 const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
2254
+const struct bpf_func_proto bpf_map_push_elem_proto __weak;
2255
+const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
2256
+const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
2257
+const struct bpf_func_proto bpf_spin_lock_proto __weak;
2258
+const struct bpf_func_proto bpf_spin_unlock_proto __weak;
2259
+const struct bpf_func_proto bpf_jiffies64_proto __weak;
18832260
18842261 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
18852262 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
....@@ -1890,10 +2267,12 @@
18902267 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
18912268 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
18922269 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
1893
-const struct bpf_func_proto bpf_sock_map_update_proto __weak;
1894
-const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
18952270 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
2271
+const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
18962272 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
2273
+const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
2274
+const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
2275
+const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
18972276
18982277 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
18992278 {
....@@ -1939,6 +2318,15 @@
19392318 return false;
19402319 }
19412320
2321
+/* Return TRUE if the JIT backend wants verifier to enable sub-register usage
2322
+ * analysis code and wants explicit zero extension inserted by verifier.
2323
+ * Otherwise, return FALSE.
2324
+ */
2325
+bool __weak bpf_jit_needs_zext(void)
2326
+{
2327
+ return false;
2328
+}
2329
+
19422330 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
19432331 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
19442332 */
....@@ -1948,8 +2336,19 @@
19482336 return -EFAULT;
19492337 }
19502338
2339
+int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
2340
+ void *addr1, void *addr2)
2341
+{
2342
+ return -ENOTSUPP;
2343
+}
2344
+
2345
+DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
2346
+EXPORT_SYMBOL(bpf_stats_enabled_key);
2347
+
19512348 /* All definitions of tracepoints related to BPF. */
2349
+#undef TRACE_INCLUDE_PATH
19522350 #define CREATE_TRACE_POINTS
19532351 #include <linux/bpf_trace.h>
19542352
19552353 EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
2354
+EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);