forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 95099d4622f8cb224d94e314c7a8e0df60b13f87
kernel/tools/lib/bpf/libbpf.c
....@@ -1,4 +1,4 @@
1
-// SPDX-License-Identifier: LGPL-2.1
1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
22
33 /*
44 * Common eBPF ELF object loading operations.
....@@ -7,19 +7,7 @@
77 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
88 * Copyright (C) 2015 Huawei Inc.
99 * Copyright (C) 2017 Nicira, Inc.
10
- *
11
- * This program is free software; you can redistribute it and/or
12
- * modify it under the terms of the GNU Lesser General Public
13
- * License as published by the Free Software Foundation;
14
- * version 2.1 of the License (not later!)
15
- *
16
- * This program is distributed in the hope that it will be useful,
17
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
- * GNU Lesser General Public License for more details.
20
- *
21
- * You should have received a copy of the GNU Lesser General Public
22
- * License along with this program; if not, see <http://www.gnu.org/licenses>
10
+ * Copyright (C) 2019 Isovalent, Inc.
2311 */
2412
2513 #ifndef _GNU_SOURCE
....@@ -30,29 +18,42 @@
3018 #include <stdarg.h>
3119 #include <libgen.h>
3220 #include <inttypes.h>
21
+#include <limits.h>
3322 #include <string.h>
3423 #include <unistd.h>
24
+#include <endian.h>
3525 #include <fcntl.h>
3626 #include <errno.h>
37
-#include <perf-sys.h>
27
+#include <ctype.h>
3828 #include <asm/unistd.h>
3929 #include <linux/err.h>
4030 #include <linux/kernel.h>
4131 #include <linux/bpf.h>
4232 #include <linux/btf.h>
33
+#include <linux/filter.h>
4334 #include <linux/list.h>
4435 #include <linux/limits.h>
36
+#include <linux/perf_event.h>
37
+#include <linux/ring_buffer.h>
38
+#include <linux/version.h>
39
+#include <sys/epoll.h>
40
+#include <sys/ioctl.h>
41
+#include <sys/mman.h>
4542 #include <sys/stat.h>
4643 #include <sys/types.h>
4744 #include <sys/vfs.h>
48
-#include <tools/libc_compat.h>
45
+#include <sys/utsname.h>
46
+#include <sys/resource.h>
4947 #include <libelf.h>
5048 #include <gelf.h>
49
+#include <zlib.h>
5150
5251 #include "libbpf.h"
5352 #include "bpf.h"
5453 #include "btf.h"
5554 #include "str_error.h"
55
+#include "libbpf_internal.h"
56
+#include "hashmap.h"
5657
5758 #ifndef EM_BPF
5859 #define EM_BPF 247
....@@ -62,51 +63,78 @@
6263 #define BPF_FS_MAGIC 0xcafe4a11
6364 #endif
6465
66
+#define BPF_INSN_SZ (sizeof(struct bpf_insn))
67
+
68
+/* vsprintf() in __base_pr() uses nonliteral format string. It may break
69
+ * compilation if user enables corresponding warning. Disable it explicitly.
70
+ */
71
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
72
+
6573 #define __printf(a, b) __attribute__((format(printf, a, b)))
6674
67
-__printf(1, 2)
68
-static int __base_pr(const char *format, ...)
69
-{
70
- va_list args;
71
- int err;
75
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
76
+static const struct btf_type *
77
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
7278
73
- va_start(args, format);
74
- err = vfprintf(stderr, format, args);
75
- va_end(args);
76
- return err;
79
+static int __base_pr(enum libbpf_print_level level, const char *format,
80
+ va_list args)
81
+{
82
+ if (level == LIBBPF_DEBUG)
83
+ return 0;
84
+
85
+ return vfprintf(stderr, format, args);
7786 }
7887
79
-static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr;
80
-static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr;
81
-static __printf(1, 2) libbpf_print_fn_t __pr_debug;
88
+static libbpf_print_fn_t __libbpf_pr = __base_pr;
8289
83
-#define __pr(func, fmt, ...) \
84
-do { \
85
- if ((func)) \
86
- (func)("libbpf: " fmt, ##__VA_ARGS__); \
87
-} while (0)
88
-
89
-#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__)
90
-#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__)
91
-#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__)
92
-
93
-void libbpf_set_print(libbpf_print_fn_t warn,
94
- libbpf_print_fn_t info,
95
- libbpf_print_fn_t debug)
90
+libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
9691 {
97
- __pr_warning = warn;
98
- __pr_info = info;
99
- __pr_debug = debug;
92
+ libbpf_print_fn_t old_print_fn = __libbpf_pr;
93
+
94
+ __libbpf_pr = fn;
95
+ return old_print_fn;
96
+}
97
+
98
+__printf(2, 3)
99
+void libbpf_print(enum libbpf_print_level level, const char *format, ...)
100
+{
101
+ va_list args;
102
+
103
+ if (!__libbpf_pr)
104
+ return;
105
+
106
+ va_start(args, format);
107
+ __libbpf_pr(level, format, args);
108
+ va_end(args);
109
+}
110
+
111
+static void pr_perm_msg(int err)
112
+{
113
+ struct rlimit limit;
114
+ char buf[100];
115
+
116
+ if (err != -EPERM || geteuid() != 0)
117
+ return;
118
+
119
+ err = getrlimit(RLIMIT_MEMLOCK, &limit);
120
+ if (err)
121
+ return;
122
+
123
+ if (limit.rlim_cur == RLIM_INFINITY)
124
+ return;
125
+
126
+ if (limit.rlim_cur < 1024)
127
+ snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
128
+ else if (limit.rlim_cur < 1024*1024)
129
+ snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
130
+ else
131
+ snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
132
+
133
+ pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
134
+ buf);
100135 }
101136
102137 #define STRERR_BUFSIZE 128
103
-
104
-#define CHECK_ERR(action, err, out) do { \
105
- err = action; \
106
- if (err) \
107
- goto out; \
108
-} while(0)
109
-
110138
111139 /* Copied from tools/perf/util/util.h */
112140 #ifndef zfree
....@@ -122,38 +150,117 @@
122150 ___err; })
123151 #endif
124152
125
-#ifdef HAVE_LIBELF_MMAP_SUPPORT
126
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
127
-#else
128
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
129
-#endif
153
+static inline __u64 ptr_to_u64(const void *ptr)
154
+{
155
+ return (__u64) (unsigned long) ptr;
156
+}
157
+
158
+enum kern_feature_id {
159
+ /* v4.14: kernel support for program & map names. */
160
+ FEAT_PROG_NAME,
161
+ /* v5.2: kernel support for global data sections. */
162
+ FEAT_GLOBAL_DATA,
163
+ /* BTF support */
164
+ FEAT_BTF,
165
+ /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
166
+ FEAT_BTF_FUNC,
167
+ /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
168
+ FEAT_BTF_DATASEC,
169
+ /* BTF_FUNC_GLOBAL is supported */
170
+ FEAT_BTF_GLOBAL_FUNC,
171
+ /* BPF_F_MMAPABLE is supported for arrays */
172
+ FEAT_ARRAY_MMAP,
173
+ /* kernel support for expected_attach_type in BPF_PROG_LOAD */
174
+ FEAT_EXP_ATTACH_TYPE,
175
+ /* bpf_probe_read_{kernel,user}[_str] helpers */
176
+ FEAT_PROBE_READ_KERN,
177
+ /* BPF_PROG_BIND_MAP is supported */
178
+ FEAT_PROG_BIND_MAP,
179
+ __FEAT_CNT,
180
+};
181
+
182
+static bool kernel_supports(enum kern_feature_id feat_id);
183
+
184
+enum reloc_type {
185
+ RELO_LD64,
186
+ RELO_CALL,
187
+ RELO_DATA,
188
+ RELO_EXTERN,
189
+};
190
+
191
+struct reloc_desc {
192
+ enum reloc_type type;
193
+ int insn_idx;
194
+ int map_idx;
195
+ int sym_off;
196
+ bool processed;
197
+};
198
+
199
+struct bpf_sec_def;
200
+
201
+typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
202
+ struct bpf_program *prog);
203
+
204
+struct bpf_sec_def {
205
+ const char *sec;
206
+ size_t len;
207
+ enum bpf_prog_type prog_type;
208
+ enum bpf_attach_type expected_attach_type;
209
+ bool is_exp_attach_type_optional;
210
+ bool is_attachable;
211
+ bool is_attach_btf;
212
+ bool is_sleepable;
213
+ attach_fn_t attach_fn;
214
+};
130215
131216 /*
132217 * bpf_prog should be a better name but it has been used in
133218 * linux/filter.h.
134219 */
135220 struct bpf_program {
136
- /* Index in elf obj file, for relocation use. */
137
- int idx;
138
- char *name;
139
- int prog_ifindex;
140
- char *section_name;
141
- struct bpf_insn *insns;
142
- size_t insns_cnt, main_prog_cnt;
143
- enum bpf_prog_type type;
221
+ const struct bpf_sec_def *sec_def;
222
+ char *sec_name;
223
+ size_t sec_idx;
224
+ /* this program's instruction offset (in number of instructions)
225
+ * within its containing ELF section
226
+ */
227
+ size_t sec_insn_off;
228
+ /* number of original instructions in ELF section belonging to this
229
+ * program, not taking into account subprogram instructions possible
230
+ * appended later during relocation
231
+ */
232
+ size_t sec_insn_cnt;
233
+ /* Offset (in number of instructions) of the start of instruction
234
+ * belonging to this BPF program within its containing main BPF
235
+ * program. For the entry-point (main) BPF program, this is always
236
+ * zero. For a sub-program, this gets reset before each of main BPF
237
+ * programs are processed and relocated and is used to determined
238
+ * whether sub-program was already appended to the main program, and
239
+ * if yes, at which instruction offset.
240
+ */
241
+ size_t sub_insn_off;
144242
145
- struct reloc_desc {
146
- enum {
147
- RELO_LD64,
148
- RELO_CALL,
149
- } type;
150
- int insn_idx;
151
- union {
152
- int map_idx;
153
- int text_off;
154
- };
155
- } *reloc_desc;
243
+ char *name;
244
+ /* sec_name with / replaced by _; makes recursive pinning
245
+ * in bpf_object__pin_programs easier
246
+ */
247
+ char *pin_name;
248
+
249
+ /* instructions that belong to BPF program; insns[0] is located at
250
+ * sec_insn_off instruction within its ELF section in ELF file, so
251
+ * when mapping ELF file instruction index to the local instruction,
252
+ * one needs to subtract sec_insn_off; and vice versa.
253
+ */
254
+ struct bpf_insn *insns;
255
+ /* actual number of instruction in this BPF program's image; for
256
+ * entry-point BPF programs this includes the size of main program
257
+ * itself plus all the used sub-programs, appended at the end
258
+ */
259
+ size_t insns_cnt;
260
+
261
+ struct reloc_desc *reloc_desc;
156262 int nr_reloc;
263
+ int log_level;
157264
158265 struct {
159266 int nr;
....@@ -165,34 +272,155 @@
165272 void *priv;
166273 bpf_program_clear_priv_t clear_priv;
167274
275
+ bool load;
276
+ enum bpf_prog_type type;
168277 enum bpf_attach_type expected_attach_type;
278
+ int prog_ifindex;
279
+ __u32 attach_btf_id;
280
+ __u32 attach_prog_fd;
281
+ void *func_info;
282
+ __u32 func_info_rec_size;
283
+ __u32 func_info_cnt;
284
+
285
+ void *line_info;
286
+ __u32 line_info_rec_size;
287
+ __u32 line_info_cnt;
288
+ __u32 prog_flags;
289
+};
290
+
291
+struct bpf_struct_ops {
292
+ const char *tname;
293
+ const struct btf_type *type;
294
+ struct bpf_program **progs;
295
+ __u32 *kern_func_off;
296
+ /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
297
+ void *data;
298
+ /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
299
+ * btf_vmlinux's format.
300
+ * struct bpf_struct_ops_tcp_congestion_ops {
301
+ * [... some other kernel fields ...]
302
+ * struct tcp_congestion_ops data;
303
+ * }
304
+ * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
305
+ * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
306
+ * from "data".
307
+ */
308
+ void *kern_vdata;
309
+ __u32 type_id;
310
+};
311
+
312
+#define DATA_SEC ".data"
313
+#define BSS_SEC ".bss"
314
+#define RODATA_SEC ".rodata"
315
+#define KCONFIG_SEC ".kconfig"
316
+#define KSYMS_SEC ".ksyms"
317
+#define STRUCT_OPS_SEC ".struct_ops"
318
+
319
+enum libbpf_map_type {
320
+ LIBBPF_MAP_UNSPEC,
321
+ LIBBPF_MAP_DATA,
322
+ LIBBPF_MAP_BSS,
323
+ LIBBPF_MAP_RODATA,
324
+ LIBBPF_MAP_KCONFIG,
325
+};
326
+
327
+static const char * const libbpf_type_to_btf_name[] = {
328
+ [LIBBPF_MAP_DATA] = DATA_SEC,
329
+ [LIBBPF_MAP_BSS] = BSS_SEC,
330
+ [LIBBPF_MAP_RODATA] = RODATA_SEC,
331
+ [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC,
169332 };
170333
171334 struct bpf_map {
172
- int fd;
173335 char *name;
174
- size_t offset;
336
+ int fd;
337
+ int sec_idx;
338
+ size_t sec_offset;
175339 int map_ifindex;
340
+ int inner_map_fd;
176341 struct bpf_map_def def;
342
+ __u32 numa_node;
343
+ __u32 btf_var_idx;
177344 __u32 btf_key_type_id;
178345 __u32 btf_value_type_id;
346
+ __u32 btf_vmlinux_value_type_id;
179347 void *priv;
180348 bpf_map_clear_priv_t clear_priv;
349
+ enum libbpf_map_type libbpf_type;
350
+ void *mmaped;
351
+ struct bpf_struct_ops *st_ops;
352
+ struct bpf_map *inner_map;
353
+ void **init_slots;
354
+ int init_slots_sz;
355
+ char *pin_path;
356
+ bool pinned;
357
+ bool reused;
358
+};
359
+
360
+enum extern_type {
361
+ EXT_UNKNOWN,
362
+ EXT_KCFG,
363
+ EXT_KSYM,
364
+};
365
+
366
+enum kcfg_type {
367
+ KCFG_UNKNOWN,
368
+ KCFG_CHAR,
369
+ KCFG_BOOL,
370
+ KCFG_INT,
371
+ KCFG_TRISTATE,
372
+ KCFG_CHAR_ARR,
373
+};
374
+
375
+struct extern_desc {
376
+ enum extern_type type;
377
+ int sym_idx;
378
+ int btf_id;
379
+ int sec_btf_id;
380
+ const char *name;
381
+ bool is_set;
382
+ bool is_weak;
383
+ union {
384
+ struct {
385
+ enum kcfg_type type;
386
+ int sz;
387
+ int align;
388
+ int data_off;
389
+ bool is_signed;
390
+ } kcfg;
391
+ struct {
392
+ unsigned long long addr;
393
+
394
+ /* target btf_id of the corresponding kernel var. */
395
+ int vmlinux_btf_id;
396
+
397
+ /* local btf_id of the ksym extern's type. */
398
+ __u32 type_id;
399
+ } ksym;
400
+ };
181401 };
182402
183403 static LIST_HEAD(bpf_objects_list);
184404
185405 struct bpf_object {
406
+ char name[BPF_OBJ_NAME_LEN];
186407 char license[64];
187
- u32 kern_version;
408
+ __u32 kern_version;
188409
189410 struct bpf_program *programs;
190411 size_t nr_programs;
191412 struct bpf_map *maps;
192413 size_t nr_maps;
414
+ size_t maps_cap;
415
+
416
+ char *kconfig;
417
+ struct extern_desc *externs;
418
+ int nr_extern;
419
+ int kconfig_map_idx;
420
+ int rodata_map_idx;
193421
194422 bool loaded;
195
- bool has_pseudo_calls;
423
+ bool has_subcalls;
196424
197425 /*
198426 * Information when doing elf related work. Only valid if fd
....@@ -200,19 +428,31 @@
200428 */
201429 struct {
202430 int fd;
203
- void *obj_buf;
431
+ const void *obj_buf;
204432 size_t obj_buf_sz;
205433 Elf *elf;
206434 GElf_Ehdr ehdr;
207435 Elf_Data *symbols;
436
+ Elf_Data *data;
437
+ Elf_Data *rodata;
438
+ Elf_Data *bss;
439
+ Elf_Data *st_ops_data;
440
+ size_t shstrndx; /* section index for section name strings */
208441 size_t strtabidx;
209442 struct {
210443 GElf_Shdr shdr;
211444 Elf_Data *data;
212
- } *reloc;
213
- int nr_reloc;
445
+ } *reloc_sects;
446
+ int nr_reloc_sects;
214447 int maps_shndx;
448
+ int btf_maps_shndx;
449
+ __u32 btf_maps_sec_btf_id;
215450 int text_shndx;
451
+ int symbols_shndx;
452
+ int data_shndx;
453
+ int rodata_shndx;
454
+ int bss_shndx;
455
+ int st_ops_shndx;
216456 } efile;
217457 /*
218458 * All loaded bpf_object is linked in a list, which is
....@@ -222,6 +462,11 @@
222462 struct list_head list;
223463
224464 struct btf *btf;
465
+ /* Parse and load BTF vmlinux if any of the programs in the object need
466
+ * it at load time.
467
+ */
468
+ struct btf *btf_vmlinux;
469
+ struct btf_ext *btf_ext;
225470
226471 void *priv;
227472 bpf_object_clear_priv_t clear_priv;
....@@ -230,7 +475,17 @@
230475 };
231476 #define obj_elf_valid(o) ((o)->efile.elf)
232477
233
-static void bpf_program__unload(struct bpf_program *prog)
478
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
479
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
480
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
481
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
482
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
483
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
484
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
485
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
486
+ size_t off, __u32 sym_type, GElf_Sym *sym);
487
+
488
+void bpf_program__unload(struct bpf_program *prog)
234489 {
235490 int i;
236491
....@@ -245,12 +500,15 @@
245500 for (i = 0; i < prog->instances.nr; i++)
246501 zclose(prog->instances.fds[i]);
247502 } else if (prog->instances.nr != -1) {
248
- pr_warning("Internal error: instances.nr is %d\n",
249
- prog->instances.nr);
503
+ pr_warn("Internal error: instances.nr is %d\n",
504
+ prog->instances.nr);
250505 }
251506
252507 prog->instances.nr = -1;
253508 zfree(&prog->instances.fds);
509
+
510
+ zfree(&prog->func_info);
511
+ zfree(&prog->line_info);
254512 }
255513
256514 static void bpf_program__exit(struct bpf_program *prog)
....@@ -266,158 +524,539 @@
266524
267525 bpf_program__unload(prog);
268526 zfree(&prog->name);
269
- zfree(&prog->section_name);
527
+ zfree(&prog->sec_name);
528
+ zfree(&prog->pin_name);
270529 zfree(&prog->insns);
271530 zfree(&prog->reloc_desc);
272531
273532 prog->nr_reloc = 0;
274533 prog->insns_cnt = 0;
275
- prog->idx = -1;
534
+ prog->sec_idx = -1;
535
+}
536
+
537
+static char *__bpf_program__pin_name(struct bpf_program *prog)
538
+{
539
+ char *name, *p;
540
+
541
+ name = p = strdup(prog->sec_name);
542
+ while ((p = strchr(p, '/')))
543
+ *p = '_';
544
+
545
+ return name;
546
+}
547
+
548
+static bool insn_is_subprog_call(const struct bpf_insn *insn)
549
+{
550
+ return BPF_CLASS(insn->code) == BPF_JMP &&
551
+ BPF_OP(insn->code) == BPF_CALL &&
552
+ BPF_SRC(insn->code) == BPF_K &&
553
+ insn->src_reg == BPF_PSEUDO_CALL &&
554
+ insn->dst_reg == 0 &&
555
+ insn->off == 0;
276556 }
277557
278558 static int
279
-bpf_program__init(void *data, size_t size, char *section_name, int idx,
280
- struct bpf_program *prog)
559
+bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
560
+ const char *name, size_t sec_idx, const char *sec_name,
561
+ size_t sec_off, void *insn_data, size_t insn_data_sz)
281562 {
282
- if (size < sizeof(struct bpf_insn)) {
283
- pr_warning("corrupted section '%s'\n", section_name);
563
+ if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
564
+ pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
565
+ sec_name, name, sec_off, insn_data_sz);
284566 return -EINVAL;
285567 }
286568
287
- bzero(prog, sizeof(*prog));
569
+ memset(prog, 0, sizeof(*prog));
570
+ prog->obj = obj;
288571
289
- prog->section_name = strdup(section_name);
290
- if (!prog->section_name) {
291
- pr_warning("failed to alloc name for prog under section(%d) %s\n",
292
- idx, section_name);
293
- goto errout;
294
- }
572
+ prog->sec_idx = sec_idx;
573
+ prog->sec_insn_off = sec_off / BPF_INSN_SZ;
574
+ prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
575
+ /* insns_cnt can later be increased by appending used subprograms */
576
+ prog->insns_cnt = prog->sec_insn_cnt;
295577
296
- prog->insns = malloc(size);
297
- if (!prog->insns) {
298
- pr_warning("failed to alloc insns for prog under section %s\n",
299
- section_name);
300
- goto errout;
301
- }
302
- prog->insns_cnt = size / sizeof(struct bpf_insn);
303
- memcpy(prog->insns, data,
304
- prog->insns_cnt * sizeof(struct bpf_insn));
305
- prog->idx = idx;
578
+ prog->type = BPF_PROG_TYPE_UNSPEC;
579
+ prog->load = true;
580
+
306581 prog->instances.fds = NULL;
307582 prog->instances.nr = -1;
308
- prog->type = BPF_PROG_TYPE_KPROBE;
583
+
584
+ prog->sec_name = strdup(sec_name);
585
+ if (!prog->sec_name)
586
+ goto errout;
587
+
588
+ prog->name = strdup(name);
589
+ if (!prog->name)
590
+ goto errout;
591
+
592
+ prog->pin_name = __bpf_program__pin_name(prog);
593
+ if (!prog->pin_name)
594
+ goto errout;
595
+
596
+ prog->insns = malloc(insn_data_sz);
597
+ if (!prog->insns)
598
+ goto errout;
599
+ memcpy(prog->insns, insn_data, insn_data_sz);
309600
310601 return 0;
311602 errout:
603
+ pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
312604 bpf_program__exit(prog);
313605 return -ENOMEM;
314606 }
315607
316608 static int
317
-bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
318
- char *section_name, int idx)
609
+bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
610
+ const char *sec_name, int sec_idx)
319611 {
320
- struct bpf_program prog, *progs;
612
+ struct bpf_program *prog, *progs;
613
+ void *data = sec_data->d_buf;
614
+ size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
321615 int nr_progs, err;
322
-
323
- err = bpf_program__init(data, size, section_name, idx, &prog);
324
- if (err)
325
- return err;
616
+ const char *name;
617
+ GElf_Sym sym;
326618
327619 progs = obj->programs;
328620 nr_progs = obj->nr_programs;
621
+ sec_off = 0;
329622
330
- progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
331
- if (!progs) {
332
- /*
333
- * In this case the original obj->programs
334
- * is still valid, so don't need special treat for
335
- * bpf_close_object().
336
- */
337
- pr_warning("failed to alloc a new program under section '%s'\n",
338
- section_name);
339
- bpf_program__exit(&prog);
340
- return -ENOMEM;
623
+ while (sec_off < sec_sz) {
624
+ if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
625
+ pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
626
+ sec_name, sec_off);
627
+ return -LIBBPF_ERRNO__FORMAT;
628
+ }
629
+
630
+ prog_sz = sym.st_size;
631
+
632
+ name = elf_sym_str(obj, sym.st_name);
633
+ if (!name) {
634
+ pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
635
+ sec_name, sec_off);
636
+ return -LIBBPF_ERRNO__FORMAT;
637
+ }
638
+
639
+ if (sec_off + prog_sz > sec_sz) {
640
+ pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
641
+ sec_name, sec_off);
642
+ return -LIBBPF_ERRNO__FORMAT;
643
+ }
644
+
645
+ pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
646
+ sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
647
+
648
+ progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
649
+ if (!progs) {
650
+ /*
651
+ * In this case the original obj->programs
652
+ * is still valid, so don't need special treat for
653
+ * bpf_close_object().
654
+ */
655
+ pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
656
+ sec_name, name);
657
+ return -ENOMEM;
658
+ }
659
+ obj->programs = progs;
660
+
661
+ prog = &progs[nr_progs];
662
+
663
+ err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
664
+ sec_off, data + sec_off, prog_sz);
665
+ if (err)
666
+ return err;
667
+
668
+ nr_progs++;
669
+ obj->nr_programs = nr_progs;
670
+
671
+ sec_off += prog_sz;
341672 }
342673
343
- pr_debug("found program %s\n", prog.section_name);
344
- obj->programs = progs;
345
- obj->nr_programs = nr_progs + 1;
346
- prog.obj = obj;
347
- progs[nr_progs] = prog;
348674 return 0;
349675 }
350676
351
-static int
352
-bpf_object__init_prog_names(struct bpf_object *obj)
677
+static __u32 get_kernel_version(void)
353678 {
354
- Elf_Data *symbols = obj->efile.symbols;
355
- struct bpf_program *prog;
356
- size_t pi, si;
679
+ __u32 major, minor, patch;
680
+ struct utsname info;
357681
358
- for (pi = 0; pi < obj->nr_programs; pi++) {
359
- const char *name = NULL;
682
+ uname(&info);
683
+ if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
684
+ return 0;
685
+ return KERNEL_VERSION(major, minor, patch);
686
+}
360687
361
- prog = &obj->programs[pi];
688
+static const struct btf_member *
689
+find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
690
+{
691
+ struct btf_member *m;
692
+ int i;
362693
363
- for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
364
- si++) {
365
- GElf_Sym sym;
694
+ for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
695
+ if (btf_member_bit_offset(t, i) == bit_offset)
696
+ return m;
697
+ }
366698
367
- if (!gelf_getsym(symbols, si, &sym))
368
- continue;
369
- if (sym.st_shndx != prog->idx)
370
- continue;
371
- if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
372
- continue;
699
+ return NULL;
700
+}
373701
374
- name = elf_strptr(obj->efile.elf,
375
- obj->efile.strtabidx,
376
- sym.st_name);
377
- if (!name) {
378
- pr_warning("failed to get sym name string for prog %s\n",
379
- prog->section_name);
380
- return -LIBBPF_ERRNO__LIBELF;
381
- }
702
+static const struct btf_member *
703
+find_member_by_name(const struct btf *btf, const struct btf_type *t,
704
+ const char *name)
705
+{
706
+ struct btf_member *m;
707
+ int i;
708
+
709
+ for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
710
+ if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
711
+ return m;
712
+ }
713
+
714
+ return NULL;
715
+}
716
+
717
+#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
718
+static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
719
+ const char *name, __u32 kind);
720
+
721
+static int
722
+find_struct_ops_kern_types(const struct btf *btf, const char *tname,
723
+ const struct btf_type **type, __u32 *type_id,
724
+ const struct btf_type **vtype, __u32 *vtype_id,
725
+ const struct btf_member **data_member)
726
+{
727
+ const struct btf_type *kern_type, *kern_vtype;
728
+ const struct btf_member *kern_data_member;
729
+ __s32 kern_vtype_id, kern_type_id;
730
+ __u32 i;
731
+
732
+ kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
733
+ if (kern_type_id < 0) {
734
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
735
+ tname);
736
+ return kern_type_id;
737
+ }
738
+ kern_type = btf__type_by_id(btf, kern_type_id);
739
+
740
+ /* Find the corresponding "map_value" type that will be used
741
+ * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
742
+ * find "struct bpf_struct_ops_tcp_congestion_ops" from the
743
+ * btf_vmlinux.
744
+ */
745
+ kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
746
+ tname, BTF_KIND_STRUCT);
747
+ if (kern_vtype_id < 0) {
748
+ pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
749
+ STRUCT_OPS_VALUE_PREFIX, tname);
750
+ return kern_vtype_id;
751
+ }
752
+ kern_vtype = btf__type_by_id(btf, kern_vtype_id);
753
+
754
+ /* Find "struct tcp_congestion_ops" from
755
+ * struct bpf_struct_ops_tcp_congestion_ops {
756
+ * [ ... ]
757
+ * struct tcp_congestion_ops data;
758
+ * }
759
+ */
760
+ kern_data_member = btf_members(kern_vtype);
761
+ for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
762
+ if (kern_data_member->type == kern_type_id)
763
+ break;
764
+ }
765
+ if (i == btf_vlen(kern_vtype)) {
766
+ pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
767
+ tname, STRUCT_OPS_VALUE_PREFIX, tname);
768
+ return -EINVAL;
769
+ }
770
+
771
+ *type = kern_type;
772
+ *type_id = kern_type_id;
773
+ *vtype = kern_vtype;
774
+ *vtype_id = kern_vtype_id;
775
+ *data_member = kern_data_member;
776
+
777
+ return 0;
778
+}
779
+
780
+static bool bpf_map__is_struct_ops(const struct bpf_map *map)
781
+{
782
+ return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
783
+}
784
+
785
+/* Init the map's fields that depend on kern_btf */
786
+static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
787
+ const struct btf *btf,
788
+ const struct btf *kern_btf)
789
+{
790
+ const struct btf_member *member, *kern_member, *kern_data_member;
791
+ const struct btf_type *type, *kern_type, *kern_vtype;
792
+ __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
793
+ struct bpf_struct_ops *st_ops;
794
+ void *data, *kern_data;
795
+ const char *tname;
796
+ int err;
797
+
798
+ st_ops = map->st_ops;
799
+ type = st_ops->type;
800
+ tname = st_ops->tname;
801
+ err = find_struct_ops_kern_types(kern_btf, tname,
802
+ &kern_type, &kern_type_id,
803
+ &kern_vtype, &kern_vtype_id,
804
+ &kern_data_member);
805
+ if (err)
806
+ return err;
807
+
808
+ pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
809
+ map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
810
+
811
+ map->def.value_size = kern_vtype->size;
812
+ map->btf_vmlinux_value_type_id = kern_vtype_id;
813
+
814
+ st_ops->kern_vdata = calloc(1, kern_vtype->size);
815
+ if (!st_ops->kern_vdata)
816
+ return -ENOMEM;
817
+
818
+ data = st_ops->data;
819
+ kern_data_off = kern_data_member->offset / 8;
820
+ kern_data = st_ops->kern_vdata + kern_data_off;
821
+
822
+ member = btf_members(type);
823
+ for (i = 0; i < btf_vlen(type); i++, member++) {
824
+ const struct btf_type *mtype, *kern_mtype;
825
+ __u32 mtype_id, kern_mtype_id;
826
+ void *mdata, *kern_mdata;
827
+ __s64 msize, kern_msize;
828
+ __u32 moff, kern_moff;
829
+ __u32 kern_member_idx;
830
+ const char *mname;
831
+
832
+ mname = btf__name_by_offset(btf, member->name_off);
833
+ kern_member = find_member_by_name(kern_btf, kern_type, mname);
834
+ if (!kern_member) {
835
+ pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
836
+ map->name, mname);
837
+ return -ENOTSUP;
382838 }
383839
384
- if (!name && prog->idx == obj->efile.text_shndx)
385
- name = ".text";
840
+ kern_member_idx = kern_member - btf_members(kern_type);
841
+ if (btf_member_bitfield_size(type, i) ||
842
+ btf_member_bitfield_size(kern_type, kern_member_idx)) {
843
+ pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
844
+ map->name, mname);
845
+ return -ENOTSUP;
846
+ }
386847
387
- if (!name) {
388
- pr_warning("failed to find sym for prog %s\n",
389
- prog->section_name);
848
+ moff = member->offset / 8;
849
+ kern_moff = kern_member->offset / 8;
850
+
851
+ mdata = data + moff;
852
+ kern_mdata = kern_data + kern_moff;
853
+
854
+ mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
855
+ kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
856
+ &kern_mtype_id);
857
+ if (BTF_INFO_KIND(mtype->info) !=
858
+ BTF_INFO_KIND(kern_mtype->info)) {
859
+ pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
860
+ map->name, mname, BTF_INFO_KIND(mtype->info),
861
+ BTF_INFO_KIND(kern_mtype->info));
862
+ return -ENOTSUP;
863
+ }
864
+
865
+ if (btf_is_ptr(mtype)) {
866
+ struct bpf_program *prog;
867
+
868
+ prog = st_ops->progs[i];
869
+ if (!prog)
870
+ continue;
871
+
872
+ kern_mtype = skip_mods_and_typedefs(kern_btf,
873
+ kern_mtype->type,
874
+ &kern_mtype_id);
875
+
876
+ /* mtype->type must be a func_proto which was
877
+ * guaranteed in bpf_object__collect_st_ops_relos(),
878
+ * so only check kern_mtype for func_proto here.
879
+ */
880
+ if (!btf_is_func_proto(kern_mtype)) {
881
+ pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
882
+ map->name, mname);
883
+ return -ENOTSUP;
884
+ }
885
+
886
+ prog->attach_btf_id = kern_type_id;
887
+ prog->expected_attach_type = kern_member_idx;
888
+
889
+ st_ops->kern_func_off[i] = kern_data_off + kern_moff;
890
+
891
+ pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
892
+ map->name, mname, prog->name, moff,
893
+ kern_moff);
894
+
895
+ continue;
896
+ }
897
+
898
+ msize = btf__resolve_size(btf, mtype_id);
899
+ kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
900
+ if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
901
+ pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
902
+ map->name, mname, (ssize_t)msize,
903
+ (ssize_t)kern_msize);
904
+ return -ENOTSUP;
905
+ }
906
+
907
+ pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
908
+ map->name, mname, (unsigned int)msize,
909
+ moff, kern_moff);
910
+ memcpy(kern_mdata, mdata, msize);
911
+ }
912
+
913
+ return 0;
914
+}
915
+
916
+static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
917
+{
918
+ struct bpf_map *map;
919
+ size_t i;
920
+ int err;
921
+
922
+ for (i = 0; i < obj->nr_maps; i++) {
923
+ map = &obj->maps[i];
924
+
925
+ if (!bpf_map__is_struct_ops(map))
926
+ continue;
927
+
928
+ err = bpf_map__init_kern_struct_ops(map, obj->btf,
929
+ obj->btf_vmlinux);
930
+ if (err)
931
+ return err;
932
+ }
933
+
934
+ return 0;
935
+}
936
+
937
+static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
938
+{
939
+ const struct btf_type *type, *datasec;
940
+ const struct btf_var_secinfo *vsi;
941
+ struct bpf_struct_ops *st_ops;
942
+ const char *tname, *var_name;
943
+ __s32 type_id, datasec_id;
944
+ const struct btf *btf;
945
+ struct bpf_map *map;
946
+ __u32 i;
947
+
948
+ if (obj->efile.st_ops_shndx == -1)
949
+ return 0;
950
+
951
+ btf = obj->btf;
952
+ datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
953
+ BTF_KIND_DATASEC);
954
+ if (datasec_id < 0) {
955
+ pr_warn("struct_ops init: DATASEC %s not found\n",
956
+ STRUCT_OPS_SEC);
957
+ return -EINVAL;
958
+ }
959
+
960
+ datasec = btf__type_by_id(btf, datasec_id);
961
+ vsi = btf_var_secinfos(datasec);
962
+ for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
963
+ type = btf__type_by_id(obj->btf, vsi->type);
964
+ var_name = btf__name_by_offset(obj->btf, type->name_off);
965
+
966
+ type_id = btf__resolve_type(obj->btf, vsi->type);
967
+ if (type_id < 0) {
968
+ pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
969
+ vsi->type, STRUCT_OPS_SEC);
390970 return -EINVAL;
391971 }
392972
393
- prog->name = strdup(name);
394
- if (!prog->name) {
395
- pr_warning("failed to allocate memory for prog sym %s\n",
396
- name);
397
- return -ENOMEM;
973
+ type = btf__type_by_id(obj->btf, type_id);
974
+ tname = btf__name_by_offset(obj->btf, type->name_off);
975
+ if (!tname[0]) {
976
+ pr_warn("struct_ops init: anonymous type is not supported\n");
977
+ return -ENOTSUP;
398978 }
979
+ if (!btf_is_struct(type)) {
980
+ pr_warn("struct_ops init: %s is not a struct\n", tname);
981
+ return -EINVAL;
982
+ }
983
+
984
+ map = bpf_object__add_map(obj);
985
+ if (IS_ERR(map))
986
+ return PTR_ERR(map);
987
+
988
+ map->sec_idx = obj->efile.st_ops_shndx;
989
+ map->sec_offset = vsi->offset;
990
+ map->name = strdup(var_name);
991
+ if (!map->name)
992
+ return -ENOMEM;
993
+
994
+ map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
995
+ map->def.key_size = sizeof(int);
996
+ map->def.value_size = type->size;
997
+ map->def.max_entries = 1;
998
+
999
+ map->st_ops = calloc(1, sizeof(*map->st_ops));
1000
+ if (!map->st_ops)
1001
+ return -ENOMEM;
1002
+ st_ops = map->st_ops;
1003
+ st_ops->data = malloc(type->size);
1004
+ st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1005
+ st_ops->kern_func_off = malloc(btf_vlen(type) *
1006
+ sizeof(*st_ops->kern_func_off));
1007
+ if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1008
+ return -ENOMEM;
1009
+
1010
+ if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
1011
+ pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1012
+ var_name, STRUCT_OPS_SEC);
1013
+ return -EINVAL;
1014
+ }
1015
+
1016
+ memcpy(st_ops->data,
1017
+ obj->efile.st_ops_data->d_buf + vsi->offset,
1018
+ type->size);
1019
+ st_ops->tname = tname;
1020
+ st_ops->type = type;
1021
+ st_ops->type_id = type_id;
1022
+
1023
+ pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1024
+ tname, type_id, var_name, vsi->offset);
3991025 }
4001026
4011027 return 0;
4021028 }
4031029
4041030 static struct bpf_object *bpf_object__new(const char *path,
405
- void *obj_buf,
406
- size_t obj_buf_sz)
1031
+ const void *obj_buf,
1032
+ size_t obj_buf_sz,
1033
+ const char *obj_name)
4071034 {
4081035 struct bpf_object *obj;
1036
+ char *end;
4091037
4101038 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
4111039 if (!obj) {
412
- pr_warning("alloc memory failed for %s\n", path);
1040
+ pr_warn("alloc memory failed for %s\n", path);
4131041 return ERR_PTR(-ENOMEM);
4141042 }
4151043
4161044 strcpy(obj->path, path);
417
- obj->efile.fd = -1;
1045
+ if (obj_name) {
1046
+ strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
1047
+ obj->name[sizeof(obj->name) - 1] = 0;
1048
+ } else {
1049
+ /* Using basename() GNU version which doesn't modify arg. */
1050
+ strncpy(obj->name, basename((void *)path),
1051
+ sizeof(obj->name) - 1);
1052
+ end = strchr(obj->name, '.');
1053
+ if (end)
1054
+ *end = 0;
1055
+ }
4181056
1057
+ obj->efile.fd = -1;
4191058 /*
420
- * Caller of this function should also calls
1059
+ * Caller of this function should also call
4211060 * bpf_object__elf_finish() after data collection to return
4221061 * obj_buf to user. If not, we should duplicate the buffer to
4231062 * avoid user freeing them before elf finish.
....@@ -425,7 +1064,15 @@
4251064 obj->efile.obj_buf = obj_buf;
4261065 obj->efile.obj_buf_sz = obj_buf_sz;
4271066 obj->efile.maps_shndx = -1;
1067
+ obj->efile.btf_maps_shndx = -1;
1068
+ obj->efile.data_shndx = -1;
1069
+ obj->efile.rodata_shndx = -1;
1070
+ obj->efile.bss_shndx = -1;
1071
+ obj->efile.st_ops_shndx = -1;
1072
+ obj->kconfig_map_idx = -1;
1073
+ obj->rodata_map_idx = -1;
4281074
1075
+ obj->kern_version = get_kernel_version();
4291076 obj->loaded = false;
4301077
4311078 INIT_LIST_HEAD(&obj->list);
....@@ -443,13 +1090,22 @@
4431090 obj->efile.elf = NULL;
4441091 }
4451092 obj->efile.symbols = NULL;
1093
+ obj->efile.data = NULL;
1094
+ obj->efile.rodata = NULL;
1095
+ obj->efile.bss = NULL;
1096
+ obj->efile.st_ops_data = NULL;
4461097
447
- zfree(&obj->efile.reloc);
448
- obj->efile.nr_reloc = 0;
1098
+ zfree(&obj->efile.reloc_sects);
1099
+ obj->efile.nr_reloc_sects = 0;
4491100 zclose(obj->efile.fd);
4501101 obj->efile.obj_buf = NULL;
4511102 obj->efile.obj_buf_sz = 0;
4521103 }
1104
+
1105
+/* if libelf is old and doesn't support mmap(), fall back to read() */
1106
+#ifndef ELF_C_READ_MMAP
1107
+#define ELF_C_READ_MMAP ELF_C_READ
1108
+#endif
4531109
4541110 static int bpf_object__elf_init(struct bpf_object *obj)
4551111 {
....@@ -457,7 +1113,7 @@
4571113 GElf_Ehdr *ep;
4581114
4591115 if (obj_elf_valid(obj)) {
460
- pr_warning("elf init: internal error\n");
1116
+ pr_warn("elf: init internal error\n");
4611117 return -LIBBPF_ERRNO__LIBELF;
4621118 }
4631119
....@@ -466,42 +1122,54 @@
4661122 * obj_buf should have been validated by
4671123 * bpf_object__open_buffer().
4681124 */
469
- obj->efile.elf = elf_memory(obj->efile.obj_buf,
1125
+ obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
4701126 obj->efile.obj_buf_sz);
4711127 } else {
4721128 obj->efile.fd = open(obj->path, O_RDONLY);
4731129 if (obj->efile.fd < 0) {
474
- char errmsg[STRERR_BUFSIZE];
475
- char *cp = str_error(errno, errmsg, sizeof(errmsg));
1130
+ char errmsg[STRERR_BUFSIZE], *cp;
4761131
477
- pr_warning("failed to open %s: %s\n", obj->path, cp);
478
- return -errno;
1132
+ err = -errno;
1133
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
1134
+ pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
1135
+ return err;
4791136 }
4801137
481
- obj->efile.elf = elf_begin(obj->efile.fd,
482
- LIBBPF_ELF_C_READ_MMAP,
483
- NULL);
1138
+ obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
4841139 }
4851140
4861141 if (!obj->efile.elf) {
487
- pr_warning("failed to open %s as ELF file\n",
488
- obj->path);
1142
+ pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
4891143 err = -LIBBPF_ERRNO__LIBELF;
4901144 goto errout;
4911145 }
4921146
4931147 if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
494
- pr_warning("failed to get EHDR from %s\n",
495
- obj->path);
1148
+ pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
4961149 err = -LIBBPF_ERRNO__FORMAT;
4971150 goto errout;
4981151 }
4991152 ep = &obj->efile.ehdr;
5001153
1154
+ if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
1155
+ pr_warn("elf: failed to get section names section index for %s: %s\n",
1156
+ obj->path, elf_errmsg(-1));
1157
+ err = -LIBBPF_ERRNO__FORMAT;
1158
+ goto errout;
1159
+ }
1160
+
1161
+ /* Elf is corrupted/truncated, avoid calling elf_strptr. */
1162
+ if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
1163
+ pr_warn("elf: failed to get section names strings from %s: %s\n",
1164
+ obj->path, elf_errmsg(-1));
1165
+ err = -LIBBPF_ERRNO__FORMAT;
1166
+ goto errout;
1167
+ }
1168
+
5011169 /* Old LLVM set e_machine to EM_NONE */
502
- if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) {
503
- pr_warning("%s is not an eBPF object file\n",
504
- obj->path);
1170
+ if (ep->e_type != ET_REL ||
1171
+ (ep->e_machine && ep->e_machine != EM_BPF)) {
1172
+ pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
5051173 err = -LIBBPF_ERRNO__FORMAT;
5061174 goto errout;
5071175 }
....@@ -512,88 +1180,589 @@
5121180 return err;
5131181 }
5141182
515
-static int
516
-bpf_object__check_endianness(struct bpf_object *obj)
1183
+static int bpf_object__check_endianness(struct bpf_object *obj)
5171184 {
518
- static unsigned int const endian = 1;
519
-
520
- switch (obj->efile.ehdr.e_ident[EI_DATA]) {
521
- case ELFDATA2LSB:
522
- /* We are big endian, BPF obj is little endian. */
523
- if (*(unsigned char const *)&endian != 1)
524
- goto mismatch;
525
- break;
526
-
527
- case ELFDATA2MSB:
528
- /* We are little endian, BPF obj is big endian. */
529
- if (*(unsigned char const *)&endian != 0)
530
- goto mismatch;
531
- break;
532
- default:
533
- return -LIBBPF_ERRNO__ENDIAN;
534
- }
535
-
536
- return 0;
537
-
538
-mismatch:
539
- pr_warning("Error: endianness mismatch.\n");
1185
+#if __BYTE_ORDER == __LITTLE_ENDIAN
1186
+ if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
1187
+ return 0;
1188
+#elif __BYTE_ORDER == __BIG_ENDIAN
1189
+ if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
1190
+ return 0;
1191
+#else
1192
+# error "Unrecognized __BYTE_ORDER__"
1193
+#endif
1194
+ pr_warn("elf: endianness mismatch in %s.\n", obj->path);
5401195 return -LIBBPF_ERRNO__ENDIAN;
5411196 }
5421197
5431198 static int
544
-bpf_object__init_license(struct bpf_object *obj,
545
- void *data, size_t size)
1199
+bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
5461200 {
547
- memcpy(obj->license, data,
548
- min(size, sizeof(obj->license) - 1));
1201
+ memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
5491202 pr_debug("license of %s is %s\n", obj->path, obj->license);
5501203 return 0;
5511204 }
5521205
5531206 static int
554
-bpf_object__init_kversion(struct bpf_object *obj,
555
- void *data, size_t size)
1207
+bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
5561208 {
557
- u32 kver;
1209
+ __u32 kver;
5581210
5591211 if (size != sizeof(kver)) {
560
- pr_warning("invalid kver section in %s\n", obj->path);
1212
+ pr_warn("invalid kver section in %s\n", obj->path);
5611213 return -LIBBPF_ERRNO__FORMAT;
5621214 }
5631215 memcpy(&kver, data, sizeof(kver));
5641216 obj->kern_version = kver;
565
- pr_debug("kernel version of %s is %x\n", obj->path,
566
- obj->kern_version);
1217
+ pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
5671218 return 0;
5681219 }
5691220
570
-static int compare_bpf_map(const void *_a, const void *_b)
1221
+static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
5711222 {
572
- const struct bpf_map *a = _a;
573
- const struct bpf_map *b = _b;
1223
+ if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1224
+ type == BPF_MAP_TYPE_HASH_OF_MAPS)
1225
+ return true;
1226
+ return false;
1227
+}
5741228
575
- return a->offset - b->offset;
1229
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
1230
+ __u32 *size)
1231
+{
1232
+ int ret = -ENOENT;
1233
+
1234
+ *size = 0;
1235
+ if (!name) {
1236
+ return -EINVAL;
1237
+ } else if (!strcmp(name, DATA_SEC)) {
1238
+ if (obj->efile.data)
1239
+ *size = obj->efile.data->d_size;
1240
+ } else if (!strcmp(name, BSS_SEC)) {
1241
+ if (obj->efile.bss)
1242
+ *size = obj->efile.bss->d_size;
1243
+ } else if (!strcmp(name, RODATA_SEC)) {
1244
+ if (obj->efile.rodata)
1245
+ *size = obj->efile.rodata->d_size;
1246
+ } else if (!strcmp(name, STRUCT_OPS_SEC)) {
1247
+ if (obj->efile.st_ops_data)
1248
+ *size = obj->efile.st_ops_data->d_size;
1249
+ } else {
1250
+ Elf_Scn *scn = elf_sec_by_name(obj, name);
1251
+ Elf_Data *data = elf_sec_data(obj, scn);
1252
+
1253
+ if (data) {
1254
+ ret = 0; /* found it */
1255
+ *size = data->d_size;
1256
+ }
1257
+ }
1258
+
1259
+ return *size ? 0 : ret;
1260
+}
1261
+
1262
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
1263
+ __u32 *off)
1264
+{
1265
+ Elf_Data *symbols = obj->efile.symbols;
1266
+ const char *sname;
1267
+ size_t si;
1268
+
1269
+ if (!name || !off)
1270
+ return -EINVAL;
1271
+
1272
+ for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
1273
+ GElf_Sym sym;
1274
+
1275
+ if (!gelf_getsym(symbols, si, &sym))
1276
+ continue;
1277
+ if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1278
+ GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
1279
+ continue;
1280
+
1281
+ sname = elf_sym_str(obj, sym.st_name);
1282
+ if (!sname) {
1283
+ pr_warn("failed to get sym name string for var %s\n",
1284
+ name);
1285
+ return -EIO;
1286
+ }
1287
+ if (strcmp(name, sname) == 0) {
1288
+ *off = sym.st_value;
1289
+ return 0;
1290
+ }
1291
+ }
1292
+
1293
+ return -ENOENT;
1294
+}
1295
+
1296
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1297
+{
1298
+ struct bpf_map *new_maps;
1299
+ size_t new_cap;
1300
+ int i;
1301
+
1302
+ if (obj->nr_maps < obj->maps_cap)
1303
+ return &obj->maps[obj->nr_maps++];
1304
+
1305
+ new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
1306
+ new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
1307
+ if (!new_maps) {
1308
+ pr_warn("alloc maps for object failed\n");
1309
+ return ERR_PTR(-ENOMEM);
1310
+ }
1311
+
1312
+ obj->maps_cap = new_cap;
1313
+ obj->maps = new_maps;
1314
+
1315
+ /* zero out new maps */
1316
+ memset(obj->maps + obj->nr_maps, 0,
1317
+ (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
1318
+ /*
1319
+ * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
1320
+ * when failure (zclose won't close negative fd)).
1321
+ */
1322
+ for (i = obj->nr_maps; i < obj->maps_cap; i++) {
1323
+ obj->maps[i].fd = -1;
1324
+ obj->maps[i].inner_map_fd = -1;
1325
+ }
1326
+
1327
+ return &obj->maps[obj->nr_maps++];
1328
+}
1329
+
1330
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
1331
+{
1332
+ long page_sz = sysconf(_SC_PAGE_SIZE);
1333
+ size_t map_sz;
1334
+
1335
+ map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
1336
+ map_sz = roundup(map_sz, page_sz);
1337
+ return map_sz;
1338
+}
1339
+
1340
+static char *internal_map_name(struct bpf_object *obj,
1341
+ enum libbpf_map_type type)
1342
+{
1343
+ char map_name[BPF_OBJ_NAME_LEN], *p;
1344
+ const char *sfx = libbpf_type_to_btf_name[type];
1345
+ int sfx_len = max((size_t)7, strlen(sfx));
1346
+ int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
1347
+ strlen(obj->name));
1348
+
1349
+ snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1350
+ sfx_len, libbpf_type_to_btf_name[type]);
1351
+
1352
+ /* sanitise map name to characters allowed by kernel */
1353
+ for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1354
+ if (!isalnum(*p) && *p != '_' && *p != '.')
1355
+ *p = '_';
1356
+
1357
+ return strdup(map_name);
5761358 }
5771359
5781360 static int
579
-bpf_object__init_maps(struct bpf_object *obj)
1361
+bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1362
+ int sec_idx, void *data, size_t data_sz)
5801363 {
581
- int i, map_idx, map_def_sz, nr_maps = 0;
582
- Elf_Scn *scn;
583
- Elf_Data *data;
1364
+ struct bpf_map_def *def;
1365
+ struct bpf_map *map;
1366
+ int err;
1367
+
1368
+ map = bpf_object__add_map(obj);
1369
+ if (IS_ERR(map))
1370
+ return PTR_ERR(map);
1371
+
1372
+ map->libbpf_type = type;
1373
+ map->sec_idx = sec_idx;
1374
+ map->sec_offset = 0;
1375
+ map->name = internal_map_name(obj, type);
1376
+ if (!map->name) {
1377
+ pr_warn("failed to alloc map name\n");
1378
+ return -ENOMEM;
1379
+ }
1380
+
1381
+ def = &map->def;
1382
+ def->type = BPF_MAP_TYPE_ARRAY;
1383
+ def->key_size = sizeof(int);
1384
+ def->value_size = data_sz;
1385
+ def->max_entries = 1;
1386
+ def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1387
+ ? BPF_F_RDONLY_PROG : 0;
1388
+ def->map_flags |= BPF_F_MMAPABLE;
1389
+
1390
+ pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1391
+ map->name, map->sec_idx, map->sec_offset, def->map_flags);
1392
+
1393
+ map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
1394
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1395
+ if (map->mmaped == MAP_FAILED) {
1396
+ err = -errno;
1397
+ map->mmaped = NULL;
1398
+ pr_warn("failed to alloc map '%s' content buffer: %d\n",
1399
+ map->name, err);
1400
+ zfree(&map->name);
1401
+ return err;
1402
+ }
1403
+
1404
+ if (data)
1405
+ memcpy(map->mmaped, data, data_sz);
1406
+
1407
+ pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1408
+ return 0;
1409
+}
1410
+
1411
+static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1412
+{
1413
+ int err;
1414
+
1415
+ /*
1416
+ * Populate obj->maps with libbpf internal maps.
1417
+ */
1418
+ if (obj->efile.data_shndx >= 0) {
1419
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1420
+ obj->efile.data_shndx,
1421
+ obj->efile.data->d_buf,
1422
+ obj->efile.data->d_size);
1423
+ if (err)
1424
+ return err;
1425
+ }
1426
+ if (obj->efile.rodata_shndx >= 0) {
1427
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1428
+ obj->efile.rodata_shndx,
1429
+ obj->efile.rodata->d_buf,
1430
+ obj->efile.rodata->d_size);
1431
+ if (err)
1432
+ return err;
1433
+
1434
+ obj->rodata_map_idx = obj->nr_maps - 1;
1435
+ }
1436
+ if (obj->efile.bss_shndx >= 0) {
1437
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1438
+ obj->efile.bss_shndx,
1439
+ NULL,
1440
+ obj->efile.bss->d_size);
1441
+ if (err)
1442
+ return err;
1443
+ }
1444
+ return 0;
1445
+}
1446
+
1447
+
1448
+static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1449
+ const void *name)
1450
+{
1451
+ int i;
1452
+
1453
+ for (i = 0; i < obj->nr_extern; i++) {
1454
+ if (strcmp(obj->externs[i].name, name) == 0)
1455
+ return &obj->externs[i];
1456
+ }
1457
+ return NULL;
1458
+}
1459
+
1460
+static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1461
+ char value)
1462
+{
1463
+ switch (ext->kcfg.type) {
1464
+ case KCFG_BOOL:
1465
+ if (value == 'm') {
1466
+ pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
1467
+ ext->name, value);
1468
+ return -EINVAL;
1469
+ }
1470
+ *(bool *)ext_val = value == 'y' ? true : false;
1471
+ break;
1472
+ case KCFG_TRISTATE:
1473
+ if (value == 'y')
1474
+ *(enum libbpf_tristate *)ext_val = TRI_YES;
1475
+ else if (value == 'm')
1476
+ *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1477
+ else /* value == 'n' */
1478
+ *(enum libbpf_tristate *)ext_val = TRI_NO;
1479
+ break;
1480
+ case KCFG_CHAR:
1481
+ *(char *)ext_val = value;
1482
+ break;
1483
+ case KCFG_UNKNOWN:
1484
+ case KCFG_INT:
1485
+ case KCFG_CHAR_ARR:
1486
+ default:
1487
+ pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
1488
+ ext->name, value);
1489
+ return -EINVAL;
1490
+ }
1491
+ ext->is_set = true;
1492
+ return 0;
1493
+}
1494
+
1495
+static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1496
+ const char *value)
1497
+{
1498
+ size_t len;
1499
+
1500
+ if (ext->kcfg.type != KCFG_CHAR_ARR) {
1501
+ pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
1502
+ return -EINVAL;
1503
+ }
1504
+
1505
+ len = strlen(value);
1506
+ if (value[len - 1] != '"') {
1507
+ pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1508
+ ext->name, value);
1509
+ return -EINVAL;
1510
+ }
1511
+
1512
+ /* strip quotes */
1513
+ len -= 2;
1514
+ if (len >= ext->kcfg.sz) {
1515
+ pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
1516
+ ext->name, value, len, ext->kcfg.sz - 1);
1517
+ len = ext->kcfg.sz - 1;
1518
+ }
1519
+ memcpy(ext_val, value + 1, len);
1520
+ ext_val[len] = '\0';
1521
+ ext->is_set = true;
1522
+ return 0;
1523
+}
1524
+
1525
+static int parse_u64(const char *value, __u64 *res)
1526
+{
1527
+ char *value_end;
1528
+ int err;
1529
+
1530
+ errno = 0;
1531
+ *res = strtoull(value, &value_end, 0);
1532
+ if (errno) {
1533
+ err = -errno;
1534
+ pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1535
+ return err;
1536
+ }
1537
+ if (*value_end) {
1538
+ pr_warn("failed to parse '%s' as integer completely\n", value);
1539
+ return -EINVAL;
1540
+ }
1541
+ return 0;
1542
+}
1543
+
1544
+static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1545
+{
1546
+ int bit_sz = ext->kcfg.sz * 8;
1547
+
1548
+ if (ext->kcfg.sz == 8)
1549
+ return true;
1550
+
1551
+ /* Validate that value stored in u64 fits in integer of `ext->sz`
1552
+ * bytes size without any loss of information. If the target integer
1553
+ * is signed, we rely on the following limits of integer type of
1554
+ * Y bits and subsequent transformation:
1555
+ *
1556
+ * -2^(Y-1) <= X <= 2^(Y-1) - 1
1557
+ * 0 <= X + 2^(Y-1) <= 2^Y - 1
1558
+ * 0 <= X + 2^(Y-1) < 2^Y
1559
+ *
1560
+ * For unsigned target integer, check that all the (64 - Y) bits are
1561
+ * zero.
1562
+ */
1563
+ if (ext->kcfg.is_signed)
1564
+ return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1565
+ else
1566
+ return (v >> bit_sz) == 0;
1567
+}
1568
+
1569
+static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1570
+ __u64 value)
1571
+{
1572
+ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1573
+ pr_warn("extern (kcfg) %s=%llu should be integer\n",
1574
+ ext->name, (unsigned long long)value);
1575
+ return -EINVAL;
1576
+ }
1577
+ if (!is_kcfg_value_in_range(ext, value)) {
1578
+ pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
1579
+ ext->name, (unsigned long long)value, ext->kcfg.sz);
1580
+ return -ERANGE;
1581
+ }
1582
+ switch (ext->kcfg.sz) {
1583
+ case 1: *(__u8 *)ext_val = value; break;
1584
+ case 2: *(__u16 *)ext_val = value; break;
1585
+ case 4: *(__u32 *)ext_val = value; break;
1586
+ case 8: *(__u64 *)ext_val = value; break;
1587
+ default:
1588
+ return -EINVAL;
1589
+ }
1590
+ ext->is_set = true;
1591
+ return 0;
1592
+}
1593
+
1594
+static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1595
+ char *buf, void *data)
1596
+{
1597
+ struct extern_desc *ext;
1598
+ char *sep, *value;
1599
+ int len, err = 0;
1600
+ void *ext_val;
1601
+ __u64 num;
1602
+
1603
+ if (strncmp(buf, "CONFIG_", 7))
1604
+ return 0;
1605
+
1606
+ sep = strchr(buf, '=');
1607
+ if (!sep) {
1608
+ pr_warn("failed to parse '%s': no separator\n", buf);
1609
+ return -EINVAL;
1610
+ }
1611
+
1612
+ /* Trim ending '\n' */
1613
+ len = strlen(buf);
1614
+ if (buf[len - 1] == '\n')
1615
+ buf[len - 1] = '\0';
1616
+ /* Split on '=' and ensure that a value is present. */
1617
+ *sep = '\0';
1618
+ if (!sep[1]) {
1619
+ *sep = '=';
1620
+ pr_warn("failed to parse '%s': no value\n", buf);
1621
+ return -EINVAL;
1622
+ }
1623
+
1624
+ ext = find_extern_by_name(obj, buf);
1625
+ if (!ext || ext->is_set)
1626
+ return 0;
1627
+
1628
+ ext_val = data + ext->kcfg.data_off;
1629
+ value = sep + 1;
1630
+
1631
+ switch (*value) {
1632
+ case 'y': case 'n': case 'm':
1633
+ err = set_kcfg_value_tri(ext, ext_val, *value);
1634
+ break;
1635
+ case '"':
1636
+ err = set_kcfg_value_str(ext, ext_val, value);
1637
+ break;
1638
+ default:
1639
+ /* assume integer */
1640
+ err = parse_u64(value, &num);
1641
+ if (err) {
1642
+ pr_warn("extern (kcfg) %s=%s should be integer\n",
1643
+ ext->name, value);
1644
+ return err;
1645
+ }
1646
+ err = set_kcfg_value_num(ext, ext_val, num);
1647
+ break;
1648
+ }
1649
+ if (err)
1650
+ return err;
1651
+ pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
1652
+ return 0;
1653
+}
1654
+
1655
+static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1656
+{
1657
+ char buf[PATH_MAX];
1658
+ struct utsname uts;
1659
+ int len, err = 0;
1660
+ gzFile file;
1661
+
1662
+ uname(&uts);
1663
+ len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1664
+ if (len < 0)
1665
+ return -EINVAL;
1666
+ else if (len >= PATH_MAX)
1667
+ return -ENAMETOOLONG;
1668
+
1669
+ /* gzopen also accepts uncompressed files. */
1670
+ file = gzopen(buf, "r");
1671
+ if (!file)
1672
+ file = gzopen("/proc/config.gz", "r");
1673
+
1674
+ if (!file) {
1675
+ pr_warn("failed to open system Kconfig\n");
1676
+ return -ENOENT;
1677
+ }
1678
+
1679
+ while (gzgets(file, buf, sizeof(buf))) {
1680
+ err = bpf_object__process_kconfig_line(obj, buf, data);
1681
+ if (err) {
1682
+ pr_warn("error parsing system Kconfig line '%s': %d\n",
1683
+ buf, err);
1684
+ goto out;
1685
+ }
1686
+ }
1687
+
1688
+out:
1689
+ gzclose(file);
1690
+ return err;
1691
+}
1692
+
1693
+static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1694
+ const char *config, void *data)
1695
+{
1696
+ char buf[PATH_MAX];
1697
+ int err = 0;
1698
+ FILE *file;
1699
+
1700
+ file = fmemopen((void *)config, strlen(config), "r");
1701
+ if (!file) {
1702
+ err = -errno;
1703
+ pr_warn("failed to open in-memory Kconfig: %d\n", err);
1704
+ return err;
1705
+ }
1706
+
1707
+ while (fgets(buf, sizeof(buf), file)) {
1708
+ err = bpf_object__process_kconfig_line(obj, buf, data);
1709
+ if (err) {
1710
+ pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1711
+ buf, err);
1712
+ break;
1713
+ }
1714
+ }
1715
+
1716
+ fclose(file);
1717
+ return err;
1718
+}
1719
+
1720
+static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1721
+{
1722
+ struct extern_desc *last_ext = NULL, *ext;
1723
+ size_t map_sz;
1724
+ int i, err;
1725
+
1726
+ for (i = 0; i < obj->nr_extern; i++) {
1727
+ ext = &obj->externs[i];
1728
+ if (ext->type == EXT_KCFG)
1729
+ last_ext = ext;
1730
+ }
1731
+
1732
+ if (!last_ext)
1733
+ return 0;
1734
+
1735
+ map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1736
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1737
+ obj->efile.symbols_shndx,
1738
+ NULL, map_sz);
1739
+ if (err)
1740
+ return err;
1741
+
1742
+ obj->kconfig_map_idx = obj->nr_maps - 1;
1743
+
1744
+ return 0;
1745
+}
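/* Rough sketch of the resulting layout: each kcfg extern already has its
 * kcfg.data_off assigned during extern collection (externs are sorted by
 * descending alignment, then size), so e.g. an 8-byte, a 4-byte and a
 * 1-byte extern would land at offsets 0, 8 and 12, giving
 * map_sz = 12 + 1 = 13.  The .kconfig map itself is one of libbpf's
 * internal single-entry array maps, like the .data/.rodata/.bss ones.
 */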
1746
+
1747
+static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
1748
+{
5841749 Elf_Data *symbols = obj->efile.symbols;
1750
+ int i, map_def_sz = 0, nr_maps = 0, nr_syms;
1751
+ Elf_Data *data = NULL;
1752
+ Elf_Scn *scn;
5851753
5861754 if (obj->efile.maps_shndx < 0)
587
- return -EINVAL;
1755
+ return 0;
1756
+
5881757 if (!symbols)
5891758 return -EINVAL;
5901759
591
- scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
592
- if (scn)
593
- data = elf_getdata(scn, NULL);
1760
+
1761
+ scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
1762
+ data = elf_sec_data(obj, scn);
5941763 if (!scn || !data) {
595
- pr_warning("failed to get Elf_Data from map section %d\n",
596
- obj->efile.maps_shndx);
1764
+ pr_warn("elf: failed to get legacy map definitions for %s\n",
1765
+ obj->path);
5971766 return -EINVAL;
5981767 }
5991768
....@@ -604,7 +1773,8 @@
6041773 *
6051774 * TODO: Detect array of map and report error.
6061775 */
607
- for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
1776
+ nr_syms = symbols->d_size / sizeof(GElf_Sym);
1777
+ for (i = 0; i < nr_syms; i++) {
6081778 GElf_Sym sym;
6091779
6101780 if (!gelf_getsym(symbols, i, &sym))
....@@ -613,68 +1783,57 @@
6131783 continue;
6141784 nr_maps++;
6151785 }
616
-
617
- /* Alloc obj->maps and fill nr_maps. */
618
- pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
619
- nr_maps, data->d_size);
620
-
621
- if (!nr_maps)
622
- return 0;
623
-
6241786 /* Assume equally sized map definitions */
625
- map_def_sz = data->d_size / nr_maps;
626
- if (!data->d_size || (data->d_size % nr_maps) != 0) {
627
- pr_warning("unable to determine map definition size "
628
- "section %s, %d maps in %zd bytes\n",
629
- obj->path, nr_maps, data->d_size);
1787
+ pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
1788
+ nr_maps, data->d_size, obj->path);
1789
+
1790
+ if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
1791
+ pr_warn("elf: unable to determine legacy map definition size in %s\n",
1792
+ obj->path);
6301793 return -EINVAL;
6311794 }
1795
+ map_def_sz = data->d_size / nr_maps;
6321796
633
- obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
634
- if (!obj->maps) {
635
- pr_warning("alloc maps for object failed\n");
636
- return -ENOMEM;
637
- }
638
- obj->nr_maps = nr_maps;
639
-
640
- /*
641
- * fill all fd with -1 so won't close incorrect
642
- * fd (fd=0 is stdin) when failure (zclose won't close
643
- * negative fd)).
644
- */
645
- for (i = 0; i < nr_maps; i++)
646
- obj->maps[i].fd = -1;
647
-
648
- /*
649
- * Fill obj->maps using data in "maps" section.
650
- */
651
- for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
1797
+ /* Fill obj->maps using data in "maps" section. */
1798
+ for (i = 0; i < nr_syms; i++) {
6521799 GElf_Sym sym;
6531800 const char *map_name;
6541801 struct bpf_map_def *def;
1802
+ struct bpf_map *map;
6551803
6561804 if (!gelf_getsym(symbols, i, &sym))
6571805 continue;
6581806 if (sym.st_shndx != obj->efile.maps_shndx)
6591807 continue;
6601808
661
- map_name = elf_strptr(obj->efile.elf,
662
- obj->efile.strtabidx,
663
- sym.st_name);
664
- obj->maps[map_idx].offset = sym.st_value;
1809
+ map = bpf_object__add_map(obj);
1810
+ if (IS_ERR(map))
1811
+ return PTR_ERR(map);
1812
+
1813
+ map_name = elf_sym_str(obj, sym.st_name);
1814
+ if (!map_name) {
1815
+ pr_warn("failed to get map #%d name sym string for obj %s\n",
1816
+ i, obj->path);
1817
+ return -LIBBPF_ERRNO__FORMAT;
1818
+ }
1819
+
1820
+ map->libbpf_type = LIBBPF_MAP_UNSPEC;
1821
+ map->sec_idx = sym.st_shndx;
1822
+ map->sec_offset = sym.st_value;
1823
+ pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
1824
+ map_name, map->sec_idx, map->sec_offset);
6651825 if (sym.st_value + map_def_sz > data->d_size) {
666
- pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
667
- obj->path, map_name);
1826
+ pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
1827
+ obj->path, map_name);
6681828 return -EINVAL;
6691829 }
6701830
671
- obj->maps[map_idx].name = strdup(map_name);
672
- if (!obj->maps[map_idx].name) {
673
- pr_warning("failed to alloc map name\n");
1831
+ map->name = strdup(map_name);
1832
+ if (!map->name) {
1833
+ pr_warn("failed to alloc map name\n");
6741834 return -ENOMEM;
6751835 }
676
- pr_debug("map %d is \"%s\"\n", map_idx,
677
- obj->maps[map_idx].name);
1836
+ pr_debug("map %d is \"%s\"\n", i, map->name);
6781837 def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
6791838 /*
6801839 * If the definition of the map in the object file fits in
....@@ -683,7 +1842,7 @@
6831842 * calloc above.
6841843 */
6851844 if (map_def_sz <= sizeof(struct bpf_map_def)) {
686
- memcpy(&obj->maps[map_idx].def, def, map_def_sz);
1845
+ memcpy(&map->def, def, map_def_sz);
6871846 } else {
6881847 /*
6891848 * Here the map structure being read is bigger than what
....@@ -692,367 +1851,1800 @@
6921851 * incompatible.
6931852 */
6941853 char *b;
1854
+
6951855 for (b = ((char *)def) + sizeof(struct bpf_map_def);
6961856 b < ((char *)def) + map_def_sz; b++) {
6971857 if (*b != 0) {
698
- pr_warning("maps section in %s: \"%s\" "
699
- "has unrecognized, non-zero "
700
- "options\n",
701
- obj->path, map_name);
702
- return -EINVAL;
1858
+ pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
1859
+ obj->path, map_name);
1860
+ if (strict)
1861
+ return -EINVAL;
7031862 }
7041863 }
705
- memcpy(&obj->maps[map_idx].def, def,
706
- sizeof(struct bpf_map_def));
1864
+ memcpy(&map->def, def, sizeof(struct bpf_map_def));
7071865 }
708
- map_idx++;
1866
+ }
1867
+ return 0;
1868
+}
1869
+
1870
+static const struct btf_type *
1871
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
1872
+{
1873
+ const struct btf_type *t = btf__type_by_id(btf, id);
1874
+
1875
+ if (res_id)
1876
+ *res_id = id;
1877
+
1878
+ while (btf_is_mod(t) || btf_is_typedef(t)) {
1879
+ if (res_id)
1880
+ *res_id = t->type;
1881
+ t = btf__type_by_id(btf, t->type);
7091882 }
7101883
711
- qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map);
1884
+ return t;
1885
+}
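/* Example: given
 *     typedef unsigned int u32;
 *     const volatile u32 x;
 * resolving the type of 'x' walks the CONST/VOLATILE/TYPEDEF chain and
 * returns the underlying INT type; *res_id (if non-NULL) is left holding
 * the ID of that final non-modifier type.
 */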
1886
+
1887
+static const struct btf_type *
1888
+resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
1889
+{
1890
+ const struct btf_type *t;
1891
+
1892
+ t = skip_mods_and_typedefs(btf, id, NULL);
1893
+ if (!btf_is_ptr(t))
1894
+ return NULL;
1895
+
1896
+ t = skip_mods_and_typedefs(btf, t->type, res_id);
1897
+
1898
+ return btf_is_func_proto(t) ? t : NULL;
1899
+}
1900
+
1901
+static const char *btf_kind_str(const struct btf_type *t)
1902
+{
1903
+ switch (btf_kind(t)) {
1904
+ case BTF_KIND_UNKN: return "void";
1905
+ case BTF_KIND_INT: return "int";
1906
+ case BTF_KIND_PTR: return "ptr";
1907
+ case BTF_KIND_ARRAY: return "array";
1908
+ case BTF_KIND_STRUCT: return "struct";
1909
+ case BTF_KIND_UNION: return "union";
1910
+ case BTF_KIND_ENUM: return "enum";
1911
+ case BTF_KIND_FWD: return "fwd";
1912
+ case BTF_KIND_TYPEDEF: return "typedef";
1913
+ case BTF_KIND_VOLATILE: return "volatile";
1914
+ case BTF_KIND_CONST: return "const";
1915
+ case BTF_KIND_RESTRICT: return "restrict";
1916
+ case BTF_KIND_FUNC: return "func";
1917
+ case BTF_KIND_FUNC_PROTO: return "func_proto";
1918
+ case BTF_KIND_VAR: return "var";
1919
+ case BTF_KIND_DATASEC: return "datasec";
1920
+ default: return "unknown";
1921
+ }
1922
+}
1923
+
1924
+/*
1925
+ * Fetch integer attribute of BTF map definition. Such attributes are
1926
+ * represented using a pointer to an array, in which dimensionality of array
1927
+ * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
1928
+ * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
1929
+ * type definition, while using only sizeof(void *) space in ELF data section.
1930
+ */
1931
+static bool get_map_field_int(const char *map_name, const struct btf *btf,
1932
+ const struct btf_member *m, __u32 *res)
1933
+{
1934
+ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
1935
+ const char *name = btf__name_by_offset(btf, m->name_off);
1936
+ const struct btf_array *arr_info;
1937
+ const struct btf_type *arr_t;
1938
+
1939
+ if (!btf_is_ptr(t)) {
1940
+ pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
1941
+ map_name, name, btf_kind_str(t));
1942
+ return false;
1943
+ }
1944
+
1945
+ arr_t = btf__type_by_id(btf, t->type);
1946
+ if (!arr_t) {
1947
+ pr_warn("map '%s': attr '%s': type [%u] not found.\n",
1948
+ map_name, name, t->type);
1949
+ return false;
1950
+ }
1951
+ if (!btf_is_array(arr_t)) {
1952
+ pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
1953
+ map_name, name, btf_kind_str(arr_t));
1954
+ return false;
1955
+ }
1956
+ arr_info = btf_array(arr_t);
1957
+ *res = arr_info->nelems;
1958
+ return true;
1959
+}
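/* For illustration: bpf_helpers.h wraps this encoding in the __uint()
 * macro, roughly #define __uint(name, val) int (*name)[val], so a
 * definition such as
 *
 *     struct {
 *             __uint(type, BPF_MAP_TYPE_ARRAY);
 *             __uint(max_entries, 256);
 *     } m SEC(".maps");
 *
 * carries 'type' and 'max_entries' purely in BTF array dimensions, which
 * get_map_field_int() reads back here via btf_array(arr_t)->nelems.
 */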
1960
+
1961
+static int build_map_pin_path(struct bpf_map *map, const char *path)
1962
+{
1963
+ char buf[PATH_MAX];
1964
+ int len;
1965
+
1966
+ if (!path)
1967
+ path = "/sys/fs/bpf";
1968
+
1969
+ len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
1970
+ if (len < 0)
1971
+ return -EINVAL;
1972
+ else if (len >= PATH_MAX)
1973
+ return -ENAMETOOLONG;
1974
+
1975
+ return bpf_map__set_pin_path(map, buf);
1976
+}
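/* Example: with pin_root_path left NULL, a map named "my_map" that
 * requests LIBBPF_PIN_BY_NAME gets the pin path "/sys/fs/bpf/my_map".
 */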
1977
+
1978
+
1979
+static int parse_btf_map_def(struct bpf_object *obj,
1980
+ struct bpf_map *map,
1981
+ const struct btf_type *def,
1982
+ bool strict, bool is_inner,
1983
+ const char *pin_root_path)
1984
+{
1985
+ const struct btf_type *t;
1986
+ const struct btf_member *m;
1987
+ int vlen, i;
1988
+
1989
+ vlen = btf_vlen(def);
1990
+ m = btf_members(def);
1991
+ for (i = 0; i < vlen; i++, m++) {
1992
+ const char *name = btf__name_by_offset(obj->btf, m->name_off);
1993
+
1994
+ if (!name) {
1995
+ pr_warn("map '%s': invalid field #%d.\n", map->name, i);
1996
+ return -EINVAL;
1997
+ }
1998
+ if (strcmp(name, "type") == 0) {
1999
+ if (!get_map_field_int(map->name, obj->btf, m,
2000
+ &map->def.type))
2001
+ return -EINVAL;
2002
+ pr_debug("map '%s': found type = %u.\n",
2003
+ map->name, map->def.type);
2004
+ } else if (strcmp(name, "max_entries") == 0) {
2005
+ if (!get_map_field_int(map->name, obj->btf, m,
2006
+ &map->def.max_entries))
2007
+ return -EINVAL;
2008
+ pr_debug("map '%s': found max_entries = %u.\n",
2009
+ map->name, map->def.max_entries);
2010
+ } else if (strcmp(name, "map_flags") == 0) {
2011
+ if (!get_map_field_int(map->name, obj->btf, m,
2012
+ &map->def.map_flags))
2013
+ return -EINVAL;
2014
+ pr_debug("map '%s': found map_flags = %u.\n",
2015
+ map->name, map->def.map_flags);
2016
+ } else if (strcmp(name, "numa_node") == 0) {
2017
+ if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
2018
+ return -EINVAL;
2019
+ pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
2020
+ } else if (strcmp(name, "key_size") == 0) {
2021
+ __u32 sz;
2022
+
2023
+ if (!get_map_field_int(map->name, obj->btf, m, &sz))
2024
+ return -EINVAL;
2025
+ pr_debug("map '%s': found key_size = %u.\n",
2026
+ map->name, sz);
2027
+ if (map->def.key_size && map->def.key_size != sz) {
2028
+ pr_warn("map '%s': conflicting key size %u != %u.\n",
2029
+ map->name, map->def.key_size, sz);
2030
+ return -EINVAL;
2031
+ }
2032
+ map->def.key_size = sz;
2033
+ } else if (strcmp(name, "key") == 0) {
2034
+ __s64 sz;
2035
+
2036
+ t = btf__type_by_id(obj->btf, m->type);
2037
+ if (!t) {
2038
+ pr_warn("map '%s': key type [%d] not found.\n",
2039
+ map->name, m->type);
2040
+ return -EINVAL;
2041
+ }
2042
+ if (!btf_is_ptr(t)) {
2043
+ pr_warn("map '%s': key spec is not PTR: %s.\n",
2044
+ map->name, btf_kind_str(t));
2045
+ return -EINVAL;
2046
+ }
2047
+ sz = btf__resolve_size(obj->btf, t->type);
2048
+ if (sz < 0) {
2049
+ pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2050
+ map->name, t->type, (ssize_t)sz);
2051
+ return sz;
2052
+ }
2053
+ pr_debug("map '%s': found key [%u], sz = %zd.\n",
2054
+ map->name, t->type, (ssize_t)sz);
2055
+ if (map->def.key_size && map->def.key_size != sz) {
2056
+ pr_warn("map '%s': conflicting key size %u != %zd.\n",
2057
+ map->name, map->def.key_size, (ssize_t)sz);
2058
+ return -EINVAL;
2059
+ }
2060
+ map->def.key_size = sz;
2061
+ map->btf_key_type_id = t->type;
2062
+ } else if (strcmp(name, "value_size") == 0) {
2063
+ __u32 sz;
2064
+
2065
+ if (!get_map_field_int(map->name, obj->btf, m, &sz))
2066
+ return -EINVAL;
2067
+ pr_debug("map '%s': found value_size = %u.\n",
2068
+ map->name, sz);
2069
+ if (map->def.value_size && map->def.value_size != sz) {
2070
+ pr_warn("map '%s': conflicting value size %u != %u.\n",
2071
+ map->name, map->def.value_size, sz);
2072
+ return -EINVAL;
2073
+ }
2074
+ map->def.value_size = sz;
2075
+ } else if (strcmp(name, "value") == 0) {
2076
+ __s64 sz;
2077
+
2078
+ t = btf__type_by_id(obj->btf, m->type);
2079
+ if (!t) {
2080
+ pr_warn("map '%s': value type [%d] not found.\n",
2081
+ map->name, m->type);
2082
+ return -EINVAL;
2083
+ }
2084
+ if (!btf_is_ptr(t)) {
2085
+ pr_warn("map '%s': value spec is not PTR: %s.\n",
2086
+ map->name, btf_kind_str(t));
2087
+ return -EINVAL;
2088
+ }
2089
+ sz = btf__resolve_size(obj->btf, t->type);
2090
+ if (sz < 0) {
2091
+ pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2092
+ map->name, t->type, (ssize_t)sz);
2093
+ return sz;
2094
+ }
2095
+ pr_debug("map '%s': found value [%u], sz = %zd.\n",
2096
+ map->name, t->type, (ssize_t)sz);
2097
+ if (map->def.value_size && map->def.value_size != sz) {
2098
+ pr_warn("map '%s': conflicting value size %u != %zd.\n",
2099
+ map->name, map->def.value_size, (ssize_t)sz);
2100
+ return -EINVAL;
2101
+ }
2102
+ map->def.value_size = sz;
2103
+ map->btf_value_type_id = t->type;
2104
+ }
2105
+ else if (strcmp(name, "values") == 0) {
2106
+ int err;
2107
+
2108
+ if (is_inner) {
2109
+ pr_warn("map '%s': multi-level inner maps not supported.\n",
2110
+ map->name);
2111
+ return -ENOTSUP;
2112
+ }
2113
+ if (i != vlen - 1) {
2114
+ pr_warn("map '%s': '%s' member should be last.\n",
2115
+ map->name, name);
2116
+ return -EINVAL;
2117
+ }
2118
+ if (!bpf_map_type__is_map_in_map(map->def.type)) {
2119
+ pr_warn("map '%s': should be map-in-map.\n",
2120
+ map->name);
2121
+ return -ENOTSUP;
2122
+ }
2123
+ if (map->def.value_size && map->def.value_size != 4) {
2124
+ pr_warn("map '%s': conflicting value size %u != 4.\n",
2125
+ map->name, map->def.value_size);
2126
+ return -EINVAL;
2127
+ }
2128
+ map->def.value_size = 4;
2129
+ t = btf__type_by_id(obj->btf, m->type);
2130
+ if (!t) {
2131
+ pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
2132
+ map->name, m->type);
2133
+ return -EINVAL;
2134
+ }
2135
+ if (!btf_is_array(t) || btf_array(t)->nelems) {
2136
+ pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
2137
+ map->name);
2138
+ return -EINVAL;
2139
+ }
2140
+ t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
2141
+ NULL);
2142
+ if (!btf_is_ptr(t)) {
2143
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2144
+ map->name, btf_kind_str(t));
2145
+ return -EINVAL;
2146
+ }
2147
+ t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
2148
+ if (!btf_is_struct(t)) {
2149
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2150
+ map->name, btf_kind_str(t));
2151
+ return -EINVAL;
2152
+ }
2153
+
2154
+ map->inner_map = calloc(1, sizeof(*map->inner_map));
2155
+ if (!map->inner_map)
2156
+ return -ENOMEM;
2157
+ map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
2158
+ map->inner_map->name = malloc(strlen(map->name) +
2159
+ sizeof(".inner") + 1);
2160
+ if (!map->inner_map->name)
2161
+ return -ENOMEM;
2162
+ sprintf(map->inner_map->name, "%s.inner", map->name);
2163
+
2164
+ err = parse_btf_map_def(obj, map->inner_map, t, strict,
2165
+ true /* is_inner */, NULL);
2166
+ if (err)
2167
+ return err;
2168
+ } else if (strcmp(name, "pinning") == 0) {
2169
+ __u32 val;
2170
+ int err;
2171
+
2172
+ if (is_inner) {
2173
+ pr_debug("map '%s': inner def can't be pinned.\n",
2174
+ map->name);
2175
+ return -EINVAL;
2176
+ }
2177
+ if (!get_map_field_int(map->name, obj->btf, m, &val))
2178
+ return -EINVAL;
2179
+ pr_debug("map '%s': found pinning = %u.\n",
2180
+ map->name, val);
2181
+
2182
+ if (val != LIBBPF_PIN_NONE &&
2183
+ val != LIBBPF_PIN_BY_NAME) {
2184
+ pr_warn("map '%s': invalid pinning value %u.\n",
2185
+ map->name, val);
2186
+ return -EINVAL;
2187
+ }
2188
+ if (val == LIBBPF_PIN_BY_NAME) {
2189
+ err = build_map_pin_path(map, pin_root_path);
2190
+ if (err) {
2191
+ pr_warn("map '%s': couldn't build pin path.\n",
2192
+ map->name);
2193
+ return err;
2194
+ }
2195
+ }
2196
+ } else {
2197
+ if (strict) {
2198
+ pr_warn("map '%s': unknown field '%s'.\n",
2199
+ map->name, name);
2200
+ return -ENOTSUP;
2201
+ }
2202
+ pr_debug("map '%s': ignoring unknown field '%s'.\n",
2203
+ map->name, name);
2204
+ }
2205
+ }
2206
+
2207
+ if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
2208
+ pr_warn("map '%s': map type isn't specified.\n", map->name);
2209
+ return -EINVAL;
2210
+ }
2211
+
2212
+ return 0;
2213
+}
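/* A sketch of the BTF-defined map syntax parsed above, assuming the
 * __uint()/__type()/__array() and SEC() helpers from bpf_helpers.h
 * (the inner_map/outer_map names are illustrative):
 *
 *     struct inner_map {
 *             __uint(type, BPF_MAP_TYPE_ARRAY);
 *             __uint(max_entries, 1);
 *             __type(key, int);
 *             __type(value, int);
 *     };
 *
 *     struct {
 *             __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *             __uint(max_entries, 16);
 *             __uint(pinning, LIBBPF_PIN_BY_NAME);
 *             __type(key, int);
 *             __array(values, struct inner_map);
 *     } outer_map SEC(".maps");
 *
 * 'key'/'value' pointers carry the BTF type IDs, 'pinning' requests a pin
 * path under pin_root_path, and a trailing 'values' member turns the
 * definition into a map-in-map whose inner definition is parsed
 * recursively.
 */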
2214
+
2215
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2216
+ const struct btf_type *sec,
2217
+ int var_idx, int sec_idx,
2218
+ const Elf_Data *data, bool strict,
2219
+ const char *pin_root_path)
2220
+{
2221
+ const struct btf_type *var, *def;
2222
+ const struct btf_var_secinfo *vi;
2223
+ const struct btf_var *var_extra;
2224
+ const char *map_name;
2225
+ struct bpf_map *map;
2226
+
2227
+ vi = btf_var_secinfos(sec) + var_idx;
2228
+ var = btf__type_by_id(obj->btf, vi->type);
2229
+ var_extra = btf_var(var);
2230
+ map_name = btf__name_by_offset(obj->btf, var->name_off);
2231
+
2232
+ if (map_name == NULL || map_name[0] == '\0') {
2233
+ pr_warn("map #%d: empty name.\n", var_idx);
2234
+ return -EINVAL;
2235
+ }
2236
+ if ((__u64)vi->offset + vi->size > data->d_size) {
2237
+ pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2238
+ return -EINVAL;
2239
+ }
2240
+ if (!btf_is_var(var)) {
2241
+ pr_warn("map '%s': unexpected var kind %s.\n",
2242
+ map_name, btf_kind_str(var));
2243
+ return -EINVAL;
2244
+ }
2245
+ if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
2246
+ var_extra->linkage != BTF_VAR_STATIC) {
2247
+ pr_warn("map '%s': unsupported var linkage %u.\n",
2248
+ map_name, var_extra->linkage);
2249
+ return -EOPNOTSUPP;
2250
+ }
2251
+
2252
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2253
+ if (!btf_is_struct(def)) {
2254
+ pr_warn("map '%s': unexpected def kind %s.\n",
2255
+ map_name, btf_kind_str(var));
2256
+ return -EINVAL;
2257
+ }
2258
+ if (def->size > vi->size) {
2259
+ pr_warn("map '%s': invalid def size.\n", map_name);
2260
+ return -EINVAL;
2261
+ }
2262
+
2263
+ map = bpf_object__add_map(obj);
2264
+ if (IS_ERR(map))
2265
+ return PTR_ERR(map);
2266
+ map->name = strdup(map_name);
2267
+ if (!map->name) {
2268
+ pr_warn("map '%s': failed to alloc map name.\n", map_name);
2269
+ return -ENOMEM;
2270
+ }
2271
+ map->libbpf_type = LIBBPF_MAP_UNSPEC;
2272
+ map->def.type = BPF_MAP_TYPE_UNSPEC;
2273
+ map->sec_idx = sec_idx;
2274
+ map->sec_offset = vi->offset;
2275
+ map->btf_var_idx = var_idx;
2276
+ pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2277
+ map_name, map->sec_idx, map->sec_offset);
2278
+
2279
+ return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
2280
+}
2281
+
2282
+static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2283
+ const char *pin_root_path)
2284
+{
2285
+ const struct btf_type *sec = NULL;
2286
+ int nr_types, i, vlen, err;
2287
+ const struct btf_type *t;
2288
+ const char *name;
2289
+ Elf_Data *data;
2290
+ Elf_Scn *scn;
2291
+
2292
+ if (obj->efile.btf_maps_shndx < 0)
2293
+ return 0;
2294
+
2295
+ scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2296
+ data = elf_sec_data(obj, scn);
2297
+ if (!scn || !data) {
2298
+ pr_warn("elf: failed to get %s map definitions for %s\n",
2299
+ MAPS_ELF_SEC, obj->path);
2300
+ return -EINVAL;
2301
+ }
2302
+
2303
+ nr_types = btf__get_nr_types(obj->btf);
2304
+ for (i = 1; i <= nr_types; i++) {
2305
+ t = btf__type_by_id(obj->btf, i);
2306
+ if (!btf_is_datasec(t))
2307
+ continue;
2308
+ name = btf__name_by_offset(obj->btf, t->name_off);
2309
+ if (strcmp(name, MAPS_ELF_SEC) == 0) {
2310
+ sec = t;
2311
+ obj->efile.btf_maps_sec_btf_id = i;
2312
+ break;
2313
+ }
2314
+ }
2315
+
2316
+ if (!sec) {
2317
+ pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2318
+ return -ENOENT;
2319
+ }
2320
+
2321
+ vlen = btf_vlen(sec);
2322
+ for (i = 0; i < vlen; i++) {
2323
+ err = bpf_object__init_user_btf_map(obj, sec, i,
2324
+ obj->efile.btf_maps_shndx,
2325
+ data, strict,
2326
+ pin_root_path);
2327
+ if (err)
2328
+ return err;
2329
+ }
2330
+
2331
+ return 0;
2332
+}
2333
+
2334
+static int bpf_object__init_maps(struct bpf_object *obj,
2335
+ const struct bpf_object_open_opts *opts)
2336
+{
2337
+ const char *pin_root_path;
2338
+ bool strict;
2339
+ int err;
2340
+
2341
+ strict = !OPTS_GET(opts, relaxed_maps, false);
2342
+ pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2343
+
2344
+ err = bpf_object__init_user_maps(obj, strict);
2345
+ err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2346
+ err = err ?: bpf_object__init_global_data_maps(obj);
2347
+ err = err ?: bpf_object__init_kconfig_map(obj);
2348
+ err = err ?: bpf_object__init_struct_ops_maps(obj);
2349
+ if (err)
2350
+ return err;
2351
+
7122352 return 0;
7132353 }
7142354
7152355 static bool section_have_execinstr(struct bpf_object *obj, int idx)
7162356 {
717
- Elf_Scn *scn;
7182357 GElf_Shdr sh;
7192358
720
- scn = elf_getscn(obj->efile.elf, idx);
721
- if (!scn)
2359
+ if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
7222360 return false;
7232361
724
- if (gelf_getshdr(scn, &sh) != &sh)
725
- return false;
2362
+ return sh.sh_flags & SHF_EXECINSTR;
2363
+}
7262364
727
- if (sh.sh_flags & SHF_EXECINSTR)
2365
+static bool btf_needs_sanitization(struct bpf_object *obj)
2366
+{
2367
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2368
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2369
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
2370
+
2371
+ return !has_func || !has_datasec || !has_func_global;
2372
+}
2373
+
2374
+static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2375
+{
2376
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2377
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2378
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
2379
+ struct btf_type *t;
2380
+ int i, j, vlen;
2381
+
2382
+ for (i = 1; i <= btf__get_nr_types(btf); i++) {
2383
+ t = (struct btf_type *)btf__type_by_id(btf, i);
2384
+
2385
+ if (!has_datasec && btf_is_var(t)) {
2386
+ /* replace VAR with INT */
2387
+ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2388
+ /*
2389
+ * using size = 1 is the safest choice, 4 will be too
2390
+ * big and cause kernel BTF validation failure if
2391
+ * original variable took less than 4 bytes
2392
+ */
2393
+ t->size = 1;
2394
+ *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2395
+ } else if (!has_datasec && btf_is_datasec(t)) {
2396
+ /* replace DATASEC with STRUCT */
2397
+ const struct btf_var_secinfo *v = btf_var_secinfos(t);
2398
+ struct btf_member *m = btf_members(t);
2399
+ struct btf_type *vt;
2400
+ char *name;
2401
+
2402
+ name = (char *)btf__name_by_offset(btf, t->name_off);
2403
+ while (*name) {
2404
+ if (*name == '.')
2405
+ *name = '_';
2406
+ name++;
2407
+ }
2408
+
2409
+ vlen = btf_vlen(t);
2410
+ t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2411
+ for (j = 0; j < vlen; j++, v++, m++) {
2412
+ /* order of field assignments is important */
2413
+ m->offset = v->offset * 8;
2414
+ m->type = v->type;
2415
+ /* preserve variable name as member name */
2416
+ vt = (void *)btf__type_by_id(btf, v->type);
2417
+ m->name_off = vt->name_off;
2418
+ }
2419
+ } else if (!has_func && btf_is_func_proto(t)) {
2420
+ /* replace FUNC_PROTO with ENUM */
2421
+ vlen = btf_vlen(t);
2422
+ t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2423
+ t->size = sizeof(__u32); /* kernel enforced */
2424
+ } else if (!has_func && btf_is_func(t)) {
2425
+ /* replace FUNC with TYPEDEF */
2426
+ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2427
+ } else if (!has_func_global && btf_is_func(t)) {
2428
+ /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2429
+ t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2430
+ }
2431
+ }
2432
+}
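/* Rough before/after sketch of the DATASEC rewrite above, for kernels
 * lacking BTF_KIND_DATASEC support (names and type IDs are illustrative):
 *
 *     before:  DATASEC '.rodata' size=8 vlen=1
 *                  type_id=5 offset=0 size=4       (VAR 'my_const')
 *     after:   STRUCT '_rodata' size=8 vlen=1
 *                  'my_const' type_id=5 bits_offset=0
 *
 * and the VAR itself becomes a plain 1-byte INT, so the object's BTF only
 * uses kinds that older kernels accept.
 */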
2433
+
2434
+static bool libbpf_needs_btf(const struct bpf_object *obj)
2435
+{
2436
+ return obj->efile.btf_maps_shndx >= 0 ||
2437
+ obj->efile.st_ops_shndx >= 0 ||
2438
+ obj->nr_extern > 0;
2439
+}
2440
+
2441
+static bool kernel_needs_btf(const struct bpf_object *obj)
2442
+{
2443
+ return obj->efile.st_ops_shndx >= 0;
2444
+}
2445
+
2446
+static int bpf_object__init_btf(struct bpf_object *obj,
2447
+ Elf_Data *btf_data,
2448
+ Elf_Data *btf_ext_data)
2449
+{
2450
+ int err = -ENOENT;
2451
+
2452
+ if (btf_data) {
2453
+ obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2454
+ if (IS_ERR(obj->btf)) {
2455
+ err = PTR_ERR(obj->btf);
2456
+ obj->btf = NULL;
2457
+ pr_warn("Error loading ELF section %s: %d.\n",
2458
+ BTF_ELF_SEC, err);
2459
+ goto out;
2460
+ }
2461
+ /* enforce 8-byte pointers for BPF-targeted BTFs */
2462
+ btf__set_pointer_size(obj->btf, 8);
2463
+ err = 0;
2464
+ }
2465
+ if (btf_ext_data) {
2466
+ if (!obj->btf) {
2467
+ pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
2468
+ BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2469
+ goto out;
2470
+ }
2471
+ obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
2472
+ btf_ext_data->d_size);
2473
+ if (IS_ERR(obj->btf_ext)) {
2474
+ pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
2475
+ BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
2476
+ obj->btf_ext = NULL;
2477
+ goto out;
2478
+ }
2479
+ }
2480
+out:
2481
+ if (err && libbpf_needs_btf(obj)) {
2482
+ pr_warn("BTF is required, but is missing or corrupted.\n");
2483
+ return err;
2484
+ }
2485
+ return 0;
2486
+}
2487
+
2488
+static int bpf_object__finalize_btf(struct bpf_object *obj)
2489
+{
2490
+ int err;
2491
+
2492
+ if (!obj->btf)
2493
+ return 0;
2494
+
2495
+ err = btf__finalize_data(obj, obj->btf);
2496
+ if (err) {
2497
+ pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2498
+ return err;
2499
+ }
2500
+
2501
+ return 0;
2502
+}
2503
+
2504
+static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
2505
+{
2506
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2507
+ prog->type == BPF_PROG_TYPE_LSM)
2508
+ return true;
2509
+
2510
+ /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2511
+ * also need vmlinux BTF
2512
+ */
2513
+ if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
7282514 return true;
7292515
7302516 return false;
7312517 }
7322518
733
-static int bpf_object__elf_collect(struct bpf_object *obj)
2519
+static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
7342520 {
735
- Elf *elf = obj->efile.elf;
736
- GElf_Ehdr *ep = &obj->efile.ehdr;
737
- Elf_Scn *scn = NULL;
738
- int idx = 0, err = 0;
2521
+ bool need_vmlinux_btf = false;
2522
+ struct bpf_program *prog;
2523
+ int i, err;
7392524
740
- /* Elf is corrupted/truncated, avoid calling elf_strptr. */
741
- if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
742
- pr_warning("failed to get e_shstrndx from %s\n",
743
- obj->path);
744
- return -LIBBPF_ERRNO__FORMAT;
2525
+ /* CO-RE relocations need kernel BTF */
2526
+ if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
2527
+ need_vmlinux_btf = true;
2528
+
2529
+ /* Support for typed ksyms needs kernel BTF */
2530
+ for (i = 0; i < obj->nr_extern; i++) {
2531
+ const struct extern_desc *ext;
2532
+
2533
+ ext = &obj->externs[i];
2534
+ if (ext->type == EXT_KSYM && ext->ksym.type_id) {
2535
+ need_vmlinux_btf = true;
2536
+ break;
2537
+ }
7452538 }
7462539
747
- while ((scn = elf_nextscn(elf, scn)) != NULL) {
748
- char *name;
749
- GElf_Shdr sh;
750
- Elf_Data *data;
751
-
752
- idx++;
753
- if (gelf_getshdr(scn, &sh) != &sh) {
754
- pr_warning("failed to get section(%d) header from %s\n",
755
- idx, obj->path);
756
- err = -LIBBPF_ERRNO__FORMAT;
757
- goto out;
2540
+ bpf_object__for_each_program(prog, obj) {
2541
+ if (!prog->load)
2542
+ continue;
2543
+ if (libbpf_prog_needs_vmlinux_btf(prog)) {
2544
+ need_vmlinux_btf = true;
2545
+ break;
7582546 }
759
-
760
- name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
761
- if (!name) {
762
- pr_warning("failed to get section(%d) name from %s\n",
763
- idx, obj->path);
764
- err = -LIBBPF_ERRNO__FORMAT;
765
- goto out;
766
- }
767
-
768
- data = elf_getdata(scn, 0);
769
- if (!data) {
770
- pr_warning("failed to get section(%d) data from %s(%s)\n",
771
- idx, name, obj->path);
772
- err = -LIBBPF_ERRNO__FORMAT;
773
- goto out;
774
- }
775
- pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
776
- idx, name, (unsigned long)data->d_size,
777
- (int)sh.sh_link, (unsigned long)sh.sh_flags,
778
- (int)sh.sh_type);
779
-
780
- if (strcmp(name, "license") == 0)
781
- err = bpf_object__init_license(obj,
782
- data->d_buf,
783
- data->d_size);
784
- else if (strcmp(name, "version") == 0)
785
- err = bpf_object__init_kversion(obj,
786
- data->d_buf,
787
- data->d_size);
788
- else if (strcmp(name, "maps") == 0)
789
- obj->efile.maps_shndx = idx;
790
- else if (strcmp(name, BTF_ELF_SEC) == 0) {
791
- obj->btf = btf__new(data->d_buf, data->d_size,
792
- __pr_debug);
793
- if (IS_ERR(obj->btf)) {
794
- pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
795
- BTF_ELF_SEC, PTR_ERR(obj->btf));
796
- obj->btf = NULL;
797
- }
798
- } else if (sh.sh_type == SHT_SYMTAB) {
799
- if (obj->efile.symbols) {
800
- pr_warning("bpf: multiple SYMTAB in %s\n",
801
- obj->path);
802
- err = -LIBBPF_ERRNO__FORMAT;
803
- } else {
804
- obj->efile.symbols = data;
805
- obj->efile.strtabidx = sh.sh_link;
806
- }
807
- } else if ((sh.sh_type == SHT_PROGBITS) &&
808
- (sh.sh_flags & SHF_EXECINSTR) &&
809
- (data->d_size > 0)) {
810
- if (strcmp(name, ".text") == 0)
811
- obj->efile.text_shndx = idx;
812
- err = bpf_object__add_program(obj, data->d_buf,
813
- data->d_size, name, idx);
814
- if (err) {
815
- char errmsg[STRERR_BUFSIZE];
816
- char *cp = str_error(-err, errmsg, sizeof(errmsg));
817
-
818
- pr_warning("failed to alloc program %s (%s): %s",
819
- name, obj->path, cp);
820
- }
821
- } else if (sh.sh_type == SHT_REL) {
822
- void *reloc = obj->efile.reloc;
823
- int nr_reloc = obj->efile.nr_reloc + 1;
824
- int sec = sh.sh_info; /* points to other section */
825
-
826
- /* Only do relo for section with exec instructions */
827
- if (!section_have_execinstr(obj, sec)) {
828
- pr_debug("skip relo %s(%d) for section(%d)\n",
829
- name, idx, sec);
830
- continue;
831
- }
832
-
833
- reloc = reallocarray(reloc, nr_reloc,
834
- sizeof(*obj->efile.reloc));
835
- if (!reloc) {
836
- pr_warning("realloc failed\n");
837
- err = -ENOMEM;
838
- } else {
839
- int n = nr_reloc - 1;
840
-
841
- obj->efile.reloc = reloc;
842
- obj->efile.nr_reloc = nr_reloc;
843
-
844
- obj->efile.reloc[n].shdr = sh;
845
- obj->efile.reloc[n].data = data;
846
- }
847
- } else {
848
- pr_debug("skip section(%d) %s\n", idx, name);
849
- }
850
- if (err)
851
- goto out;
8522547 }
8532548
854
- if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
855
- pr_warning("Corrupted ELF file: index of strtab invalid\n");
856
- return LIBBPF_ERRNO__FORMAT;
2549
+ if (!need_vmlinux_btf)
2550
+ return 0;
2551
+
2552
+ obj->btf_vmlinux = libbpf_find_kernel_btf();
2553
+ if (IS_ERR(obj->btf_vmlinux)) {
2554
+ err = PTR_ERR(obj->btf_vmlinux);
2555
+ pr_warn("Error loading vmlinux BTF: %d\n", err);
2556
+ obj->btf_vmlinux = NULL;
2557
+ return err;
8572558 }
858
- if (obj->efile.maps_shndx >= 0) {
859
- err = bpf_object__init_maps(obj);
860
- if (err)
861
- goto out;
2559
+ return 0;
2560
+}
2561
+
2562
+static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
2563
+{
2564
+ struct btf *kern_btf = obj->btf;
2565
+ bool btf_mandatory, sanitize;
2566
+ int err = 0;
2567
+
2568
+ if (!obj->btf)
2569
+ return 0;
2570
+
2571
+ if (!kernel_supports(FEAT_BTF)) {
2572
+ if (kernel_needs_btf(obj)) {
2573
+ err = -EOPNOTSUPP;
2574
+ goto report;
2575
+ }
2576
+ pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
2577
+ return 0;
8622578 }
863
- err = bpf_object__init_prog_names(obj);
864
-out:
2579
+
2580
+ sanitize = btf_needs_sanitization(obj);
2581
+ if (sanitize) {
2582
+ const void *raw_data;
2583
+ __u32 sz;
2584
+
2585
+ /* clone BTF to sanitize a copy and leave the original intact */
2586
+ raw_data = btf__get_raw_data(obj->btf, &sz);
2587
+ kern_btf = btf__new(raw_data, sz);
2588
+ if (IS_ERR(kern_btf))
2589
+ return PTR_ERR(kern_btf);
2590
+
2591
+ /* enforce 8-byte pointers for BPF-targeted BTFs */
2592
+ btf__set_pointer_size(obj->btf, 8);
2593
+ bpf_object__sanitize_btf(obj, kern_btf);
2594
+ }
2595
+
2596
+ err = btf__load(kern_btf);
2597
+ if (sanitize) {
2598
+ if (!err) {
2599
+ /* move fd to libbpf's BTF */
2600
+ btf__set_fd(obj->btf, btf__fd(kern_btf));
2601
+ btf__set_fd(kern_btf, -1);
2602
+ }
2603
+ btf__free(kern_btf);
2604
+ }
2605
+report:
2606
+ if (err) {
2607
+ btf_mandatory = kernel_needs_btf(obj);
2608
+ pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
2609
+ btf_mandatory ? "BTF is mandatory, can't proceed."
2610
+ : "BTF is optional, ignoring.");
2611
+ if (!btf_mandatory)
2612
+ err = 0;
2613
+ }
8652614 return err;
8662615 }
8672616
868
-static struct bpf_program *
869
-bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
2617
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
8702618 {
871
- struct bpf_program *prog;
872
- size_t i;
2619
+ const char *name;
8732620
874
- for (i = 0; i < obj->nr_programs; i++) {
875
- prog = &obj->programs[i];
876
- if (prog->idx == idx)
877
- return prog;
2621
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
2622
+ if (!name) {
2623
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2624
+ off, obj->path, elf_errmsg(-1));
2625
+ return NULL;
2626
+ }
2627
+
2628
+ return name;
2629
+}
2630
+
2631
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
2632
+{
2633
+ const char *name;
2634
+
2635
+ name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
2636
+ if (!name) {
2637
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2638
+ off, obj->path, elf_errmsg(-1));
2639
+ return NULL;
2640
+ }
2641
+
2642
+ return name;
2643
+}
2644
+
2645
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
2646
+{
2647
+ Elf_Scn *scn;
2648
+
2649
+ scn = elf_getscn(obj->efile.elf, idx);
2650
+ if (!scn) {
2651
+ pr_warn("elf: failed to get section(%zu) from %s: %s\n",
2652
+ idx, obj->path, elf_errmsg(-1));
2653
+ return NULL;
2654
+ }
2655
+ return scn;
2656
+}
2657
+
2658
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
2659
+{
2660
+ Elf_Scn *scn = NULL;
2661
+ Elf *elf = obj->efile.elf;
2662
+ const char *sec_name;
2663
+
2664
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
2665
+ sec_name = elf_sec_name(obj, scn);
2666
+ if (!sec_name)
2667
+ return NULL;
2668
+
2669
+ if (strcmp(sec_name, name) != 0)
2670
+ continue;
2671
+
2672
+ return scn;
8782673 }
8792674 return NULL;
8802675 }
8812676
2677
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
2678
+{
2679
+ if (!scn)
2680
+ return -EINVAL;
2681
+
2682
+ if (gelf_getshdr(scn, hdr) != hdr) {
2683
+ pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
2684
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2685
+ return -EINVAL;
2686
+ }
2687
+
2688
+ return 0;
2689
+}
2690
+
2691
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
2692
+{
2693
+ const char *name;
2694
+ GElf_Shdr sh;
2695
+
2696
+ if (!scn)
2697
+ return NULL;
2698
+
2699
+ if (elf_sec_hdr(obj, scn, &sh))
2700
+ return NULL;
2701
+
2702
+ name = elf_sec_str(obj, sh.sh_name);
2703
+ if (!name) {
2704
+ pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
2705
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2706
+ return NULL;
2707
+ }
2708
+
2709
+ return name;
2710
+}
2711
+
2712
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
2713
+{
2714
+ Elf_Data *data;
2715
+
2716
+ if (!scn)
2717
+ return NULL;
2718
+
2719
+ data = elf_getdata(scn, 0);
2720
+ if (!data) {
2721
+ pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
2722
+ elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
2723
+ obj->path, elf_errmsg(-1));
2724
+ return NULL;
2725
+ }
2726
+
2727
+ return data;
2728
+}
2729
+
2730
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
2731
+ size_t off, __u32 sym_type, GElf_Sym *sym)
2732
+{
2733
+ Elf_Data *symbols = obj->efile.symbols;
2734
+ size_t n = symbols->d_size / sizeof(GElf_Sym);
2735
+ int i;
2736
+
2737
+ for (i = 0; i < n; i++) {
2738
+ if (!gelf_getsym(symbols, i, sym))
2739
+ continue;
2740
+ if (sym->st_shndx != sec_idx || sym->st_value != off)
2741
+ continue;
2742
+ if (GELF_ST_TYPE(sym->st_info) != sym_type)
2743
+ continue;
2744
+ return 0;
2745
+ }
2746
+
2747
+ return -ENOENT;
2748
+}
2749
+
2750
+static bool is_sec_name_dwarf(const char *name)
2751
+{
2752
+ /* approximation, but the actual list is too long */
2753
+ return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
2754
+}
2755
+
2756
+static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
2757
+{
2758
+ /* no special handling of .strtab */
2759
+ if (hdr->sh_type == SHT_STRTAB)
2760
+ return true;
2761
+
2762
+ /* ignore .llvm_addrsig section as well */
2763
+ if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
2764
+ return true;
2765
+
2766
+ /* no subprograms will lead to an empty .text section, ignore it */
2767
+ if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
2768
+ strcmp(name, ".text") == 0)
2769
+ return true;
2770
+
2771
+ /* DWARF sections */
2772
+ if (is_sec_name_dwarf(name))
2773
+ return true;
2774
+
2775
+ if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
2776
+ name += sizeof(".rel") - 1;
2777
+ /* DWARF section relocations */
2778
+ if (is_sec_name_dwarf(name))
2779
+ return true;
2780
+
2781
+ /* .BTF and .BTF.ext don't need relocations */
2782
+ if (strcmp(name, BTF_ELF_SEC) == 0 ||
2783
+ strcmp(name, BTF_EXT_ELF_SEC) == 0)
2784
+ return true;
2785
+ }
2786
+
2787
+ return false;
2788
+}
2789
+
2790
+static int cmp_progs(const void *_a, const void *_b)
2791
+{
2792
+ const struct bpf_program *a = _a;
2793
+ const struct bpf_program *b = _b;
2794
+
2795
+ if (a->sec_idx != b->sec_idx)
2796
+ return a->sec_idx < b->sec_idx ? -1 : 1;
2797
+
2798
+ /* sec_insn_off can't be the same within the section */
2799
+ return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
2800
+}
2801
+
2802
+static int bpf_object__elf_collect(struct bpf_object *obj)
2803
+{
2804
+ Elf *elf = obj->efile.elf;
2805
+ Elf_Data *btf_ext_data = NULL;
2806
+ Elf_Data *btf_data = NULL;
2807
+ int idx = 0, err = 0;
2808
+ const char *name;
2809
+ Elf_Data *data;
2810
+ Elf_Scn *scn;
2811
+ GElf_Shdr sh;
2812
+
2813
+ /* a bunch of ELF parsing functionality depends on processing symbols,
2814
+ * so do the first pass and find the symbol table
2815
+ */
2816
+ scn = NULL;
2817
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
2818
+ if (elf_sec_hdr(obj, scn, &sh))
2819
+ return -LIBBPF_ERRNO__FORMAT;
2820
+
2821
+ if (sh.sh_type == SHT_SYMTAB) {
2822
+ if (obj->efile.symbols) {
2823
+ pr_warn("elf: multiple symbol tables in %s\n", obj->path);
2824
+ return -LIBBPF_ERRNO__FORMAT;
2825
+ }
2826
+
2827
+ data = elf_sec_data(obj, scn);
2828
+ if (!data)
2829
+ return -LIBBPF_ERRNO__FORMAT;
2830
+
2831
+ obj->efile.symbols = data;
2832
+ obj->efile.symbols_shndx = elf_ndxscn(scn);
2833
+ obj->efile.strtabidx = sh.sh_link;
2834
+ }
2835
+ }
2836
+
2837
+ scn = NULL;
2838
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
2839
+ idx++;
2840
+
2841
+ if (elf_sec_hdr(obj, scn, &sh))
2842
+ return -LIBBPF_ERRNO__FORMAT;
2843
+
2844
+ name = elf_sec_str(obj, sh.sh_name);
2845
+ if (!name)
2846
+ return -LIBBPF_ERRNO__FORMAT;
2847
+
2848
+ if (ignore_elf_section(&sh, name))
2849
+ continue;
2850
+
2851
+ data = elf_sec_data(obj, scn);
2852
+ if (!data)
2853
+ return -LIBBPF_ERRNO__FORMAT;
2854
+
2855
+ pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
2856
+ idx, name, (unsigned long)data->d_size,
2857
+ (int)sh.sh_link, (unsigned long)sh.sh_flags,
2858
+ (int)sh.sh_type);
2859
+
2860
+ if (strcmp(name, "license") == 0) {
2861
+ err = bpf_object__init_license(obj, data->d_buf, data->d_size);
2862
+ if (err)
2863
+ return err;
2864
+ } else if (strcmp(name, "version") == 0) {
2865
+ err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
2866
+ if (err)
2867
+ return err;
2868
+ } else if (strcmp(name, "maps") == 0) {
2869
+ obj->efile.maps_shndx = idx;
2870
+ } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
2871
+ obj->efile.btf_maps_shndx = idx;
2872
+ } else if (strcmp(name, BTF_ELF_SEC) == 0) {
2873
+ btf_data = data;
2874
+ } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
2875
+ btf_ext_data = data;
2876
+ } else if (sh.sh_type == SHT_SYMTAB) {
2877
+ /* already processed during the first pass above */
2878
+ } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
2879
+ if (sh.sh_flags & SHF_EXECINSTR) {
2880
+ if (strcmp(name, ".text") == 0)
2881
+ obj->efile.text_shndx = idx;
2882
+ err = bpf_object__add_programs(obj, data, name, idx);
2883
+ if (err)
2884
+ return err;
2885
+ } else if (strcmp(name, DATA_SEC) == 0) {
2886
+ obj->efile.data = data;
2887
+ obj->efile.data_shndx = idx;
2888
+ } else if (strcmp(name, RODATA_SEC) == 0) {
2889
+ obj->efile.rodata = data;
2890
+ obj->efile.rodata_shndx = idx;
2891
+ } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
2892
+ obj->efile.st_ops_data = data;
2893
+ obj->efile.st_ops_shndx = idx;
2894
+ } else {
2895
+ pr_info("elf: skipping unrecognized data section(%d) %s\n",
2896
+ idx, name);
2897
+ }
2898
+ } else if (sh.sh_type == SHT_REL) {
2899
+ int nr_sects = obj->efile.nr_reloc_sects;
2900
+ void *sects = obj->efile.reloc_sects;
2901
+ int sec = sh.sh_info; /* points to other section */
2902
+
2903
+ /* Only do relo for section with exec instructions */
2904
+ if (!section_have_execinstr(obj, sec) &&
2905
+ strcmp(name, ".rel" STRUCT_OPS_SEC) &&
2906
+ strcmp(name, ".rel" MAPS_ELF_SEC)) {
2907
+ pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
2908
+ idx, name, sec,
2909
+ elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
2910
+ continue;
2911
+ }
2912
+
2913
+ sects = libbpf_reallocarray(sects, nr_sects + 1,
2914
+ sizeof(*obj->efile.reloc_sects));
2915
+ if (!sects)
2916
+ return -ENOMEM;
2917
+
2918
+ obj->efile.reloc_sects = sects;
2919
+ obj->efile.nr_reloc_sects++;
2920
+
2921
+ obj->efile.reloc_sects[nr_sects].shdr = sh;
2922
+ obj->efile.reloc_sects[nr_sects].data = data;
2923
+ } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
2924
+ obj->efile.bss = data;
2925
+ obj->efile.bss_shndx = idx;
2926
+ } else {
2927
+ pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
2928
+ (size_t)sh.sh_size);
2929
+ }
2930
+ }
2931
+
2932
+ if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
2933
+ pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
2934
+ return -LIBBPF_ERRNO__FORMAT;
2935
+ }
2936
+
2937
+ /* sort BPF programs by section name and in-section instruction offset
2938
+ * for faster search */
2939
+ qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
2940
+
2941
+ return bpf_object__init_btf(obj, btf_data, btf_ext_data);
2942
+}
2943
+
2944
+static bool sym_is_extern(const GElf_Sym *sym)
2945
+{
2946
+ int bind = GELF_ST_BIND(sym->st_info);
2947
+ /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
2948
+ return sym->st_shndx == SHN_UNDEF &&
2949
+ (bind == STB_GLOBAL || bind == STB_WEAK) &&
2950
+ GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
2951
+}
2952
+
2953
+static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
2954
+{
2955
+ const struct btf_type *t;
2956
+ const char *var_name;
2957
+ int i, n;
2958
+
2959
+ if (!btf)
2960
+ return -ESRCH;
2961
+
2962
+ n = btf__get_nr_types(btf);
2963
+ for (i = 1; i <= n; i++) {
2964
+ t = btf__type_by_id(btf, i);
2965
+
2966
+ if (!btf_is_var(t))
2967
+ continue;
2968
+
2969
+ var_name = btf__name_by_offset(btf, t->name_off);
2970
+ if (strcmp(var_name, ext_name))
2971
+ continue;
2972
+
2973
+ if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
2974
+ return -EINVAL;
2975
+
2976
+ return i;
2977
+ }
2978
+
2979
+ return -ENOENT;
2980
+}
2981
+
2982
+static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
2983
+ const struct btf_var_secinfo *vs;
2984
+ const struct btf_type *t;
2985
+ int i, j, n;
2986
+
2987
+ if (!btf)
2988
+ return -ESRCH;
2989
+
2990
+ n = btf__get_nr_types(btf);
2991
+ for (i = 1; i <= n; i++) {
2992
+ t = btf__type_by_id(btf, i);
2993
+
2994
+ if (!btf_is_datasec(t))
2995
+ continue;
2996
+
2997
+ vs = btf_var_secinfos(t);
2998
+ for (j = 0; j < btf_vlen(t); j++, vs++) {
2999
+ if (vs->type == ext_btf_id)
3000
+ return i;
3001
+ }
3002
+ }
3003
+
3004
+ return -ENOENT;
3005
+}
3006
+
3007
+static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3008
+ bool *is_signed)
3009
+{
3010
+ const struct btf_type *t;
3011
+ const char *name;
3012
+
3013
+ t = skip_mods_and_typedefs(btf, id, NULL);
3014
+ name = btf__name_by_offset(btf, t->name_off);
3015
+
3016
+ if (is_signed)
3017
+ *is_signed = false;
3018
+ switch (btf_kind(t)) {
3019
+ case BTF_KIND_INT: {
3020
+ int enc = btf_int_encoding(t);
3021
+
3022
+ if (enc & BTF_INT_BOOL)
3023
+ return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3024
+ if (is_signed)
3025
+ *is_signed = enc & BTF_INT_SIGNED;
3026
+ if (t->size == 1)
3027
+ return KCFG_CHAR;
3028
+ if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3029
+ return KCFG_UNKNOWN;
3030
+ return KCFG_INT;
3031
+ }
3032
+ case BTF_KIND_ENUM:
3033
+ if (t->size != 4)
3034
+ return KCFG_UNKNOWN;
3035
+ if (strcmp(name, "libbpf_tristate"))
3036
+ return KCFG_UNKNOWN;
3037
+ return KCFG_TRISTATE;
3038
+ case BTF_KIND_ARRAY:
3039
+ if (btf_array(t)->nelems == 0)
3040
+ return KCFG_UNKNOWN;
3041
+ if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3042
+ return KCFG_UNKNOWN;
3043
+ return KCFG_CHAR_ARR;
3044
+ default:
3045
+ return KCFG_UNKNOWN;
3046
+ }
3047
+}
3048
+
3049
+static int cmp_externs(const void *_a, const void *_b)
3050
+{
3051
+ const struct extern_desc *a = _a;
3052
+ const struct extern_desc *b = _b;
3053
+
3054
+ if (a->type != b->type)
3055
+ return a->type < b->type ? -1 : 1;
3056
+
3057
+ if (a->type == EXT_KCFG) {
3058
+ /* descending order by alignment requirements */
3059
+ if (a->kcfg.align != b->kcfg.align)
3060
+ return a->kcfg.align > b->kcfg.align ? -1 : 1;
3061
+ /* ascending order by size, within same alignment class */
3062
+ if (a->kcfg.sz != b->kcfg.sz)
3063
+ return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3064
+ }
3065
+
3066
+ /* resolve ties by name */
3067
+ return strcmp(a->name, b->name);
3068
+}
3069
+
3070
+static int find_int_btf_id(const struct btf *btf)
3071
+{
3072
+ const struct btf_type *t;
3073
+ int i, n;
3074
+
3075
+ n = btf__get_nr_types(btf);
3076
+ for (i = 1; i <= n; i++) {
3077
+ t = btf__type_by_id(btf, i);
3078
+
3079
+ if (btf_is_int(t) && btf_int_bits(t) == 32)
3080
+ return i;
3081
+ }
3082
+
3083
+ return 0;
3084
+}
3085
+
3086
+static int bpf_object__collect_externs(struct bpf_object *obj)
3087
+{
3088
+ struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3089
+ const struct btf_type *t;
3090
+ struct extern_desc *ext;
3091
+ int i, n, off;
3092
+ const char *ext_name, *sec_name;
3093
+ Elf_Scn *scn;
3094
+ GElf_Shdr sh;
3095
+
3096
+ if (!obj->efile.symbols)
3097
+ return 0;
3098
+
3099
+ scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3100
+ if (elf_sec_hdr(obj, scn, &sh))
3101
+ return -LIBBPF_ERRNO__FORMAT;
3102
+
3103
+ n = sh.sh_size / sh.sh_entsize;
3104
+ pr_debug("looking for externs among %d symbols...\n", n);
3105
+
3106
+ for (i = 0; i < n; i++) {
3107
+ GElf_Sym sym;
3108
+
3109
+ if (!gelf_getsym(obj->efile.symbols, i, &sym))
3110
+ return -LIBBPF_ERRNO__FORMAT;
3111
+ if (!sym_is_extern(&sym))
3112
+ continue;
3113
+ ext_name = elf_sym_str(obj, sym.st_name);
3114
+ if (!ext_name || !ext_name[0])
3115
+ continue;
3116
+
3117
+ ext = obj->externs;
3118
+ ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3119
+ if (!ext)
3120
+ return -ENOMEM;
3121
+ obj->externs = ext;
3122
+ ext = &ext[obj->nr_extern];
3123
+ memset(ext, 0, sizeof(*ext));
3124
+ obj->nr_extern++;
3125
+
3126
+ ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3127
+ if (ext->btf_id <= 0) {
3128
+ pr_warn("failed to find BTF for extern '%s': %d\n",
3129
+ ext_name, ext->btf_id);
3130
+ return ext->btf_id;
3131
+ }
3132
+ t = btf__type_by_id(obj->btf, ext->btf_id);
3133
+ ext->name = btf__name_by_offset(obj->btf, t->name_off);
3134
+ ext->sym_idx = i;
3135
+ ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
3136
+
3137
+ ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3138
+ if (ext->sec_btf_id <= 0) {
3139
+ pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3140
+ ext_name, ext->btf_id, ext->sec_btf_id);
3141
+ return ext->sec_btf_id;
3142
+ }
3143
+ sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3144
+ sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3145
+
3146
+ if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3147
+ kcfg_sec = sec;
3148
+ ext->type = EXT_KCFG;
3149
+ ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3150
+ if (ext->kcfg.sz <= 0) {
3151
+ pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3152
+ ext_name, ext->kcfg.sz);
3153
+ return ext->kcfg.sz;
3154
+ }
3155
+ ext->kcfg.align = btf__align_of(obj->btf, t->type);
3156
+ if (ext->kcfg.align <= 0) {
3157
+ pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3158
+ ext_name, ext->kcfg.align);
3159
+ return -EINVAL;
3160
+ }
3161
+ ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3162
+ &ext->kcfg.is_signed);
3163
+ if (ext->kcfg.type == KCFG_UNKNOWN) {
3164
+ pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
3165
+ return -ENOTSUP;
3166
+ }
3167
+ } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3168
+ ksym_sec = sec;
3169
+ ext->type = EXT_KSYM;
3170
+ skip_mods_and_typedefs(obj->btf, t->type,
3171
+ &ext->ksym.type_id);
3172
+ } else {
3173
+ pr_warn("unrecognized extern section '%s'\n", sec_name);
3174
+ return -ENOTSUP;
3175
+ }
3176
+ }
3177
+ pr_debug("collected %d externs total\n", obj->nr_extern);
3178
+
3179
+ if (!obj->nr_extern)
3180
+ return 0;
3181
+
3182
+ /* sort externs by type, for kcfg ones also by (align, size, name) */
3183
+ qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3184
+
3185
+ /* for .ksyms section, we need to turn all externs into allocated
3186
+ * variables in BTF to pass kernel verification; we do this by
3187
+ * pretending that each extern is a 8-byte variable
3188
+ */
3189
+ if (ksym_sec) {
3190
+ /* find existing 4-byte integer type in BTF to use for fake
3191
+ * extern variables in DATASEC
3192
+ */
3193
+ int int_btf_id = find_int_btf_id(obj->btf);
3194
+
3195
+ for (i = 0; i < obj->nr_extern; i++) {
3196
+ ext = &obj->externs[i];
3197
+ if (ext->type != EXT_KSYM)
3198
+ continue;
3199
+ pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3200
+ i, ext->sym_idx, ext->name);
3201
+ }
3202
+
3203
+ sec = ksym_sec;
3204
+ n = btf_vlen(sec);
3205
+ for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3206
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3207
+ struct btf_type *vt;
3208
+
3209
+ vt = (void *)btf__type_by_id(obj->btf, vs->type);
3210
+ ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3211
+ ext = find_extern_by_name(obj, ext_name);
3212
+ if (!ext) {
3213
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
3214
+ ext_name);
3215
+ return -ESRCH;
3216
+ }
3217
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3218
+ vt->type = int_btf_id;
3219
+ vs->offset = off;
3220
+ vs->size = sizeof(int);
3221
+ }
3222
+ sec->size = off;
3223
+ }
3224
+
3225
+ if (kcfg_sec) {
3226
+ sec = kcfg_sec;
3227
+ /* for kcfg externs calculate their offsets within a .kconfig map */
3228
+ off = 0;
3229
+ for (i = 0; i < obj->nr_extern; i++) {
3230
+ ext = &obj->externs[i];
3231
+ if (ext->type != EXT_KCFG)
3232
+ continue;
3233
+
3234
+ ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3235
+ off = ext->kcfg.data_off + ext->kcfg.sz;
3236
+ pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3237
+ i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3238
+ }
3239
+ sec->size = off;
3240
+ n = btf_vlen(sec);
3241
+ for (i = 0; i < n; i++) {
3242
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3243
+
3244
+ t = btf__type_by_id(obj->btf, vs->type);
3245
+ ext_name = btf__name_by_offset(obj->btf, t->name_off);
3246
+ ext = find_extern_by_name(obj, ext_name);
3247
+ if (!ext) {
3248
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
3249
+ ext_name);
3250
+ return -ESRCH;
3251
+ }
3252
+ btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3253
+ vs->offset = ext->kcfg.data_off;
3254
+ }
3255
+ }
3256
+ return 0;
3257
+}
3258
+
8823259 struct bpf_program *
883
-bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
3260
+bpf_object__find_program_by_title(const struct bpf_object *obj,
3261
+ const char *title)
8843262 {
8853263 struct bpf_program *pos;
8863264
8873265 bpf_object__for_each_program(pos, obj) {
888
- if (pos->section_name && !strcmp(pos->section_name, title))
3266
+ if (pos->sec_name && !strcmp(pos->sec_name, title))
8893267 return pos;
8903268 }
8913269 return NULL;
8923270 }
8933271
3272
+static bool prog_is_subprog(const struct bpf_object *obj,
3273
+ const struct bpf_program *prog)
3274
+{
3275
+ /* For legacy reasons, libbpf supports an entry-point BPF programs
3276
+ * without SEC() attribute, i.e., those in the .text section. But if
3277
+ * there are 2 or more such programs in the .text section, they all
3278
+ * must be subprograms called from entry-point BPF programs in
3279
+ * designated SEC()'tions, otherwise there is no way to distinguish
3280
+ * which of those programs should be loaded vs which are a subprogram.
3281
+ * Similarly, if there is a function/program in .text and at least one
3282
+ * other BPF program with custom SEC() attribute, then we just assume
3283
+ * .text programs are subprograms (even if they are not called from
3284
+ * other programs), because libbpf never explicitly supported mixing
3285
+ * SEC()-designated BPF programs and .text entry-point BPF programs.
3286
+ */
3287
+ return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3288
+}
3289
+
3290
+struct bpf_program *
3291
+bpf_object__find_program_by_name(const struct bpf_object *obj,
3292
+ const char *name)
3293
+{
3294
+ struct bpf_program *prog;
3295
+
3296
+ bpf_object__for_each_program(prog, obj) {
3297
+ if (prog_is_subprog(obj, prog))
3298
+ continue;
3299
+ if (!strcmp(prog->name, name))
3300
+ return prog;
3301
+ }
3302
+ return NULL;
3303
+}
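/*
 * Hedged usage sketch (not part of libbpf itself): how a loader might use the
 * name-based lookup; the program name "handle_tp" is a made-up example.
 */
static struct bpf_program *example_lookup_prog(struct bpf_object *obj)
{
	/* matches the C function name of an entry-point program; .text
	 * subprograms are skipped via prog_is_subprog() above
	 */
	return bpf_object__find_program_by_name(obj, "handle_tp");
}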
3304
+
3305
+static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3306
+ int shndx)
3307
+{
3308
+ return shndx == obj->efile.data_shndx ||
3309
+ shndx == obj->efile.bss_shndx ||
3310
+ shndx == obj->efile.rodata_shndx;
3311
+}
3312
+
3313
+static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3314
+ int shndx)
3315
+{
3316
+ return shndx == obj->efile.maps_shndx ||
3317
+ shndx == obj->efile.btf_maps_shndx;
3318
+}
3319
+
3320
+static enum libbpf_map_type
3321
+bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3322
+{
3323
+ if (shndx == obj->efile.data_shndx)
3324
+ return LIBBPF_MAP_DATA;
3325
+ else if (shndx == obj->efile.bss_shndx)
3326
+ return LIBBPF_MAP_BSS;
3327
+ else if (shndx == obj->efile.rodata_shndx)
3328
+ return LIBBPF_MAP_RODATA;
3329
+ else if (shndx == obj->efile.symbols_shndx)
3330
+ return LIBBPF_MAP_KCONFIG;
3331
+ else
3332
+ return LIBBPF_MAP_UNSPEC;
3333
+}
3334
+
3335
+static int bpf_program__record_reloc(struct bpf_program *prog,
3336
+ struct reloc_desc *reloc_desc,
3337
+ __u32 insn_idx, const char *sym_name,
3338
+ const GElf_Sym *sym, const GElf_Rel *rel)
3339
+{
3340
+ struct bpf_insn *insn = &prog->insns[insn_idx];
3341
+ size_t map_idx, nr_maps = prog->obj->nr_maps;
3342
+ struct bpf_object *obj = prog->obj;
3343
+ __u32 shdr_idx = sym->st_shndx;
3344
+ enum libbpf_map_type type;
3345
+ const char *sym_sec_name;
3346
+ struct bpf_map *map;
3347
+
3348
+ reloc_desc->processed = false;
3349
+
3350
+ /* sub-program call relocation */
3351
+ if (insn->code == (BPF_JMP | BPF_CALL)) {
3352
+ if (insn->src_reg != BPF_PSEUDO_CALL) {
3353
+ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
3354
+ return -LIBBPF_ERRNO__RELOC;
3355
+ }
3356
+ /* text_shndx can be 0, if no default "main" program exists */
3357
+ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
3358
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3359
+ pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
3360
+ prog->name, sym_name, sym_sec_name);
3361
+ return -LIBBPF_ERRNO__RELOC;
3362
+ }
3363
+ if (sym->st_value % BPF_INSN_SZ) {
3364
+ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
3365
+ prog->name, sym_name, (size_t)sym->st_value);
3366
+ return -LIBBPF_ERRNO__RELOC;
3367
+ }
3368
+ reloc_desc->type = RELO_CALL;
3369
+ reloc_desc->insn_idx = insn_idx;
3370
+ reloc_desc->sym_off = sym->st_value;
3371
+ return 0;
3372
+ }
3373
+
3374
+ if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
3375
+ pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
3376
+ prog->name, sym_name, insn_idx, insn->code);
3377
+ return -LIBBPF_ERRNO__RELOC;
3378
+ }
3379
+
3380
+ if (sym_is_extern(sym)) {
3381
+ int sym_idx = GELF_R_SYM(rel->r_info);
3382
+ int i, n = obj->nr_extern;
3383
+ struct extern_desc *ext;
3384
+
3385
+ for (i = 0; i < n; i++) {
3386
+ ext = &obj->externs[i];
3387
+ if (ext->sym_idx == sym_idx)
3388
+ break;
3389
+ }
3390
+ if (i >= n) {
3391
+ pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
3392
+ prog->name, sym_name, sym_idx);
3393
+ return -LIBBPF_ERRNO__RELOC;
3394
+ }
3395
+ pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
3396
+ prog->name, i, ext->name, ext->sym_idx, insn_idx);
3397
+ reloc_desc->type = RELO_EXTERN;
3398
+ reloc_desc->insn_idx = insn_idx;
3399
+ reloc_desc->sym_off = i; /* sym_off stores extern index */
3400
+ return 0;
3401
+ }
3402
+
3403
+ if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
3404
+ pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
3405
+ prog->name, sym_name, shdr_idx);
3406
+ return -LIBBPF_ERRNO__RELOC;
3407
+ }
3408
+
3409
+ type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
3410
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3411
+
3412
+ /* generic map reference relocation */
3413
+ if (type == LIBBPF_MAP_UNSPEC) {
3414
+ if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
3415
+ pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
3416
+ prog->name, sym_name, sym_sec_name);
3417
+ return -LIBBPF_ERRNO__RELOC;
3418
+ }
3419
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3420
+ map = &obj->maps[map_idx];
3421
+ if (map->libbpf_type != type ||
3422
+ map->sec_idx != sym->st_shndx ||
3423
+ map->sec_offset != sym->st_value)
3424
+ continue;
3425
+ pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
3426
+ prog->name, map_idx, map->name, map->sec_idx,
3427
+ map->sec_offset, insn_idx);
3428
+ break;
3429
+ }
3430
+ if (map_idx >= nr_maps) {
3431
+ pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
3432
+ prog->name, sym_sec_name, (size_t)sym->st_value);
3433
+ return -LIBBPF_ERRNO__RELOC;
3434
+ }
3435
+ reloc_desc->type = RELO_LD64;
3436
+ reloc_desc->insn_idx = insn_idx;
3437
+ reloc_desc->map_idx = map_idx;
3438
+ reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
3439
+ return 0;
3440
+ }
3441
+
3442
+ /* global data map relocation */
3443
+ if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
3444
+ pr_warn("prog '%s': bad data relo against section '%s'\n",
3445
+ prog->name, sym_sec_name);
3446
+ return -LIBBPF_ERRNO__RELOC;
3447
+ }
3448
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3449
+ map = &obj->maps[map_idx];
3450
+ if (map->libbpf_type != type)
3451
+ continue;
3452
+ pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
3453
+ prog->name, map_idx, map->name, map->sec_idx,
3454
+ map->sec_offset, insn_idx);
3455
+ break;
3456
+ }
3457
+ if (map_idx >= nr_maps) {
3458
+ pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
3459
+ prog->name, sym_sec_name);
3460
+ return -LIBBPF_ERRNO__RELOC;
3461
+ }
3462
+
3463
+ reloc_desc->type = RELO_DATA;
3464
+ reloc_desc->insn_idx = insn_idx;
3465
+ reloc_desc->map_idx = map_idx;
3466
+ reloc_desc->sym_off = sym->st_value;
3467
+ return 0;
3468
+}
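/*
 * Hedged illustration (BPF program C, not part of this file) of the source
 * constructs behind the relocation classes recorded above; all map, variable
 * and function names are made up. Extern relocations (RELO_EXTERN) come from
 * __kconfig/__ksym declarations like the ones sketched earlier.
 */
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, long);
} counts SEC(".maps");          /* ld_imm64 against .maps -> RELO_LD64 */

long total;                     /* global variable in .bss -> RELO_DATA */

static __attribute__((noinline)) void bump(int key)
{                               /* BPF_PSEUDO_CALL from a SEC() prog -> RELO_CALL */
	long *v = bpf_map_lookup_elem(&counts, &key);

	if (v)
		__sync_fetch_and_add(v, 1);
}

SEC("tracepoint/sched/sched_switch")
int on_switch(void *ctx)
{
	bump(0);
	total += 1;
	return 0;
}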
3469
+
3470
+static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
3471
+{
3472
+ return insn_idx >= prog->sec_insn_off &&
3473
+ insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
3474
+}
3475
+
3476
+static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
3477
+ size_t sec_idx, size_t insn_idx)
3478
+{
3479
+ int l = 0, r = obj->nr_programs - 1, m;
3480
+ struct bpf_program *prog;
3481
+
3482
+ while (l < r) {
3483
+ m = l + (r - l + 1) / 2;
3484
+ prog = &obj->programs[m];
3485
+
3486
+ if (prog->sec_idx < sec_idx ||
3487
+ (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
3488
+ l = m;
3489
+ else
3490
+ r = m - 1;
3491
+ }
3492
+ /* matching program could be at index l, but it still might be the
3493
+ * wrong one, so we need to double check conditions for the last time
3494
+ */
3495
+ prog = &obj->programs[l];
3496
+ if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
3497
+ return prog;
3498
+ return NULL;
3499
+}
3500
+
8943501 static int
895
-bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
896
- Elf_Data *data, struct bpf_object *obj)
3502
+bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
8973503 {
8983504 Elf_Data *symbols = obj->efile.symbols;
899
- int text_shndx = obj->efile.text_shndx;
900
- int maps_shndx = obj->efile.maps_shndx;
901
- struct bpf_map *maps = obj->maps;
902
- size_t nr_maps = obj->nr_maps;
903
- int i, nrels;
3505
+ const char *relo_sec_name, *sec_name;
3506
+ size_t sec_idx = shdr->sh_info;
3507
+ struct bpf_program *prog;
3508
+ struct reloc_desc *relos;
3509
+ int err, i, nrels;
3510
+ const char *sym_name;
3511
+ __u32 insn_idx;
3512
+ GElf_Sym sym;
3513
+ GElf_Rel rel;
9043514
905
- pr_debug("collecting relocating info for: '%s'\n",
906
- prog->section_name);
3515
+ relo_sec_name = elf_sec_str(obj, shdr->sh_name);
3516
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
3517
+ if (!relo_sec_name || !sec_name)
3518
+ return -EINVAL;
3519
+
3520
+ pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
3521
+ relo_sec_name, sec_idx, sec_name);
9073522 nrels = shdr->sh_size / shdr->sh_entsize;
9083523
909
- prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
910
- if (!prog->reloc_desc) {
911
- pr_warning("failed to alloc memory in relocation\n");
912
- return -ENOMEM;
913
- }
914
- prog->nr_reloc = nrels;
915
-
9163524 for (i = 0; i < nrels; i++) {
917
- GElf_Sym sym;
918
- GElf_Rel rel;
919
- unsigned int insn_idx;
920
- struct bpf_insn *insns = prog->insns;
921
- size_t map_idx;
922
-
9233525 if (!gelf_getrel(data, i, &rel)) {
924
- pr_warning("relocation: failed to get %d reloc\n", i);
3526
+ pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
3527
+ return -LIBBPF_ERRNO__FORMAT;
3528
+ }
3529
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
3530
+ pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
3531
+ relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
3532
+ return -LIBBPF_ERRNO__FORMAT;
3533
+ }
3534
+ if (rel.r_offset % BPF_INSN_SZ) {
3535
+ pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
3536
+ relo_sec_name, (size_t)rel.r_offset, i);
9253537 return -LIBBPF_ERRNO__FORMAT;
9263538 }
9273539
928
- if (!gelf_getsym(symbols,
929
- GELF_R_SYM(rel.r_info),
930
- &sym)) {
931
- pr_warning("relocation: symbol %"PRIx64" not found\n",
932
- GELF_R_SYM(rel.r_info));
933
- return -LIBBPF_ERRNO__FORMAT;
934
- }
935
- pr_debug("relo for %lld value %lld name %d\n",
936
- (long long) (rel.r_info >> 32),
937
- (long long) sym.st_value, sym.st_name);
3540
+ insn_idx = rel.r_offset / BPF_INSN_SZ;
3541
+ /* relocations against static functions are recorded as
3542
+ * relocations against the section that contains a function;
3543
+ * in such case, symbol will be STT_SECTION and sym.st_name
3544
+ * will point to empty string (0), so fetch section name
3545
+ * instead
3546
+ */
3547
+ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
3548
+ sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
3549
+ else
3550
+ sym_name = elf_sym_str(obj, sym.st_name);
3551
+ sym_name = sym_name ?: "<?";
9383552
939
- if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
940
- pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
941
- prog->section_name, sym.st_shndx);
3553
+ pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
3554
+ relo_sec_name, i, insn_idx, sym_name);
3555
+
3556
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
3557
+ if (!prog) {
3558
+ pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
3559
+ relo_sec_name, i, sec_name, insn_idx);
9423560 return -LIBBPF_ERRNO__RELOC;
9433561 }
9443562
945
- insn_idx = rel.r_offset / sizeof(struct bpf_insn);
946
- pr_debug("relocation: insn_idx=%u\n", insn_idx);
3563
+ relos = libbpf_reallocarray(prog->reloc_desc,
3564
+ prog->nr_reloc + 1, sizeof(*relos));
3565
+ if (!relos)
3566
+ return -ENOMEM;
3567
+ prog->reloc_desc = relos;
9473568
948
- if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
949
- if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
950
- pr_warning("incorrect bpf_call opcode\n");
951
- return -LIBBPF_ERRNO__RELOC;
952
- }
953
- prog->reloc_desc[i].type = RELO_CALL;
954
- prog->reloc_desc[i].insn_idx = insn_idx;
955
- prog->reloc_desc[i].text_off = sym.st_value;
956
- obj->has_pseudo_calls = true;
957
- continue;
958
- }
3569
+ /* adjust insn_idx to local BPF program frame of reference */
3570
+ insn_idx -= prog->sec_insn_off;
3571
+ err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
3572
+ insn_idx, sym_name, &sym, &rel);
3573
+ if (err)
3574
+ return err;
9593575
960
- if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
961
- pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
962
- insn_idx, insns[insn_idx].code);
963
- return -LIBBPF_ERRNO__RELOC;
964
- }
965
-
966
- /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */
967
- for (map_idx = 0; map_idx < nr_maps; map_idx++) {
968
- if (maps[map_idx].offset == sym.st_value) {
969
- pr_debug("relocation: find map %zd (%s) for insn %u\n",
970
- map_idx, maps[map_idx].name, insn_idx);
971
- break;
972
- }
973
- }
974
-
975
- if (map_idx >= nr_maps) {
976
- pr_warning("bpf relocation: map_idx %d large than %d\n",
977
- (int)map_idx, (int)nr_maps - 1);
978
- return -LIBBPF_ERRNO__RELOC;
979
- }
980
-
981
- prog->reloc_desc[i].type = RELO_LD64;
982
- prog->reloc_desc[i].insn_idx = insn_idx;
983
- prog->reloc_desc[i].map_idx = map_idx;
3576
+ prog->nr_reloc++;
9843577 }
9853578 return 0;
9863579 }
9873580
988
-static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
3581
+static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
9893582 {
990
- const struct btf_type *container_type;
991
- const struct btf_member *key, *value;
9923583 struct bpf_map_def *def = &map->def;
993
- const size_t max_name = 256;
994
- char container_name[max_name];
995
- __s64 key_size, value_size;
996
- __s32 container_id;
3584
+ __u32 key_type_id = 0, value_type_id = 0;
3585
+ int ret;
9973586
998
- if (snprintf(container_name, max_name, "____btf_map_%s", map->name) ==
999
- max_name) {
1000
- pr_warning("map:%s length of '____btf_map_%s' is too long\n",
1001
- map->name, map->name);
1002
- return -EINVAL;
3587
+ /* if it's BTF-defined map, we don't need to search for type IDs.
3588
+ * For struct_ops map, it does not need btf_key_type_id and
3589
+ * btf_value_type_id.
3590
+ */
3591
+ if (map->sec_idx == obj->efile.btf_maps_shndx ||
3592
+ bpf_map__is_struct_ops(map))
3593
+ return 0;
3594
+
3595
+ if (!bpf_map__is_internal(map)) {
3596
+ ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
3597
+ def->value_size, &key_type_id,
3598
+ &value_type_id);
3599
+ } else {
3600
+ /*
3601
+ * LLVM annotates global data differently in BTF, that is,
3602
+ * only as '.data', '.bss' or '.rodata'.
3603
+ */
3604
+ ret = btf__find_by_name(obj->btf,
3605
+ libbpf_type_to_btf_name[map->libbpf_type]);
3606
+ }
3607
+ if (ret < 0)
3608
+ return ret;
3609
+
3610
+ map->btf_key_type_id = key_type_id;
3611
+ map->btf_value_type_id = bpf_map__is_internal(map) ?
3612
+ ret : value_type_id;
3613
+ return 0;
3614
+}
3615
+
3616
+static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
3617
+{
3618
+ char file[PATH_MAX], buff[4096];
3619
+ FILE *fp;
3620
+ __u32 val;
3621
+ int err;
3622
+
3623
+ snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
3624
+ memset(info, 0, sizeof(*info));
3625
+
3626
+ fp = fopen(file, "r");
3627
+ if (!fp) {
3628
+ err = -errno;
3629
+ pr_warn("failed to open %s: %d. No procfs support?\n", file,
3630
+ err);
3631
+ return err;
10033632 }
10043633
1005
- container_id = btf__find_by_name(btf, container_name);
1006
- if (container_id < 0) {
1007
- pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
1008
- map->name, container_name);
1009
- return container_id;
3634
+ while (fgets(buff, sizeof(buff), fp)) {
3635
+ if (sscanf(buff, "map_type:\t%u", &val) == 1)
3636
+ info->type = val;
3637
+ else if (sscanf(buff, "key_size:\t%u", &val) == 1)
3638
+ info->key_size = val;
3639
+ else if (sscanf(buff, "value_size:\t%u", &val) == 1)
3640
+ info->value_size = val;
3641
+ else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
3642
+ info->max_entries = val;
3643
+ else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
3644
+ info->map_flags = val;
10103645 }
10113646
1012
- container_type = btf__type_by_id(btf, container_id);
1013
- if (!container_type) {
1014
- pr_warning("map:%s cannot find BTF type for container_id:%u\n",
1015
- map->name, container_id);
1016
- return -EINVAL;
1017
- }
1018
-
1019
- if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT ||
1020
- BTF_INFO_VLEN(container_type->info) < 2) {
1021
- pr_warning("map:%s container_name:%s is an invalid container struct\n",
1022
- map->name, container_name);
1023
- return -EINVAL;
1024
- }
1025
-
1026
- key = (struct btf_member *)(container_type + 1);
1027
- value = key + 1;
1028
-
1029
- key_size = btf__resolve_size(btf, key->type);
1030
- if (key_size < 0) {
1031
- pr_warning("map:%s invalid BTF key_type_size\n",
1032
- map->name);
1033
- return key_size;
1034
- }
1035
-
1036
- if (def->key_size != key_size) {
1037
- pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
1038
- map->name, (__u32)key_size, def->key_size);
1039
- return -EINVAL;
1040
- }
1041
-
1042
- value_size = btf__resolve_size(btf, value->type);
1043
- if (value_size < 0) {
1044
- pr_warning("map:%s invalid BTF value_type_size\n", map->name);
1045
- return value_size;
1046
- }
1047
-
1048
- if (def->value_size != value_size) {
1049
- pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
1050
- map->name, (__u32)value_size, def->value_size);
1051
- return -EINVAL;
1052
- }
1053
-
1054
- map->btf_key_type_id = key->type;
1055
- map->btf_value_type_id = value->type;
3647
+ fclose(fp);
10563648
10573649 return 0;
10583650 }
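/*
 * Roughly what the text parsed above looks like for a BPF map fd (hedged,
 * illustrative values only; the exact field set varies by kernel version):
 *
 *   pos:	0
 *   flags:	02000002
 *   mnt_id:	14
 *   map_type:	1
 *   key_size:	4
 *   value_size:	8
 *   max_entries:	128
 *   map_flags:	0x0
 *   memlock:	4096
 *
 * Only the map_type/key_size/value_size/max_entries/map_flags lines are
 * consumed by the sscanf() loop; unknown lines are simply ignored.
 */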
....@@ -1060,15 +3652,22 @@
10603652 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
10613653 {
10623654 struct bpf_map_info info = {};
1063
- __u32 len = sizeof(info);
3655
+ __u32 len = sizeof(info), name_len;
10643656 int new_fd, err;
10653657 char *new_name;
10663658
10673659 err = bpf_obj_get_info_by_fd(fd, &info, &len);
3660
+ if (err && errno == EINVAL)
3661
+ err = bpf_get_map_info_from_fdinfo(fd, &info);
10683662 if (err)
10693663 return err;
10703664
1071
- new_name = strdup(info.name);
3665
+ name_len = strlen(info.name);
3666
+ if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
3667
+ new_name = strdup(map->name);
3668
+ else
3669
+ new_name = strdup(info.name);
3670
+
10723671 if (!new_name)
10733672 return -errno;
10743673
....@@ -1100,6 +3699,7 @@
11003699 map->def.map_flags = info.map_flags;
11013700 map->btf_key_type_id = info.btf_key_type_id;
11023701 map->btf_value_type_id = info.btf_value_type_id;
3702
+ map->reused = true;
11033703
11043704 return 0;
11053705
....@@ -1110,272 +3710,3102 @@
11103710 return err;
11113711 }
11123712
1113
-static int
1114
-bpf_object__create_maps(struct bpf_object *obj)
3713
+__u32 bpf_map__max_entries(const struct bpf_map *map)
11153714 {
1116
- struct bpf_create_map_attr create_attr = {};
1117
- unsigned int i;
3715
+ return map->def.max_entries;
3716
+}
3717
+
3718
+int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
3719
+{
3720
+ if (map->fd >= 0)
3721
+ return -EBUSY;
3722
+ map->def.max_entries = max_entries;
3723
+ return 0;
3724
+}
3725
+
3726
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
3727
+{
3728
+ if (!map || !max_entries)
3729
+ return -EINVAL;
3730
+
3731
+ return bpf_map__set_max_entries(map, max_entries);
3732
+}
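/*
 * Hedged usage sketch: resizing a map between bpf_object__open() and
 * bpf_object__load(), the only window where this can succeed; the map name
 * "events" is a made-up example.
 */
static int example_resize_map(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "events");
	int nr_cpus = libbpf_num_possible_cpus();

	if (!map || nr_cpus < 0)
		return -EINVAL;
	/* returns -EBUSY once the map already has an FD (created or reused) */
	return bpf_map__set_max_entries(map, 4 * nr_cpus);
}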
3733
+
3734
+static int
3735
+bpf_object__probe_loading(struct bpf_object *obj)
3736
+{
3737
+ struct bpf_load_program_attr attr;
3738
+ char *cp, errmsg[STRERR_BUFSIZE];
3739
+ struct bpf_insn insns[] = {
3740
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3741
+ BPF_EXIT_INSN(),
3742
+ };
3743
+ int ret;
3744
+
3745
+ /* make sure basic loading works */
3746
+
3747
+ memset(&attr, 0, sizeof(attr));
3748
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3749
+ attr.insns = insns;
3750
+ attr.insns_cnt = ARRAY_SIZE(insns);
3751
+ attr.license = "GPL";
3752
+
3753
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
3754
+ if (ret < 0) {
3755
+ ret = errno;
3756
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3757
+ pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
3758
+ "program. Make sure your kernel supports BPF "
3759
+ "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
3760
+ "set to big enough value.\n", __func__, cp, ret);
3761
+ return -ret;
3762
+ }
3763
+ close(ret);
3764
+
3765
+ return 0;
3766
+}
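/*
 * Hedged sketch of the remedy hinted at by the warning above: on kernels that
 * charge BPF maps and programs against RLIMIT_MEMLOCK, loaders typically raise
 * the limit once before creating any BPF objects.
 */
static int example_bump_memlock_rlimit(void)
{
	struct rlimit rlim = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};

	return setrlimit(RLIMIT_MEMLOCK, &rlim);
}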
3767
+
3768
+static int probe_fd(int fd)
3769
+{
3770
+ if (fd >= 0)
3771
+ close(fd);
3772
+ return fd >= 0;
3773
+}
3774
+
3775
+static int probe_kern_prog_name(void)
3776
+{
3777
+ struct bpf_load_program_attr attr;
3778
+ struct bpf_insn insns[] = {
3779
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3780
+ BPF_EXIT_INSN(),
3781
+ };
3782
+ int ret;
3783
+
3784
+ /* make sure loading with name works */
3785
+
3786
+ memset(&attr, 0, sizeof(attr));
3787
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3788
+ attr.insns = insns;
3789
+ attr.insns_cnt = ARRAY_SIZE(insns);
3790
+ attr.license = "GPL";
3791
+ attr.name = "test";
3792
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
3793
+ return probe_fd(ret);
3794
+}
3795
+
3796
+static int probe_kern_global_data(void)
3797
+{
3798
+ struct bpf_load_program_attr prg_attr;
3799
+ struct bpf_create_map_attr map_attr;
3800
+ char *cp, errmsg[STRERR_BUFSIZE];
3801
+ struct bpf_insn insns[] = {
3802
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
3803
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
3804
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3805
+ BPF_EXIT_INSN(),
3806
+ };
3807
+ int ret, map;
3808
+
3809
+ memset(&map_attr, 0, sizeof(map_attr));
3810
+ map_attr.map_type = BPF_MAP_TYPE_ARRAY;
3811
+ map_attr.key_size = sizeof(int);
3812
+ map_attr.value_size = 32;
3813
+ map_attr.max_entries = 1;
3814
+
3815
+ map = bpf_create_map_xattr(&map_attr);
3816
+ if (map < 0) {
3817
+ ret = -errno;
3818
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3819
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
3820
+ __func__, cp, -ret);
3821
+ return ret;
3822
+ }
3823
+
3824
+ insns[0].imm = map;
3825
+
3826
+ memset(&prg_attr, 0, sizeof(prg_attr));
3827
+ prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3828
+ prg_attr.insns = insns;
3829
+ prg_attr.insns_cnt = ARRAY_SIZE(insns);
3830
+ prg_attr.license = "GPL";
3831
+
3832
+ ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
3833
+ close(map);
3834
+ return probe_fd(ret);
3835
+}
3836
+
3837
+static int probe_kern_btf(void)
3838
+{
3839
+ static const char strs[] = "\0int";
3840
+ __u32 types[] = {
3841
+ /* int */
3842
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
3843
+ };
3844
+
3845
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3846
+ strs, sizeof(strs)));
3847
+}
3848
+
3849
+static int probe_kern_btf_func(void)
3850
+{
3851
+ static const char strs[] = "\0int\0x\0a";
3852
+ /* void x(int a) {} */
3853
+ __u32 types[] = {
3854
+ /* int */
3855
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
3856
+ /* FUNC_PROTO */ /* [2] */
3857
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3858
+ BTF_PARAM_ENC(7, 1),
3859
+ /* FUNC x */ /* [3] */
3860
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
3861
+ };
3862
+
3863
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3864
+ strs, sizeof(strs)));
3865
+}
3866
+
3867
+static int probe_kern_btf_func_global(void)
3868
+{
3869
+ static const char strs[] = "\0int\0x\0a";
3870
+ /* non-static void x(int a) {} */
3871
+ __u32 types[] = {
3872
+ /* int */
3873
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
3874
+ /* FUNC_PROTO */ /* [2] */
3875
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3876
+ BTF_PARAM_ENC(7, 1),
3877
+ /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
3878
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
3879
+ };
3880
+
3881
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3882
+ strs, sizeof(strs)));
3883
+}
3884
+
3885
+static int probe_kern_btf_datasec(void)
3886
+{
3887
+ static const char strs[] = "\0x\0.data";
3888
+ /* static int a; */
3889
+ __u32 types[] = {
3890
+ /* int */
3891
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
3892
+ /* VAR x */ /* [2] */
3893
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
3894
+ BTF_VAR_STATIC,
3895
+ /* DATASEC val */ /* [3] */
3896
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
3897
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
3898
+ };
3899
+
3900
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3901
+ strs, sizeof(strs)));
3902
+}
3903
+
3904
+static int probe_kern_array_mmap(void)
3905
+{
3906
+ struct bpf_create_map_attr attr = {
3907
+ .map_type = BPF_MAP_TYPE_ARRAY,
3908
+ .map_flags = BPF_F_MMAPABLE,
3909
+ .key_size = sizeof(int),
3910
+ .value_size = sizeof(int),
3911
+ .max_entries = 1,
3912
+ };
3913
+
3914
+ return probe_fd(bpf_create_map_xattr(&attr));
3915
+}
3916
+
3917
+static int probe_kern_exp_attach_type(void)
3918
+{
3919
+ struct bpf_load_program_attr attr;
3920
+ struct bpf_insn insns[] = {
3921
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3922
+ BPF_EXIT_INSN(),
3923
+ };
3924
+
3925
+ memset(&attr, 0, sizeof(attr));
3926
+ /* use any valid combination of program type and (optional)
3927
+ * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
3928
+ * to see if kernel supports expected_attach_type field for
3929
+ * BPF_PROG_LOAD command
3930
+ */
3931
+ attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
3932
+ attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
3933
+ attr.insns = insns;
3934
+ attr.insns_cnt = ARRAY_SIZE(insns);
3935
+ attr.license = "GPL";
3936
+
3937
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3938
+}
3939
+
3940
+static int probe_kern_probe_read_kernel(void)
3941
+{
3942
+ struct bpf_load_program_attr attr;
3943
+ struct bpf_insn insns[] = {
3944
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
3945
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
3946
+ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
3947
+ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
3948
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
3949
+ BPF_EXIT_INSN(),
3950
+ };
3951
+
3952
+ memset(&attr, 0, sizeof(attr));
3953
+ attr.prog_type = BPF_PROG_TYPE_KPROBE;
3954
+ attr.insns = insns;
3955
+ attr.insns_cnt = ARRAY_SIZE(insns);
3956
+ attr.license = "GPL";
3957
+
3958
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3959
+}
3960
+
3961
+static int probe_prog_bind_map(void)
3962
+{
3963
+ struct bpf_load_program_attr prg_attr;
3964
+ struct bpf_create_map_attr map_attr;
3965
+ char *cp, errmsg[STRERR_BUFSIZE];
3966
+ struct bpf_insn insns[] = {
3967
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3968
+ BPF_EXIT_INSN(),
3969
+ };
3970
+ int ret, map, prog;
3971
+
3972
+ memset(&map_attr, 0, sizeof(map_attr));
3973
+ map_attr.map_type = BPF_MAP_TYPE_ARRAY;
3974
+ map_attr.key_size = sizeof(int);
3975
+ map_attr.value_size = 32;
3976
+ map_attr.max_entries = 1;
3977
+
3978
+ map = bpf_create_map_xattr(&map_attr);
3979
+ if (map < 0) {
3980
+ ret = -errno;
3981
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3982
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
3983
+ __func__, cp, -ret);
3984
+ return ret;
3985
+ }
3986
+
3987
+ memset(&prg_attr, 0, sizeof(prg_attr));
3988
+ prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3989
+ prg_attr.insns = insns;
3990
+ prg_attr.insns_cnt = ARRAY_SIZE(insns);
3991
+ prg_attr.license = "GPL";
3992
+
3993
+ prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
3994
+ if (prog < 0) {
3995
+ close(map);
3996
+ return 0;
3997
+ }
3998
+
3999
+ ret = bpf_prog_bind_map(prog, map, NULL);
4000
+
4001
+ close(map);
4002
+ close(prog);
4003
+
4004
+ return ret >= 0;
4005
+}
4006
+
4007
+enum kern_feature_result {
4008
+ FEAT_UNKNOWN = 0,
4009
+ FEAT_SUPPORTED = 1,
4010
+ FEAT_MISSING = 2,
4011
+};
4012
+
4013
+typedef int (*feature_probe_fn)(void);
4014
+
4015
+static struct kern_feature_desc {
4016
+ const char *desc;
4017
+ feature_probe_fn probe;
4018
+ enum kern_feature_result res;
4019
+} feature_probes[__FEAT_CNT] = {
4020
+ [FEAT_PROG_NAME] = {
4021
+ "BPF program name", probe_kern_prog_name,
4022
+ },
4023
+ [FEAT_GLOBAL_DATA] = {
4024
+ "global variables", probe_kern_global_data,
4025
+ },
4026
+ [FEAT_BTF] = {
4027
+ "minimal BTF", probe_kern_btf,
4028
+ },
4029
+ [FEAT_BTF_FUNC] = {
4030
+ "BTF functions", probe_kern_btf_func,
4031
+ },
4032
+ [FEAT_BTF_GLOBAL_FUNC] = {
4033
+ "BTF global function", probe_kern_btf_func_global,
4034
+ },
4035
+ [FEAT_BTF_DATASEC] = {
4036
+ "BTF data section and variable", probe_kern_btf_datasec,
4037
+ },
4038
+ [FEAT_ARRAY_MMAP] = {
4039
+ "ARRAY map mmap()", probe_kern_array_mmap,
4040
+ },
4041
+ [FEAT_EXP_ATTACH_TYPE] = {
4042
+ "BPF_PROG_LOAD expected_attach_type attribute",
4043
+ probe_kern_exp_attach_type,
4044
+ },
4045
+ [FEAT_PROBE_READ_KERN] = {
4046
+ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
4047
+ },
4048
+ [FEAT_PROG_BIND_MAP] = {
4049
+ "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
4050
+ }
4051
+};
4052
+
4053
+static bool kernel_supports(enum kern_feature_id feat_id)
4054
+{
4055
+ struct kern_feature_desc *feat = &feature_probes[feat_id];
4056
+ int ret;
4057
+
4058
+ if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4059
+ ret = feat->probe();
4060
+ if (ret > 0) {
4061
+ WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4062
+ } else if (ret == 0) {
4063
+ WRITE_ONCE(feat->res, FEAT_MISSING);
4064
+ } else {
4065
+ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4066
+ WRITE_ONCE(feat->res, FEAT_MISSING);
4067
+ }
4068
+ }
4069
+
4070
+ return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4071
+}
4072
+
4073
+static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4074
+{
4075
+ struct bpf_map_info map_info = {};
4076
+ char msg[STRERR_BUFSIZE];
4077
+ __u32 map_info_len;
11184078 int err;
11194079
1120
- for (i = 0; i < obj->nr_maps; i++) {
1121
- struct bpf_map *map = &obj->maps[i];
1122
- struct bpf_map_def *def = &map->def;
1123
- char *cp, errmsg[STRERR_BUFSIZE];
1124
- int *pfd = &map->fd;
4080
+ map_info_len = sizeof(map_info);
11254081
1126
- if (map->fd >= 0) {
1127
- pr_debug("skip map create (preset) %s: fd=%d\n",
1128
- map->name, map->fd);
1129
- continue;
4082
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
4083
+ if (err && errno == EINVAL)
4084
+ err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4085
+ if (err) {
4086
+ pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4087
+ libbpf_strerror_r(errno, msg, sizeof(msg)));
4088
+ return false;
4089
+ }
4090
+
4091
+ return (map_info.type == map->def.type &&
4092
+ map_info.key_size == map->def.key_size &&
4093
+ map_info.value_size == map->def.value_size &&
4094
+ map_info.max_entries == map->def.max_entries &&
4095
+ map_info.map_flags == map->def.map_flags);
4096
+}
4097
+
4098
+static int
4099
+bpf_object__reuse_map(struct bpf_map *map)
4100
+{
4101
+ char *cp, errmsg[STRERR_BUFSIZE];
4102
+ int err, pin_fd;
4103
+
4104
+ pin_fd = bpf_obj_get(map->pin_path);
4105
+ if (pin_fd < 0) {
4106
+ err = -errno;
4107
+ if (err == -ENOENT) {
4108
+ pr_debug("found no pinned map to reuse at '%s'\n",
4109
+ map->pin_path);
4110
+ return 0;
11304111 }
11314112
4113
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4114
+ pr_warn("couldn't retrieve pinned map '%s': %s\n",
4115
+ map->pin_path, cp);
4116
+ return err;
4117
+ }
4118
+
4119
+ if (!map_is_reuse_compat(map, pin_fd)) {
4120
+ pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4121
+ map->pin_path);
4122
+ close(pin_fd);
4123
+ return -EINVAL;
4124
+ }
4125
+
4126
+ err = bpf_map__reuse_fd(map, pin_fd);
4127
+ if (err) {
4128
+ close(pin_fd);
4129
+ return err;
4130
+ }
4131
+ map->pinned = true;
4132
+ pr_debug("reused pinned map at '%s'\n", map->pin_path);
4133
+
4134
+ return 0;
4135
+}
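/*
 * Hedged usage sketch: opting a map into the pin/reuse flow above by setting a
 * pin path before load; the map name and bpffs path are made-up examples. A
 * second loader then reuses the pinned map's FD instead of creating a new map.
 */
static int example_enable_map_reuse(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "shared_stats");

	if (!map)
		return -ENOENT;
	return bpf_map__set_pin_path(map, "/sys/fs/bpf/shared_stats");
}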
4136
+
4137
+static int
4138
+bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4139
+{
4140
+ enum libbpf_map_type map_type = map->libbpf_type;
4141
+ char *cp, errmsg[STRERR_BUFSIZE];
4142
+ int err, zero = 0;
4143
+
4144
+ err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4145
+ if (err) {
4146
+ err = -errno;
4147
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4148
+ pr_warn("Error setting initial map(%s) contents: %s\n",
4149
+ map->name, cp);
4150
+ return err;
4151
+ }
4152
+
4153
+ /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4154
+ if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4155
+ err = bpf_map_freeze(map->fd);
4156
+ if (err) {
4157
+ err = -errno;
4158
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4159
+ pr_warn("Error freezing map(%s) as read-only: %s\n",
4160
+ map->name, cp);
4161
+ return err;
4162
+ }
4163
+ }
4164
+ return 0;
4165
+}
4166
+
4167
+static void bpf_map__destroy(struct bpf_map *map);
4168
+
4169
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
4170
+{
4171
+ struct bpf_create_map_attr create_attr;
4172
+ struct bpf_map_def *def = &map->def;
4173
+ int err = 0;
4174
+
4175
+ memset(&create_attr, 0, sizeof(create_attr));
4176
+
4177
+ if (kernel_supports(FEAT_PROG_NAME))
11324178 create_attr.name = map->name;
1133
- create_attr.map_ifindex = map->map_ifindex;
1134
- create_attr.map_type = def->type;
1135
- create_attr.map_flags = def->map_flags;
1136
- create_attr.key_size = def->key_size;
1137
- create_attr.value_size = def->value_size;
4179
+ create_attr.map_ifindex = map->map_ifindex;
4180
+ create_attr.map_type = def->type;
4181
+ create_attr.map_flags = def->map_flags;
4182
+ create_attr.key_size = def->key_size;
4183
+ create_attr.value_size = def->value_size;
4184
+ create_attr.numa_node = map->numa_node;
4185
+
4186
+ if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
4187
+ int nr_cpus;
4188
+
4189
+ nr_cpus = libbpf_num_possible_cpus();
4190
+ if (nr_cpus < 0) {
4191
+ pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
4192
+ map->name, nr_cpus);
4193
+ return nr_cpus;
4194
+ }
4195
+ pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
4196
+ create_attr.max_entries = nr_cpus;
4197
+ } else {
11384198 create_attr.max_entries = def->max_entries;
4199
+ }
4200
+
4201
+ if (bpf_map__is_struct_ops(map))
4202
+ create_attr.btf_vmlinux_value_type_id =
4203
+ map->btf_vmlinux_value_type_id;
4204
+
4205
+ create_attr.btf_fd = 0;
4206
+ create_attr.btf_key_type_id = 0;
4207
+ create_attr.btf_value_type_id = 0;
4208
+ if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
4209
+ create_attr.btf_fd = btf__fd(obj->btf);
4210
+ create_attr.btf_key_type_id = map->btf_key_type_id;
4211
+ create_attr.btf_value_type_id = map->btf_value_type_id;
4212
+ }
4213
+
4214
+ if (bpf_map_type__is_map_in_map(def->type)) {
4215
+ if (map->inner_map) {
4216
+ err = bpf_object__create_map(obj, map->inner_map);
4217
+ if (err) {
4218
+ pr_warn("map '%s': failed to create inner map: %d\n",
4219
+ map->name, err);
4220
+ return err;
4221
+ }
4222
+ map->inner_map_fd = bpf_map__fd(map->inner_map);
4223
+ }
4224
+ if (map->inner_map_fd >= 0)
4225
+ create_attr.inner_map_fd = map->inner_map_fd;
4226
+ }
4227
+
4228
+ map->fd = bpf_create_map_xattr(&create_attr);
4229
+ if (map->fd < 0 && (create_attr.btf_key_type_id ||
4230
+ create_attr.btf_value_type_id)) {
4231
+ char *cp, errmsg[STRERR_BUFSIZE];
4232
+
4233
+ err = -errno;
4234
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4235
+ pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
4236
+ map->name, cp, err);
11394237 create_attr.btf_fd = 0;
11404238 create_attr.btf_key_type_id = 0;
11414239 create_attr.btf_value_type_id = 0;
4240
+ map->btf_key_type_id = 0;
4241
+ map->btf_value_type_id = 0;
4242
+ map->fd = bpf_create_map_xattr(&create_attr);
4243
+ }
11424244
1143
- if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
1144
- create_attr.btf_fd = btf__fd(obj->btf);
1145
- create_attr.btf_key_type_id = map->btf_key_type_id;
1146
- create_attr.btf_value_type_id = map->btf_value_type_id;
1147
- }
4245
+ err = map->fd < 0 ? -errno : 0;
11484246
1149
- *pfd = bpf_create_map_xattr(&create_attr);
1150
- if (*pfd < 0 && create_attr.btf_key_type_id) {
1151
- cp = str_error(errno, errmsg, sizeof(errmsg));
1152
- pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
1153
- map->name, cp, errno);
1154
- create_attr.btf_fd = 0;
1155
- create_attr.btf_key_type_id = 0;
1156
- create_attr.btf_value_type_id = 0;
1157
- map->btf_key_type_id = 0;
1158
- map->btf_value_type_id = 0;
1159
- *pfd = bpf_create_map_xattr(&create_attr);
1160
- }
4247
+ if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
4248
+ bpf_map__destroy(map->inner_map);
4249
+ zfree(&map->inner_map);
4250
+ }
11614251
1162
- if (*pfd < 0) {
1163
- size_t j;
4252
+ return err;
4253
+}
11644254
1165
- err = *pfd;
1166
- cp = str_error(errno, errmsg, sizeof(errmsg));
1167
- pr_warning("failed to create map (name: '%s'): %s\n",
1168
- map->name, cp);
1169
- for (j = 0; j < i; j++)
1170
- zclose(obj->maps[j].fd);
4255
+static int init_map_slots(struct bpf_map *map)
4256
+{
4257
+ const struct bpf_map *targ_map;
4258
+ unsigned int i;
4259
+ int fd, err;
4260
+
4261
+ for (i = 0; i < map->init_slots_sz; i++) {
4262
+ if (!map->init_slots[i])
4263
+ continue;
4264
+
4265
+ targ_map = map->init_slots[i];
4266
+ fd = bpf_map__fd(targ_map);
4267
+ err = bpf_map_update_elem(map->fd, &i, &fd, 0);
4268
+ if (err) {
4269
+ err = -errno;
4270
+ pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
4271
+ map->name, i, targ_map->name,
4272
+ fd, err);
11714273 return err;
11724274 }
1173
- pr_debug("create map %s: fd=%d\n", map->name, *pfd);
4275
+ pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
4276
+ map->name, i, targ_map->name, fd);
11744277 }
4278
+
4279
+ zfree(&map->init_slots);
4280
+ map->init_slots_sz = 0;
11754281
11764282 return 0;
11774283 }
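/*
 * Hedged illustration (BPF program C, not part of this file) of a BTF-defined
 * map-in-map whose initializer ends up in init_slots[]; all names are made up.
 */
struct inner_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, int);
} inner_a SEC(".maps"), inner_b SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 4);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
	__array(values, struct inner_map);
} outer SEC(".maps") = {
	/* slots 0 and 2 are wired up by init_map_slots() after map creation */
	.values = { [0] = &inner_a, [2] = &inner_b },
};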
11784284
11794285 static int
1180
-bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
1181
- struct reloc_desc *relo)
4286
+bpf_object__create_maps(struct bpf_object *obj)
11824287 {
1183
- struct bpf_insn *insn, *new_insn;
1184
- struct bpf_program *text;
1185
- size_t new_cnt;
4288
+ struct bpf_map *map;
4289
+ char *cp, errmsg[STRERR_BUFSIZE];
4290
+ unsigned int i, j;
4291
+ int err;
4292
+ bool retried;
11864293
1187
- if (relo->type != RELO_CALL)
1188
- return -LIBBPF_ERRNO__RELOC;
4294
+ for (i = 0; i < obj->nr_maps; i++) {
4295
+ map = &obj->maps[i];
11894296
1190
- if (prog->idx == obj->efile.text_shndx) {
1191
- pr_warning("relo in .text insn %d into off %d\n",
1192
- relo->insn_idx, relo->text_off);
1193
- return -LIBBPF_ERRNO__RELOC;
4297
+ retried = false;
4298
+retry:
4299
+ if (map->pin_path) {
4300
+ err = bpf_object__reuse_map(map);
4301
+ if (err) {
4302
+ pr_warn("map '%s': error reusing pinned map\n",
4303
+ map->name);
4304
+ goto err_out;
4305
+ }
4306
+ if (retried && map->fd < 0) {
4307
+ pr_warn("map '%s': cannot find pinned map\n",
4308
+ map->name);
4309
+ err = -ENOENT;
4310
+ goto err_out;
4311
+ }
4312
+ }
4313
+
4314
+ if (map->fd >= 0) {
4315
+ pr_debug("map '%s': skipping creation (preset fd=%d)\n",
4316
+ map->name, map->fd);
4317
+ } else {
4318
+ err = bpf_object__create_map(obj, map);
4319
+ if (err)
4320
+ goto err_out;
4321
+
4322
+ pr_debug("map '%s': created successfully, fd=%d\n",
4323
+ map->name, map->fd);
4324
+
4325
+ if (bpf_map__is_internal(map)) {
4326
+ err = bpf_object__populate_internal_map(obj, map);
4327
+ if (err < 0) {
4328
+ zclose(map->fd);
4329
+ goto err_out;
4330
+ }
4331
+ }
4332
+
4333
+ if (map->init_slots_sz) {
4334
+ err = init_map_slots(map);
4335
+ if (err < 0) {
4336
+ zclose(map->fd);
4337
+ goto err_out;
4338
+ }
4339
+ }
4340
+ }
4341
+
4342
+ if (map->pin_path && !map->pinned) {
4343
+ err = bpf_map__pin(map, NULL);
4344
+ if (err) {
4345
+ zclose(map->fd);
4346
+ if (!retried && err == -EEXIST) {
4347
+ retried = true;
4348
+ goto retry;
4349
+ }
4350
+ pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
4351
+ map->name, map->pin_path, err);
4352
+ goto err_out;
4353
+ }
4354
+ }
11944355 }
11954356
1196
- if (prog->main_prog_cnt == 0) {
1197
- text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
1198
- if (!text) {
1199
- pr_warning("no .text section found yet relo into text exist\n");
1200
- return -LIBBPF_ERRNO__RELOC;
1201
- }
1202
- new_cnt = prog->insns_cnt + text->insns_cnt;
1203
- new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
1204
- if (!new_insn) {
1205
- pr_warning("oom in prog realloc\n");
1206
- return -ENOMEM;
1207
- }
1208
- memcpy(new_insn + prog->insns_cnt, text->insns,
1209
- text->insns_cnt * sizeof(*insn));
1210
- prog->insns = new_insn;
1211
- prog->main_prog_cnt = prog->insns_cnt;
1212
- prog->insns_cnt = new_cnt;
1213
- pr_debug("added %zd insn from %s to prog %s\n",
1214
- text->insns_cnt, text->section_name,
1215
- prog->section_name);
4357
+ return 0;
4358
+
4359
+err_out:
4360
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4361
+ pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
4362
+ pr_perm_msg(err);
4363
+ for (j = 0; j < i; j++)
4364
+ zclose(obj->maps[j].fd);
4365
+ return err;
4366
+}
4367
+
4368
+#define BPF_CORE_SPEC_MAX_LEN 64
4369
+
4370
+/* represents BPF CO-RE field or array element accessor */
4371
+struct bpf_core_accessor {
4372
+ __u32 type_id; /* struct/union type or array element type */
4373
+ __u32 idx; /* field index or array index */
4374
+ const char *name; /* field name or NULL for array accessor */
4375
+};
4376
+
4377
+struct bpf_core_spec {
4378
+ const struct btf *btf;
4379
+ /* high-level spec: named fields and array indices only */
4380
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
4381
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
4382
+ __u32 root_type_id;
4383
+ /* CO-RE relocation kind */
4384
+ enum bpf_core_relo_kind relo_kind;
4385
+ /* high-level spec length */
4386
+ int len;
4387
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
4388
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
4389
+ /* raw spec length */
4390
+ int raw_len;
4391
+ /* field bit offset represented by spec */
4392
+ __u32 bit_offset;
4393
+};
4394
+
4395
+static bool str_is_empty(const char *s)
4396
+{
4397
+ return !s || !s[0];
4398
+}
4399
+
4400
+static bool is_flex_arr(const struct btf *btf,
4401
+ const struct bpf_core_accessor *acc,
4402
+ const struct btf_array *arr)
4403
+{
4404
+ const struct btf_type *t;
4405
+
4406
+ /* not a flexible array if it's not inside a struct or has a non-zero size */
4407
+ if (!acc->name || arr->nelems > 0)
4408
+ return false;
4409
+
4410
+ /* has to be the last member of enclosing struct */
4411
+ t = btf__type_by_id(btf, acc->type_id);
4412
+ return acc->idx == btf_vlen(t) - 1;
4413
+}
4414
+
4415
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
4416
+{
4417
+ switch (kind) {
4418
+ case BPF_FIELD_BYTE_OFFSET: return "byte_off";
4419
+ case BPF_FIELD_BYTE_SIZE: return "byte_sz";
4420
+ case BPF_FIELD_EXISTS: return "field_exists";
4421
+ case BPF_FIELD_SIGNED: return "signed";
4422
+ case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
4423
+ case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
4424
+ case BPF_TYPE_ID_LOCAL: return "local_type_id";
4425
+ case BPF_TYPE_ID_TARGET: return "target_type_id";
4426
+ case BPF_TYPE_EXISTS: return "type_exists";
4427
+ case BPF_TYPE_SIZE: return "type_size";
4428
+ case BPF_ENUMVAL_EXISTS: return "enumval_exists";
4429
+ case BPF_ENUMVAL_VALUE: return "enumval_value";
4430
+ default: return "unknown";
12164431 }
1217
- insn = &prog->insns[relo->insn_idx];
1218
- insn->imm += prog->main_prog_cnt - relo->insn_idx;
4432
+}
4433
+
4434
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
4435
+{
4436
+ switch (kind) {
4437
+ case BPF_FIELD_BYTE_OFFSET:
4438
+ case BPF_FIELD_BYTE_SIZE:
4439
+ case BPF_FIELD_EXISTS:
4440
+ case BPF_FIELD_SIGNED:
4441
+ case BPF_FIELD_LSHIFT_U64:
4442
+ case BPF_FIELD_RSHIFT_U64:
4443
+ return true;
4444
+ default:
4445
+ return false;
4446
+ }
4447
+}
4448
+
4449
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
4450
+{
4451
+ switch (kind) {
4452
+ case BPF_TYPE_ID_LOCAL:
4453
+ case BPF_TYPE_ID_TARGET:
4454
+ case BPF_TYPE_EXISTS:
4455
+ case BPF_TYPE_SIZE:
4456
+ return true;
4457
+ default:
4458
+ return false;
4459
+ }
4460
+}
4461
+
4462
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
4463
+{
4464
+ switch (kind) {
4465
+ case BPF_ENUMVAL_EXISTS:
4466
+ case BPF_ENUMVAL_VALUE:
4467
+ return true;
4468
+ default:
4469
+ return false;
4470
+ }
4471
+}
4472
+
4473
+/*
4474
+ * Turn bpf_core_relo into a low- and high-level spec representation,
4475
+ * validating correctness along the way, as well as calculating resulting
4476
+ * field bit offset, specified by accessor string. Low-level spec captures
4477
+ * every single level of nestedness, including traversing anonymous
4478
+ * struct/union members. High-level one only captures semantically meaningful
4479
+ * "turning points": named fields and array indicies.
4480
+ * E.g., for this case:
4481
+ *
4482
+ * struct sample {
4483
+ * int __unimportant;
4484
+ * struct {
4485
+ * int __1;
4486
+ * int __2;
4487
+ * int a[7];
4488
+ * };
4489
+ * };
4490
+ *
4491
+ * struct sample *s = ...;
4492
+ *
4493
+ * int x = &s->a[3]; // access string = '0:1:2:3'
4494
+ *
4495
+ * Low-level spec has 1:1 mapping with each element of access string (it's
4496
+ * just a parsed access string representation): [0, 1, 2, 3].
4497
+ *
4498
+ * High-level spec will capture only 3 points:
4499
+ * - initial zero-index access by pointer (&s->... is the same as &s[0]...);
4500
+ * - field 'a' access (corresponds to '2' in low-level spec);
4501
+ * - array element #3 access (corresponds to '3' in low-level spec).
4502
+ *
4503
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
4504
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
4505
+ * spec and raw_spec are kept empty.
4506
+ *
4507
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
4508
+ * string to specify enumerator's value index that needs to be relocated.
4509
+ */
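/*
 * Hedged illustration (BPF program C, not part of this file) of code that
 * emits such field relocations; the struct, field and program names are only
 * examples. The ___local suffix is a CO-RE "flavor" and is ignored when
 * matching against the kernel's own task_struct, as explained further below.
 */
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

struct task_struct___local {
	int pid;
} __attribute__((preserve_access_index));

SEC("tp_btf/sched_switch")
int probe_example(void *ctx)
{
	struct task_struct___local *task;

	task = (void *)bpf_get_current_task();
	/* relocatable field read; with pid as the first member the accessor
	 * string is "0:0" in the scheme described above
	 */
	bpf_printk("pid=%d\n", BPF_CORE_READ(task, pid));
	return 0;
}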
4510
+static int bpf_core_parse_spec(const struct btf *btf,
4511
+ __u32 type_id,
4512
+ const char *spec_str,
4513
+ enum bpf_core_relo_kind relo_kind,
4514
+ struct bpf_core_spec *spec)
4515
+{
4516
+ int access_idx, parsed_len, i;
4517
+ struct bpf_core_accessor *acc;
4518
+ const struct btf_type *t;
4519
+ const char *name;
4520
+ __u32 id;
4521
+ __s64 sz;
4522
+
4523
+ if (str_is_empty(spec_str) || *spec_str == ':')
4524
+ return -EINVAL;
4525
+
4526
+ memset(spec, 0, sizeof(*spec));
4527
+ spec->btf = btf;
4528
+ spec->root_type_id = type_id;
4529
+ spec->relo_kind = relo_kind;
4530
+
4531
+ /* type-based relocations don't have a field access string */
4532
+ if (core_relo_is_type_based(relo_kind)) {
4533
+ if (strcmp(spec_str, "0"))
4534
+ return -EINVAL;
4535
+ return 0;
4536
+ }
4537
+
4538
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
4539
+ while (*spec_str) {
4540
+ if (*spec_str == ':')
4541
+ ++spec_str;
4542
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
4543
+ return -EINVAL;
4544
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4545
+ return -E2BIG;
4546
+ spec_str += parsed_len;
4547
+ spec->raw_spec[spec->raw_len++] = access_idx;
4548
+ }
4549
+
4550
+ if (spec->raw_len == 0)
4551
+ return -EINVAL;
4552
+
4553
+ t = skip_mods_and_typedefs(btf, type_id, &id);
4554
+ if (!t)
4555
+ return -EINVAL;
4556
+
4557
+ access_idx = spec->raw_spec[0];
4558
+ acc = &spec->spec[0];
4559
+ acc->type_id = id;
4560
+ acc->idx = access_idx;
4561
+ spec->len++;
4562
+
4563
+ if (core_relo_is_enumval_based(relo_kind)) {
4564
+ if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
4565
+ return -EINVAL;
4566
+
4567
+ /* record enumerator name in a first accessor */
4568
+ acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
4569
+ return 0;
4570
+ }
4571
+
4572
+ if (!core_relo_is_field_based(relo_kind))
4573
+ return -EINVAL;
4574
+
4575
+ sz = btf__resolve_size(btf, id);
4576
+ if (sz < 0)
4577
+ return sz;
4578
+ spec->bit_offset = access_idx * sz * 8;
4579
+
4580
+ for (i = 1; i < spec->raw_len; i++) {
4581
+ t = skip_mods_and_typedefs(btf, id, &id);
4582
+ if (!t)
4583
+ return -EINVAL;
4584
+
4585
+ access_idx = spec->raw_spec[i];
4586
+ acc = &spec->spec[spec->len];
4587
+
4588
+ if (btf_is_composite(t)) {
4589
+ const struct btf_member *m;
4590
+ __u32 bit_offset;
4591
+
4592
+ if (access_idx >= btf_vlen(t))
4593
+ return -EINVAL;
4594
+
4595
+ bit_offset = btf_member_bit_offset(t, access_idx);
4596
+ spec->bit_offset += bit_offset;
4597
+
4598
+ m = btf_members(t) + access_idx;
4599
+ if (m->name_off) {
4600
+ name = btf__name_by_offset(btf, m->name_off);
4601
+ if (str_is_empty(name))
4602
+ return -EINVAL;
4603
+
4604
+ acc->type_id = id;
4605
+ acc->idx = access_idx;
4606
+ acc->name = name;
4607
+ spec->len++;
4608
+ }
4609
+
4610
+ id = m->type;
4611
+ } else if (btf_is_array(t)) {
4612
+ const struct btf_array *a = btf_array(t);
4613
+ bool flex;
4614
+
4615
+ t = skip_mods_and_typedefs(btf, a->type, &id);
4616
+ if (!t)
4617
+ return -EINVAL;
4618
+
4619
+ flex = is_flex_arr(btf, acc - 1, a);
4620
+ if (!flex && access_idx >= a->nelems)
4621
+ return -EINVAL;
4622
+
4623
+ spec->spec[spec->len].type_id = id;
4624
+ spec->spec[spec->len].idx = access_idx;
4625
+ spec->len++;
4626
+
4627
+ sz = btf__resolve_size(btf, id);
4628
+ if (sz < 0)
4629
+ return sz;
4630
+ spec->bit_offset += access_idx * sz * 8;
4631
+ } else {
4632
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
4633
+ type_id, spec_str, i, id, btf_kind_str(t));
4634
+ return -EINVAL;
4635
+ }
4636
+ }
4637
+
12194638 return 0;
12204639 }
12214640
1222
-static int
1223
-bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
4641
+static bool bpf_core_is_flavor_sep(const char *s)
12244642 {
1225
- int i, err;
4643
+ /* check X___Y name pattern, where X and Y are not underscores */
4644
+ return s[0] != '_' && /* X */
4645
+ s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
4646
+ s[4] != '_'; /* Y */
4647
+}
12264648
1227
- if (!prog || !prog->reloc_desc)
4649
+/* Given 'some_struct_name___with_flavor' return the length of a name prefix
4650
+ * before last triple underscore. Struct name part after last triple
4651
+ * underscore is ignored by BPF CO-RE relocation during relocation matching.
4652
+ */
4653
+static size_t bpf_core_essential_name_len(const char *name)
4654
+{
4655
+ size_t n = strlen(name);
4656
+ int i;
4657
+
4658
+ for (i = n - 5; i >= 0; i--) {
4659
+ if (bpf_core_is_flavor_sep(name + i))
4660
+ return i + 1;
4661
+ }
4662
+ return n;
4663
+}
4664
+
4665
+/* dynamically sized list of type IDs */
4666
+struct ids_vec {
4667
+ __u32 *data;
4668
+ int len;
4669
+};
4670
+
4671
+static void bpf_core_free_cands(struct ids_vec *cand_ids)
4672
+{
4673
+ free(cand_ids->data);
4674
+ free(cand_ids);
4675
+}
4676
+
4677
+static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
4678
+ __u32 local_type_id,
4679
+ const struct btf *targ_btf)
4680
+{
4681
+ size_t local_essent_len, targ_essent_len;
4682
+ const char *local_name, *targ_name;
4683
+ const struct btf_type *t, *local_t;
4684
+ struct ids_vec *cand_ids;
4685
+ __u32 *new_ids;
4686
+ int i, err, n;
4687
+
4688
+ local_t = btf__type_by_id(local_btf, local_type_id);
4689
+ if (!local_t)
4690
+ return ERR_PTR(-EINVAL);
4691
+
4692
+ local_name = btf__name_by_offset(local_btf, local_t->name_off);
4693
+ if (str_is_empty(local_name))
4694
+ return ERR_PTR(-EINVAL);
4695
+ local_essent_len = bpf_core_essential_name_len(local_name);
4696
+
4697
+ cand_ids = calloc(1, sizeof(*cand_ids));
4698
+ if (!cand_ids)
4699
+ return ERR_PTR(-ENOMEM);
4700
+
4701
+ n = btf__get_nr_types(targ_btf);
4702
+ for (i = 1; i <= n; i++) {
4703
+ t = btf__type_by_id(targ_btf, i);
4704
+ if (btf_kind(t) != btf_kind(local_t))
4705
+ continue;
4706
+
4707
+ targ_name = btf__name_by_offset(targ_btf, t->name_off);
4708
+ if (str_is_empty(targ_name))
4709
+ continue;
4710
+
4711
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
4712
+ if (targ_essent_len != local_essent_len)
4713
+ continue;
4714
+
4715
+ if (strncmp(local_name, targ_name, local_essent_len) == 0) {
4716
+ pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
4717
+ local_type_id, btf_kind_str(local_t),
4718
+ local_name, i, btf_kind_str(t), targ_name);
4719
+ new_ids = libbpf_reallocarray(cand_ids->data,
4720
+ cand_ids->len + 1,
4721
+ sizeof(*cand_ids->data));
4722
+ if (!new_ids) {
4723
+ err = -ENOMEM;
4724
+ goto err_out;
4725
+ }
4726
+ cand_ids->data = new_ids;
4727
+ cand_ids->data[cand_ids->len++] = i;
4728
+ }
4729
+ }
4730
+ return cand_ids;
4731
+err_out:
4732
+ bpf_core_free_cands(cand_ids);
4733
+ return ERR_PTR(err);
4734
+}
4735
+
4736
+/* Check two types for compatibility for the purpose of field access
4737
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
4738
+ * are relocating semantically compatible entities:
4739
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
4740
+ * - any two FWDs are compatible, if their names match (modulo flavor suffix);
4741
+ * - any two PTRs are always compatible;
4742
+ * - for ENUMs, names should be the same (ignoring flavor suffix) or at
4743
+ * least one of enums should be anonymous;
4744
+ * - for ENUMs, check sizes, names are ignored;
4745
+ * - for INT, size and signedness are ignored;
4746
+ * - for ARRAY, dimensionality is ignored, element types are checked for
4747
+ * compatibility recursively;
4748
+ * - everything else shouldn't be ever a target of relocation.
4749
+ * These rules are not set in stone and probably will be adjusted as we get
4750
+ * more experience with using BPF CO-RE relocations.
4751
+ */
4752
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
4753
+ __u32 local_id,
4754
+ const struct btf *targ_btf,
4755
+ __u32 targ_id)
4756
+{
4757
+ const struct btf_type *local_type, *targ_type;
4758
+
4759
+recur:
4760
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4761
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4762
+ if (!local_type || !targ_type)
4763
+ return -EINVAL;
4764
+
4765
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
4766
+ return 1;
4767
+ if (btf_kind(local_type) != btf_kind(targ_type))
12284768 return 0;
12294769
1230
- for (i = 0; i < prog->nr_reloc; i++) {
1231
- if (prog->reloc_desc[i].type == RELO_LD64) {
1232
- struct bpf_insn *insns = prog->insns;
1233
- int insn_idx, map_idx;
4770
+ switch (btf_kind(local_type)) {
4771
+ case BTF_KIND_PTR:
4772
+ return 1;
4773
+ case BTF_KIND_FWD:
4774
+ case BTF_KIND_ENUM: {
4775
+ const char *local_name, *targ_name;
4776
+ size_t local_len, targ_len;
12344777
1235
- insn_idx = prog->reloc_desc[i].insn_idx;
1236
- map_idx = prog->reloc_desc[i].map_idx;
4778
+ local_name = btf__name_by_offset(local_btf,
4779
+ local_type->name_off);
4780
+ targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
4781
+ local_len = bpf_core_essential_name_len(local_name);
4782
+ targ_len = bpf_core_essential_name_len(targ_name);
4783
+ /* one of them is anonymous or both w/ same flavor-less names */
4784
+ return local_len == 0 || targ_len == 0 ||
4785
+ (local_len == targ_len &&
4786
+ strncmp(local_name, targ_name, local_len) == 0);
4787
+ }
4788
+ case BTF_KIND_INT:
4789
+ /* just reject deprecated bitfield-like integers; all other
4790
+ * integers are by default compatible between each other
4791
+ */
4792
+ return btf_int_offset(local_type) == 0 &&
4793
+ btf_int_offset(targ_type) == 0;
4794
+ case BTF_KIND_ARRAY:
4795
+ local_id = btf_array(local_type)->type;
4796
+ targ_id = btf_array(targ_type)->type;
4797
+ goto recur;
4798
+ default:
4799
+ pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
4800
+ btf_kind(local_type), local_id, targ_id);
4801
+ return 0;
4802
+ }
4803
+}
12374804
1238
- if (insn_idx >= (int)prog->insns_cnt) {
1239
- pr_warning("relocation out of range: '%s'\n",
1240
- prog->section_name);
1241
- return -LIBBPF_ERRNO__RELOC;
4805
+/*
4806
+ * Given single high-level named field accessor in local type, find
4807
+ * corresponding high-level accessor for a target type. Along the way,
4808
+ * maintain low-level spec for target as well. Also keep updating target
4809
+ * bit offset.
4810
+ *
4811
+ * Searching is performed through recursive exhaustive enumeration of all
4812
+ * fields of a struct/union. If there are any anonymous (embedded)
4813
+ * structs/unions, they are recursively searched as well. If field with
4814
+ * desired name is found, check compatibility between local and target types,
4815
+ * before returning result.
4816
+ *
4817
+ * 1 is returned, if field is found.
4818
+ * 0 is returned if no compatible field is found.
4819
+ * <0 is returned on error.
4820
+ */
4821
+static int bpf_core_match_member(const struct btf *local_btf,
4822
+ const struct bpf_core_accessor *local_acc,
4823
+ const struct btf *targ_btf,
4824
+ __u32 targ_id,
4825
+ struct bpf_core_spec *spec,
4826
+ __u32 *next_targ_id)
4827
+{
4828
+ const struct btf_type *local_type, *targ_type;
4829
+ const struct btf_member *local_member, *m;
4830
+ const char *local_name, *targ_name;
4831
+ __u32 local_id;
4832
+ int i, n, found;
4833
+
4834
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4835
+ if (!targ_type)
4836
+ return -EINVAL;
4837
+ if (!btf_is_composite(targ_type))
4838
+ return 0;
4839
+
4840
+ local_id = local_acc->type_id;
4841
+ local_type = btf__type_by_id(local_btf, local_id);
4842
+ local_member = btf_members(local_type) + local_acc->idx;
4843
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
4844
+
4845
+ n = btf_vlen(targ_type);
4846
+ m = btf_members(targ_type);
4847
+ for (i = 0; i < n; i++, m++) {
4848
+ __u32 bit_offset;
4849
+
4850
+ bit_offset = btf_member_bit_offset(targ_type, i);
4851
+
4852
+ /* too deep struct/union/array nesting */
4853
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4854
+ return -E2BIG;
4855
+
4856
+ /* speculate this member will be the good one */
4857
+ spec->bit_offset += bit_offset;
4858
+ spec->raw_spec[spec->raw_len++] = i;
4859
+
4860
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
4861
+ if (str_is_empty(targ_name)) {
4862
+ /* embedded struct/union, we need to go deeper */
4863
+ found = bpf_core_match_member(local_btf, local_acc,
4864
+ targ_btf, m->type,
4865
+ spec, next_targ_id);
4866
+ if (found) /* either found or error */
4867
+ return found;
4868
+ } else if (strcmp(local_name, targ_name) == 0) {
4869
+ /* matching named field */
4870
+ struct bpf_core_accessor *targ_acc;
4871
+
4872
+ targ_acc = &spec->spec[spec->len++];
4873
+ targ_acc->type_id = targ_id;
4874
+ targ_acc->idx = i;
4875
+ targ_acc->name = targ_name;
4876
+
4877
+ *next_targ_id = m->type;
4878
+ found = bpf_core_fields_are_compat(local_btf,
4879
+ local_member->type,
4880
+ targ_btf, m->type);
4881
+ if (!found)
4882
+ spec->len--; /* pop accessor */
4883
+ return found;
4884
+ }
4885
+ /* member turned out not to be what we looked for */
4886
+ spec->bit_offset -= bit_offset;
4887
+ spec->raw_len--;
4888
+ }
4889
+
4890
+ return 0;
4891
+}
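
For illustration (hypothetical types), the recursive search described above means a local accessor for field 'b' still matches a target type where 'b' has been moved into an anonymous embedded struct; the unnamed member is descended into and an extra low-level spec step is recorded:

struct event {			/* local (BPF-side) definition */
	int a;
	int b;
};

struct event___target {	/* target definition as seen in kernel BTF */
	int a;
	struct {		/* anonymous member: searched recursively */
		int pad;
		int b;		/* matched by name, then checked for compat */
	};
};
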
4892
+
4893
+/* Check local and target types for compatibility. This check is used for
4894
+ * type-based CO-RE relocations and follow slightly different rules than
4895
+ * field-based relocations. This function assumes that root types were already
4896
+ * checked for name match. Beyond that initial root-level name check, names
4897
+ * are completely ignored. Compatibility rules are as follows:
4898
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
4899
+ * kind should match for local and target types (i.e., STRUCT is not
4900
+ * compatible with UNION);
4901
+ * - for ENUMs, the size is ignored;
4902
+ * - for INT, size and signedness are ignored;
4903
+ * - for ARRAY, dimensionality is ignored, element types are checked for
4904
+ * compatibility recursively;
4905
+ * - CONST/VOLATILE/RESTRICT modifiers are ignored;
4906
+ * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
4907
+ * - FUNC_PROTOs are compatible if they have compatible signature: same
4908
+ * number of input args and compatible return and argument types.
4909
+ * These rules are not set in stone and probably will be adjusted as we get
4910
+ * more experience with using BPF CO-RE relocations.
4911
+ */
4912
+static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
4913
+ const struct btf *targ_btf, __u32 targ_id)
4914
+{
4915
+ const struct btf_type *local_type, *targ_type;
4916
+ int depth = 32; /* max recursion depth */
4917
+
4918
+ /* caller made sure that names match (ignoring flavor suffix) */
4919
+ local_type = btf__type_by_id(local_btf, local_id);
4920
+ targ_type = btf__type_by_id(targ_btf, targ_id);
4921
+ if (btf_kind(local_type) != btf_kind(targ_type))
4922
+ return 0;
4923
+
4924
+recur:
4925
+ depth--;
4926
+ if (depth < 0)
4927
+ return -EINVAL;
4928
+
4929
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4930
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4931
+ if (!local_type || !targ_type)
4932
+ return -EINVAL;
4933
+
4934
+ if (btf_kind(local_type) != btf_kind(targ_type))
4935
+ return 0;
4936
+
4937
+ switch (btf_kind(local_type)) {
4938
+ case BTF_KIND_UNKN:
4939
+ case BTF_KIND_STRUCT:
4940
+ case BTF_KIND_UNION:
4941
+ case BTF_KIND_ENUM:
4942
+ case BTF_KIND_FWD:
4943
+ return 1;
4944
+ case BTF_KIND_INT:
4945
+ /* just reject deprecated bitfield-like integers; all other
4946
+ * integers are by default compatible between each other
4947
+ */
4948
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
4949
+ case BTF_KIND_PTR:
4950
+ local_id = local_type->type;
4951
+ targ_id = targ_type->type;
4952
+ goto recur;
4953
+ case BTF_KIND_ARRAY:
4954
+ local_id = btf_array(local_type)->type;
4955
+ targ_id = btf_array(targ_type)->type;
4956
+ goto recur;
4957
+ case BTF_KIND_FUNC_PROTO: {
4958
+ struct btf_param *local_p = btf_params(local_type);
4959
+ struct btf_param *targ_p = btf_params(targ_type);
4960
+ __u16 local_vlen = btf_vlen(local_type);
4961
+ __u16 targ_vlen = btf_vlen(targ_type);
4962
+ int i, err;
4963
+
4964
+ if (local_vlen != targ_vlen)
4965
+ return 0;
4966
+
4967
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
4968
+ skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
4969
+ skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
4970
+ err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
4971
+ if (err <= 0)
4972
+ return err;
4973
+ }
4974
+
4975
+ /* tail recurse for return type check */
4976
+ skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
4977
+ skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
4978
+ goto recur;
4979
+ }
4980
+ default:
4981
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
4982
+ btf_kind_str(local_type), local_id, targ_id);
4983
+ return 0;
4984
+ }
4985
+}
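
A small sketch of the FUNC_PROTO rule (hypothetical typedefs): both prototypes resolve, through the typedef and pointer, to FUNC_PROTOs with the same argument count and pairwise-compatible argument and return types, so bpf_core_types_are_compat() would return 1; names below the root and integer widths don't matter.

struct ctx;
struct other_ctx;

/* local (BPF-side) prototype */
typedef int (*handler_t)(struct ctx *ctx, unsigned int flags);

/* target flavor: same arg count, STRUCT pointees match by kind,
 * INT argument/return types ignore size and signedness
 */
typedef long (*handler_t___target)(struct other_ctx *c, unsigned long f);
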
4986
+
4987
+/*
4988
+ * Try to match local spec to a target type and, if successful, produce full
4989
+ * target spec (high-level, low-level + bit offset).
4990
+ */
4991
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
4992
+ const struct btf *targ_btf, __u32 targ_id,
4993
+ struct bpf_core_spec *targ_spec)
4994
+{
4995
+ const struct btf_type *targ_type;
4996
+ const struct bpf_core_accessor *local_acc;
4997
+ struct bpf_core_accessor *targ_acc;
4998
+ int i, sz, matched;
4999
+
5000
+ memset(targ_spec, 0, sizeof(*targ_spec));
5001
+ targ_spec->btf = targ_btf;
5002
+ targ_spec->root_type_id = targ_id;
5003
+ targ_spec->relo_kind = local_spec->relo_kind;
5004
+
5005
+ if (core_relo_is_type_based(local_spec->relo_kind)) {
5006
+ return bpf_core_types_are_compat(local_spec->btf,
5007
+ local_spec->root_type_id,
5008
+ targ_btf, targ_id);
5009
+ }
5010
+
5011
+ local_acc = &local_spec->spec[0];
5012
+ targ_acc = &targ_spec->spec[0];
5013
+
5014
+ if (core_relo_is_enumval_based(local_spec->relo_kind)) {
5015
+ size_t local_essent_len, targ_essent_len;
5016
+ const struct btf_enum *e;
5017
+ const char *targ_name;
5018
+
5019
+ /* has to resolve to an enum */
5020
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
5021
+ if (!btf_is_enum(targ_type))
5022
+ return 0;
5023
+
5024
+ local_essent_len = bpf_core_essential_name_len(local_acc->name);
5025
+
5026
+ for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
5027
+ targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
5028
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
5029
+ if (targ_essent_len != local_essent_len)
5030
+ continue;
5031
+ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
5032
+ targ_acc->type_id = targ_id;
5033
+ targ_acc->idx = i;
5034
+ targ_acc->name = targ_name;
5035
+ targ_spec->len++;
5036
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5037
+ targ_spec->raw_len++;
5038
+ return 1;
12425039 }
1243
- insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
1244
- insns[insn_idx].imm = obj->maps[map_idx].fd;
5040
+ }
5041
+ return 0;
5042
+ }
5043
+
5044
+ if (!core_relo_is_field_based(local_spec->relo_kind))
5045
+ return -EINVAL;
5046
+
5047
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
5048
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
5049
+ &targ_id);
5050
+ if (!targ_type)
5051
+ return -EINVAL;
5052
+
5053
+ if (local_acc->name) {
5054
+ matched = bpf_core_match_member(local_spec->btf,
5055
+ local_acc,
5056
+ targ_btf, targ_id,
5057
+ targ_spec, &targ_id);
5058
+ if (matched <= 0)
5059
+ return matched;
12455060 } else {
1246
- err = bpf_program__reloc_text(prog, obj,
1247
- &prog->reloc_desc[i]);
5061
+ /* for i=0, targ_id is already treated as array element
5062
+ * type (because it's the original struct), for others
5063
+ * we should find array element type first
5064
+ */
5065
+ if (i > 0) {
5066
+ const struct btf_array *a;
5067
+ bool flex;
5068
+
5069
+ if (!btf_is_array(targ_type))
5070
+ return 0;
5071
+
5072
+ a = btf_array(targ_type);
5073
+ flex = is_flex_arr(targ_btf, targ_acc - 1, a);
5074
+ if (!flex && local_acc->idx >= a->nelems)
5075
+ return 0;
5076
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
5077
+ &targ_id))
5078
+ return -EINVAL;
5079
+ }
5080
+
5081
+ /* too deep struct/union/array nesting */
5082
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
5083
+ return -E2BIG;
5084
+
5085
+ targ_acc->type_id = targ_id;
5086
+ targ_acc->idx = local_acc->idx;
5087
+ targ_acc->name = NULL;
5088
+ targ_spec->len++;
5089
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5090
+ targ_spec->raw_len++;
5091
+
5092
+ sz = btf__resolve_size(targ_btf, targ_id);
5093
+ if (sz < 0)
5094
+ return sz;
5095
+ targ_spec->bit_offset += local_acc->idx * sz * 8;
5096
+ }
5097
+ }
5098
+
5099
+ return 1;
5100
+}
5101
+
5102
+static int bpf_core_calc_field_relo(const struct bpf_program *prog,
5103
+ const struct bpf_core_relo *relo,
5104
+ const struct bpf_core_spec *spec,
5105
+ __u32 *val, __u32 *field_sz, __u32 *type_id,
5106
+ bool *validate)
5107
+{
5108
+ const struct bpf_core_accessor *acc;
5109
+ const struct btf_type *t;
5110
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
5111
+ const struct btf_member *m;
5112
+ const struct btf_type *mt;
5113
+ bool bitfield;
5114
+ __s64 sz;
5115
+
5116
+ *field_sz = 0;
5117
+
5118
+ if (relo->kind == BPF_FIELD_EXISTS) {
5119
+ *val = spec ? 1 : 0;
5120
+ return 0;
5121
+ }
5122
+
5123
+ if (!spec)
5124
+ return -EUCLEAN; /* request instruction poisoning */
5125
+
5126
+ acc = &spec->spec[spec->len - 1];
5127
+ t = btf__type_by_id(spec->btf, acc->type_id);
5128
+
5129
+ /* a[n] accessor needs special handling */
5130
+ if (!acc->name) {
5131
+ if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
5132
+ *val = spec->bit_offset / 8;
5133
+ /* remember field size for load/store mem size */
5134
+ sz = btf__resolve_size(spec->btf, acc->type_id);
5135
+ if (sz < 0)
5136
+ return -EINVAL;
5137
+ *field_sz = sz;
5138
+ *type_id = acc->type_id;
5139
+ } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
5140
+ sz = btf__resolve_size(spec->btf, acc->type_id);
5141
+ if (sz < 0)
5142
+ return -EINVAL;
5143
+ *val = sz;
5144
+ } else {
5145
+ pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
5146
+ prog->name, relo->kind, relo->insn_off / 8);
5147
+ return -EINVAL;
5148
+ }
5149
+ if (validate)
5150
+ *validate = true;
5151
+ return 0;
5152
+ }
5153
+
5154
+ m = btf_members(t) + acc->idx;
5155
+ mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
5156
+ bit_off = spec->bit_offset;
5157
+ bit_sz = btf_member_bitfield_size(t, acc->idx);
5158
+
5159
+ bitfield = bit_sz > 0;
5160
+ if (bitfield) {
5161
+ byte_sz = mt->size;
5162
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
5163
+ /* figure out smallest int size necessary for bitfield load */
5164
+ while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
5165
+ if (byte_sz >= 8) {
5166
+ /* bitfield can't be read with 64-bit read */
5167
+ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
5168
+ prog->name, relo->kind, relo->insn_off / 8);
5169
+ return -E2BIG;
5170
+ }
5171
+ byte_sz *= 2;
5172
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
5173
+ }
5174
+ } else {
5175
+ sz = btf__resolve_size(spec->btf, field_type_id);
5176
+ if (sz < 0)
5177
+ return -EINVAL;
5178
+ byte_sz = sz;
5179
+ byte_off = spec->bit_offset / 8;
5180
+ bit_sz = byte_sz * 8;
5181
+ }
5182
+
5183
+ /* for bitfields, all the relocatable aspects are ambiguous and we
5184
+ * might disagree with compiler, so turn off validation of expected
5185
+ * value, except for signedness
5186
+ */
5187
+ if (validate)
5188
+ *validate = !bitfield;
5189
+
5190
+ switch (relo->kind) {
5191
+ case BPF_FIELD_BYTE_OFFSET:
5192
+ *val = byte_off;
5193
+ if (!bitfield) {
5194
+ *field_sz = byte_sz;
5195
+ *type_id = field_type_id;
5196
+ }
5197
+ break;
5198
+ case BPF_FIELD_BYTE_SIZE:
5199
+ *val = byte_sz;
5200
+ break;
5201
+ case BPF_FIELD_SIGNED:
5202
+ /* enums will be assumed unsigned */
5203
+ *val = btf_is_enum(mt) ||
5204
+ (btf_int_encoding(mt) & BTF_INT_SIGNED);
5205
+ if (validate)
5206
+ *validate = true; /* signedness is never ambiguous */
5207
+ break;
5208
+ case BPF_FIELD_LSHIFT_U64:
5209
+#if __BYTE_ORDER == __LITTLE_ENDIAN
5210
+ *val = 64 - (bit_off + bit_sz - byte_off * 8);
5211
+#else
5212
+ *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
5213
+#endif
5214
+ break;
5215
+ case BPF_FIELD_RSHIFT_U64:
5216
+ *val = 64 - bit_sz;
5217
+ if (validate)
5218
+ *validate = true; /* right shift is never ambiguous */
5219
+ break;
5220
+ case BPF_FIELD_EXISTS:
5221
+ default:
5222
+ return -EOPNOTSUPP;
5223
+ }
5224
+
5225
+ return 0;
5226
+}
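
To make the bitfield math above concrete, here is a worked sketch (hypothetical struct, little-endian host; not libbpf API) showing the values the field relocation kinds would take and how a consumer combines them:

struct example {
	unsigned int x:3;
	unsigned int a:5;	/* bit_off = 3, bit_sz = 5 */
};

/* for field 'a', the code above computes:
 *   BPF_FIELD_BYTE_OFFSET = 0, BPF_FIELD_BYTE_SIZE = 4,
 *   BPF_FIELD_LSHIFT_U64  = 64 - (3 + 5 - 0 * 8) = 56,
 *   BPF_FIELD_RSHIFT_U64  = 64 - 5 = 59
 */
static unsigned long long read_bitfield_a(const struct example *p)
{
	unsigned long long v = *(const unsigned int *)p; /* 4-byte load at byte_off 0 */

	v <<= 56;	/* LSHIFT: field's top bit moves to bit 63 */
	v >>= 59;	/* RSHIFT: unsigned shift leaves the 5-bit value */
	return v;
}
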
5227
+
5228
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
5229
+ const struct bpf_core_spec *spec,
5230
+ __u32 *val)
5231
+{
5232
+ __s64 sz;
5233
+
5234
+ /* type-based relos return zero when target type is not found */
5235
+ if (!spec) {
5236
+ *val = 0;
5237
+ return 0;
5238
+ }
5239
+
5240
+ switch (relo->kind) {
5241
+ case BPF_TYPE_ID_TARGET:
5242
+ *val = spec->root_type_id;
5243
+ break;
5244
+ case BPF_TYPE_EXISTS:
5245
+ *val = 1;
5246
+ break;
5247
+ case BPF_TYPE_SIZE:
5248
+ sz = btf__resolve_size(spec->btf, spec->root_type_id);
5249
+ if (sz < 0)
5250
+ return -EINVAL;
5251
+ *val = sz;
5252
+ break;
5253
+ case BPF_TYPE_ID_LOCAL:
5254
+ /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
5255
+ default:
5256
+ return -EOPNOTSUPP;
5257
+ }
5258
+
5259
+ return 0;
5260
+}
5261
+
5262
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
5263
+ const struct bpf_core_spec *spec,
5264
+ __u32 *val)
5265
+{
5266
+ const struct btf_type *t;
5267
+ const struct btf_enum *e;
5268
+
5269
+ switch (relo->kind) {
5270
+ case BPF_ENUMVAL_EXISTS:
5271
+ *val = spec ? 1 : 0;
5272
+ break;
5273
+ case BPF_ENUMVAL_VALUE:
5274
+ if (!spec)
5275
+ return -EUCLEAN; /* request instruction poisoning */
5276
+ t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
5277
+ e = btf_enum(t) + spec->spec[0].idx;
5278
+ *val = e->val;
5279
+ break;
5280
+ default:
5281
+ return -EOPNOTSUPP;
5282
+ }
5283
+
5284
+ return 0;
5285
+}
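
A hypothetical sketch of what BPF_ENUMVAL_VALUE resolves: the local enumerator's compile-time value is the orig_val, while the matching enumerator found in target BTF (by essential name) supplies the new_val that gets patched into the instruction.

/* local (BPF-side) flavor with a possibly stale guess */
enum pid_type___local {
	PIDTYPE_MAX___local = 3,
};

/* target kernel BTF actually contains */
enum pid_type {
	PIDTYPE_MAX = 4,
};

/* BPF_ENUMVAL_EXISTS -> 1; BPF_ENUMVAL_VALUE -> insn imm patched 3 -> 4 */
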
5286
+
5287
+struct bpf_core_relo_res
5288
+{
5289
+ /* expected value in the instruction, unless validate == false */
5290
+ __u32 orig_val;
5291
+ /* new value that needs to be patched up to */
5292
+ __u32 new_val;
5293
+ /* relocation unsuccessful, poison instruction, but don't fail load */
5294
+ bool poison;
5295
+ /* some relocations can't be validated against orig_val */
5296
+ bool validate;
5297
+ /* for field byte offset relocations or the forms:
5298
+ * *(T *)(rX + <off>) = rY
5299
+ * rX = *(T *)(rY + <off>),
5300
+ * we remember original and resolved field size to adjust direct
5301
+ * memory loads of pointers and integers; this is necessary for 32-bit
5302
+ * host kernel architectures, but also allows to automatically
5303
+ * relocate fields that were resized from, e.g., u32 to u64, etc.
5304
+ */
5305
+ bool fail_memsz_adjust;
5306
+ __u32 orig_sz;
5307
+ __u32 orig_type_id;
5308
+ __u32 new_sz;
5309
+ __u32 new_type_id;
5310
+};
5311
+
5312
+/* Calculate original and target relocation values, given local and target
5313
+ * specs and relocation kind. These values are calculated for each candidate.
5314
+ * If there are multiple candidates, resulting values should all be consistent
5315
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
5316
+ * If instruction has to be poisoned, *poison will be set to true.
5317
+ */
5318
+static int bpf_core_calc_relo(const struct bpf_program *prog,
5319
+ const struct bpf_core_relo *relo,
5320
+ int relo_idx,
5321
+ const struct bpf_core_spec *local_spec,
5322
+ const struct bpf_core_spec *targ_spec,
5323
+ struct bpf_core_relo_res *res)
5324
+{
5325
+ int err = -EOPNOTSUPP;
5326
+
5327
+ res->orig_val = 0;
5328
+ res->new_val = 0;
5329
+ res->poison = false;
5330
+ res->validate = true;
5331
+ res->fail_memsz_adjust = false;
5332
+ res->orig_sz = res->new_sz = 0;
5333
+ res->orig_type_id = res->new_type_id = 0;
5334
+
5335
+ if (core_relo_is_field_based(relo->kind)) {
5336
+ err = bpf_core_calc_field_relo(prog, relo, local_spec,
5337
+ &res->orig_val, &res->orig_sz,
5338
+ &res->orig_type_id, &res->validate);
5339
+ err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
5340
+ &res->new_val, &res->new_sz,
5341
+ &res->new_type_id, NULL);
5342
+ if (err)
5343
+ goto done;
5344
+ /* Validate if it's safe to adjust load/store memory size.
5345
+ * Adjustments are performed only if original and new memory
5346
+ * sizes differ.
5347
+ */
5348
+ res->fail_memsz_adjust = false;
5349
+ if (res->orig_sz != res->new_sz) {
5350
+ const struct btf_type *orig_t, *new_t;
5351
+
5352
+ orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
5353
+ new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
5354
+
5355
+ /* There are two use cases in which it's safe to
5356
+ * adjust load/store's mem size:
5357
+ * - reading a 32-bit kernel pointer, while on BPF
5358
+ * side pointers are always 64-bit; in this case
5359
+ * it's safe to "downsize" instruction size due to
5360
+ * pointer being treated as unsigned integer with
5361
+ * zero-extended upper 32-bits;
5362
+ * - reading unsigned integers, again due to
5363
+ * zero-extension preserving the value correctly.
5364
+ *
5365
+ * In all other cases it's incorrect to attempt to
5366
+ * load/store field because read value will be
5367
+ * incorrect, so we poison relocated instruction.
5368
+ */
5369
+ if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
5370
+ goto done;
5371
+ if (btf_is_int(orig_t) && btf_is_int(new_t) &&
5372
+ btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
5373
+ btf_int_encoding(new_t) != BTF_INT_SIGNED)
5374
+ goto done;
5375
+
5376
+ /* mark as invalid mem size adjustment, but this will
5377
+ * only be checked for LDX/STX/ST insns
5378
+ */
5379
+ res->fail_memsz_adjust = true;
5380
+ }
5381
+ } else if (core_relo_is_type_based(relo->kind)) {
5382
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
5383
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
5384
+ } else if (core_relo_is_enumval_based(relo->kind)) {
5385
+ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
5386
+ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
5387
+ }
5388
+
5389
+done:
5390
+ if (err == -EUCLEAN) {
5391
+ /* EUCLEAN is used to signal instruction poisoning request */
5392
+ res->poison = true;
5393
+ err = 0;
5394
+ } else if (err == -EOPNOTSUPP) {
5395
+ /* EOPNOTSUPP means unknown/unsupported relocation */
5396
+ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
5397
+ prog->name, relo_idx, core_relo_kind_str(relo->kind),
5398
+ relo->kind, relo->insn_off / 8);
5399
+ }
5400
+
5401
+ return err;
5402
+}
5403
+
5404
+/*
5405
+ * Turn an instruction for which CO-RE relocation failed into an invalid one with
5406
+ * distinct signature.
5407
+ */
5408
+static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
5409
+ int insn_idx, struct bpf_insn *insn)
5410
+{
5411
+ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
5412
+ prog->name, relo_idx, insn_idx);
5413
+ insn->code = BPF_JMP | BPF_CALL;
5414
+ insn->dst_reg = 0;
5415
+ insn->src_reg = 0;
5416
+ insn->off = 0;
5417
+ /* if this instruction is reachable (not a dead code),
5418
+ * verifier will complain with the following message:
5419
+ * invalid func unknown#195896080
5420
+ */
5421
+ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
5422
+}
5423
+
5424
+static bool is_ldimm64(struct bpf_insn *insn)
5425
+{
5426
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
5427
+}
5428
+
5429
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
5430
+{
5431
+ switch (BPF_SIZE(insn->code)) {
5432
+ case BPF_DW: return 8;
5433
+ case BPF_W: return 4;
5434
+ case BPF_H: return 2;
5435
+ case BPF_B: return 1;
5436
+ default: return -1;
5437
+ }
5438
+}
5439
+
5440
+static int insn_bytes_to_bpf_size(__u32 sz)
5441
+{
5442
+ switch (sz) {
5443
+ case 8: return BPF_DW;
5444
+ case 4: return BPF_W;
5445
+ case 2: return BPF_H;
5446
+ case 1: return BPF_B;
5447
+ default: return -1;
5448
+ }
5449
+}
5450
+
5451
+/*
5452
+ * Patch relocatable BPF instruction.
5453
+ *
5454
+ * Patched value is determined by relocation kind and target specification.
5455
+ * For existence relocations target spec will be NULL if field/type is not found.
5456
+ * Expected insn->imm value is determined using relocation kind and local
5457
+ * spec, and is checked before patching instruction. If actual insn->imm value
5458
+ * is wrong, bail out with error.
5459
+ *
5460
+ * Currently supported classes of BPF instruction are:
5461
+ * 1. rX = <imm> (assignment with immediate operand);
5462
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
5463
+ * 3. rX = <imm64> (load with 64-bit immediate value);
5464
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
5465
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
5466
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
5467
+ */
5468
+static int bpf_core_patch_insn(struct bpf_program *prog,
5469
+ const struct bpf_core_relo *relo,
5470
+ int relo_idx,
5471
+ const struct bpf_core_relo_res *res)
5472
+{
5473
+ __u32 orig_val, new_val;
5474
+ struct bpf_insn *insn;
5475
+ int insn_idx;
5476
+ __u8 class;
5477
+
5478
+ if (relo->insn_off % BPF_INSN_SZ)
5479
+ return -EINVAL;
5480
+ insn_idx = relo->insn_off / BPF_INSN_SZ;
5481
+ /* adjust insn_idx from section frame of reference to the local
5482
+ * program's frame of reference; (sub-)program code is not yet
5483
+ * relocated, so it's enough to just subtract in-section offset
5484
+ */
5485
+ insn_idx = insn_idx - prog->sec_insn_off;
5486
+ insn = &prog->insns[insn_idx];
5487
+ class = BPF_CLASS(insn->code);
5488
+
5489
+ if (res->poison) {
5490
+poison:
5491
+ /* poison second part of ldimm64 to avoid confusing error from
5492
+ * verifier about "unknown opcode 00"
5493
+ */
5494
+ if (is_ldimm64(insn))
5495
+ bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
5496
+ bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
5497
+ return 0;
5498
+ }
5499
+
5500
+ orig_val = res->orig_val;
5501
+ new_val = res->new_val;
5502
+
5503
+ switch (class) {
5504
+ case BPF_ALU:
5505
+ case BPF_ALU64:
5506
+ if (BPF_SRC(insn->code) != BPF_K)
5507
+ return -EINVAL;
5508
+ if (res->validate && insn->imm != orig_val) {
5509
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
5510
+ prog->name, relo_idx,
5511
+ insn_idx, insn->imm, orig_val, new_val);
5512
+ return -EINVAL;
5513
+ }
5514
+ orig_val = insn->imm;
5515
+ insn->imm = new_val;
5516
+ pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
5517
+ prog->name, relo_idx, insn_idx,
5518
+ orig_val, new_val);
5519
+ break;
5520
+ case BPF_LDX:
5521
+ case BPF_ST:
5522
+ case BPF_STX:
5523
+ if (res->validate && insn->off != orig_val) {
5524
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
5525
+ prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
5526
+ return -EINVAL;
5527
+ }
5528
+ if (new_val > SHRT_MAX) {
5529
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
5530
+ prog->name, relo_idx, insn_idx, new_val);
5531
+ return -ERANGE;
5532
+ }
5533
+ if (res->fail_memsz_adjust) {
5534
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
5535
+ "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
5536
+ prog->name, relo_idx, insn_idx);
5537
+ goto poison;
5538
+ }
5539
+
5540
+ orig_val = insn->off;
5541
+ insn->off = new_val;
5542
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
5543
+ prog->name, relo_idx, insn_idx, orig_val, new_val);
5544
+
5545
+ if (res->new_sz != res->orig_sz) {
5546
+ int insn_bytes_sz, insn_bpf_sz;
5547
+
5548
+ insn_bytes_sz = insn_bpf_size_to_bytes(insn);
5549
+ if (insn_bytes_sz != res->orig_sz) {
5550
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
5551
+ prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
5552
+ return -EINVAL;
5553
+ }
5554
+
5555
+ insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
5556
+ if (insn_bpf_sz < 0) {
5557
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
5558
+ prog->name, relo_idx, insn_idx, res->new_sz);
5559
+ return -EINVAL;
5560
+ }
5561
+
5562
+ insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
5563
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
5564
+ prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
5565
+ }
5566
+ break;
5567
+ case BPF_LD: {
5568
+ __u64 imm;
5569
+
5570
+ if (!is_ldimm64(insn) ||
5571
+ insn[0].src_reg != 0 || insn[0].off != 0 ||
5572
+ insn_idx + 1 >= prog->insns_cnt ||
5573
+ insn[1].code != 0 || insn[1].dst_reg != 0 ||
5574
+ insn[1].src_reg != 0 || insn[1].off != 0) {
5575
+ pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
5576
+ prog->name, relo_idx, insn_idx);
5577
+ return -EINVAL;
5578
+ }
5579
+
5580
+ imm = insn[0].imm + ((__u64)insn[1].imm << 32);
5581
+ if (res->validate && imm != orig_val) {
5582
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
5583
+ prog->name, relo_idx,
5584
+ insn_idx, (unsigned long long)imm,
5585
+ orig_val, new_val);
5586
+ return -EINVAL;
5587
+ }
5588
+
5589
+ insn[0].imm = new_val;
5590
+ insn[1].imm = 0; /* currently only 32-bit values are supported */
5591
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
5592
+ prog->name, relo_idx, insn_idx,
5593
+ (unsigned long long)imm, new_val);
5594
+ break;
5595
+ }
5596
+ default:
5597
+ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
5598
+ prog->name, relo_idx, insn_idx, insn->code,
5599
+ insn->src_reg, insn->dst_reg, insn->off, insn->imm);
5600
+ return -EINVAL;
5601
+ }
5602
+
5603
+ return 0;
5604
+}
5605
+
5606
+/* Output spec definition in the format:
5607
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
5608
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
5609
+ */
5610
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
5611
+{
5612
+ const struct btf_type *t;
5613
+ const struct btf_enum *e;
5614
+ const char *s;
5615
+ __u32 type_id;
5616
+ int i;
5617
+
5618
+ type_id = spec->root_type_id;
5619
+ t = btf__type_by_id(spec->btf, type_id);
5620
+ s = btf__name_by_offset(spec->btf, t->name_off);
5621
+
5622
+ libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
5623
+
5624
+ if (core_relo_is_type_based(spec->relo_kind))
5625
+ return;
5626
+
5627
+ if (core_relo_is_enumval_based(spec->relo_kind)) {
5628
+ t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
5629
+ e = btf_enum(t) + spec->raw_spec[0];
5630
+ s = btf__name_by_offset(spec->btf, e->name_off);
5631
+
5632
+ libbpf_print(level, "::%s = %u", s, e->val);
5633
+ return;
5634
+ }
5635
+
5636
+ if (core_relo_is_field_based(spec->relo_kind)) {
5637
+ for (i = 0; i < spec->len; i++) {
5638
+ if (spec->spec[i].name)
5639
+ libbpf_print(level, ".%s", spec->spec[i].name);
5640
+ else if (i > 0 || spec->spec[i].idx > 0)
5641
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
5642
+ }
5643
+
5644
+ libbpf_print(level, " (");
5645
+ for (i = 0; i < spec->raw_len; i++)
5646
+ libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
5647
+
5648
+ if (spec->bit_offset % 8)
5649
+ libbpf_print(level, " @ offset %u.%u)",
5650
+ spec->bit_offset / 8, spec->bit_offset % 8);
5651
+ else
5652
+ libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
5653
+ return;
5654
+ }
5655
+}
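
For reference, a sketch of what this prints for a field-based spec (hypothetical type IDs and offsets):

/* Example output with made-up numbers:
 *
 *   [124] struct sample.a[3].b (0:1:3:0 @ offset 44)
 *
 * i.e. root type [124], the C-style accessor chain, the raw low-level spec
 * in parentheses, and the resolved byte offset.
 */
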
5656
+
5657
+static size_t bpf_core_hash_fn(const void *key, void *ctx)
5658
+{
5659
+ return (size_t)key;
5660
+}
5661
+
5662
+static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
5663
+{
5664
+ return k1 == k2;
5665
+}
5666
+
5667
+static void *u32_as_hash_key(__u32 x)
5668
+{
5669
+ return (void *)(uintptr_t)x;
5670
+}
5671
+
5672
+/*
5673
+ * CO-RE relocate single instruction.
5674
+ *
5675
+ * The outline and important points of the algorithm:
5676
+ * 1. For given local type, find corresponding candidate target types.
5677
+ * Candidate type is a type with the same "essential" name, ignoring
5678
+ * everything after last triple underscore (___). E.g., `sample`,
5679
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
5680
+ * for each other. Names with triple underscore are referred to as
5681
+ * "flavors" and are useful, among other things, to allow to
5682
+ * specify/support incompatible variations of the same kernel struct, which
5683
+ * might differ between different kernel versions and/or build
5684
+ * configurations.
5685
+ *
5686
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
5687
+ * converter, when deduplicated BTF of a kernel still contains more than
5688
+ * one different type with the same name. In that case, ___2, ___3, etc
5689
+ * are appended starting from the second name conflict. But struct flavors are
5690
+ * also useful to define "locally", in a BPF program, to extract the same
5691
+ * data from incompatible changes between different kernel
5692
+ * versions/configurations. For instance, to handle field renames between
5693
+ * kernel versions, one can use two flavors of the struct name with the
5694
+ * same common name and use conditional relocations to extract that field,
5695
+ * depending on target kernel version.
5696
+ * 2. For each candidate type, try to match local specification to this
5697
+ * candidate target type. Matching involves finding corresponding
5698
+ * high-level spec accessors, meaning that all named fields should match,
5699
+ * as well as all array accesses should be within the actual bounds. Also,
5700
+ * types should be compatible (see bpf_core_fields_are_compat for details).
5701
+ * 3. It is supported and expected that there might be multiple flavors
5702
+ * matching the spec. As long as all the specs resolve to the same set of
5703
+ * offsets across all candidates, there is no error. If there is any
5704
+ * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
5705
+ * imperfection of BTF deduplication, which can cause slight duplication of
5706
+ * the same BTF type, if some directly or indirectly referenced (by
5707
+ * pointer) type gets resolved to different actual types in different
5708
+ * object files. If such situation occurs, deduplicated BTF will end up
5709
+ * with two (or more) structurally identical types, which differ only in
5710
+ * types they refer to through pointer. This should be OK in most cases and
5711
+ * is not an error.
5712
+ * 4. Candidate types search is performed by linearly scanning through all
5713
+ * types in target BTF. It is anticipated that this is overall more
5714
+ * efficient memory-wise and not significantly worse (if not better)
5715
+ * CPU-wise compared to prebuilding a map from all local type names to
5716
+ * a list of candidate type names. It's also sped up by caching resolved
5717
+ * list of matching candidates for each local "root" type ID that has at
5718
+ * least one bpf_core_relo associated with it. This list is shared
5719
+ * between multiple relocations for the same type ID and is updated as some
5720
+ * of the candidates are pruned due to structural incompatibility.
5721
+ */
5722
+static int bpf_core_apply_relo(struct bpf_program *prog,
5723
+ const struct bpf_core_relo *relo,
5724
+ int relo_idx,
5725
+ const struct btf *local_btf,
5726
+ const struct btf *targ_btf,
5727
+ struct hashmap *cand_cache)
5728
+{
5729
+ struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
5730
+ const void *type_key = u32_as_hash_key(relo->type_id);
5731
+ struct bpf_core_relo_res cand_res, targ_res;
5732
+ const struct btf_type *local_type;
5733
+ const char *local_name;
5734
+ struct ids_vec *cand_ids;
5735
+ __u32 local_id, cand_id;
5736
+ const char *spec_str;
5737
+ int i, j, err;
5738
+
5739
+ local_id = relo->type_id;
5740
+ local_type = btf__type_by_id(local_btf, local_id);
5741
+ if (!local_type)
5742
+ return -EINVAL;
5743
+
5744
+ local_name = btf__name_by_offset(local_btf, local_type->name_off);
5745
+ if (!local_name)
5746
+ return -EINVAL;
5747
+
5748
+ spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
5749
+ if (str_is_empty(spec_str))
5750
+ return -EINVAL;
5751
+
5752
+ err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
5753
+ if (err) {
5754
+ pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
5755
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
5756
+ str_is_empty(local_name) ? "<anon>" : local_name,
5757
+ spec_str, err);
5758
+ return -EINVAL;
5759
+ }
5760
+
5761
+ pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
5762
+ relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5763
+ bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
5764
+ libbpf_print(LIBBPF_DEBUG, "\n");
5765
+
5766
+ /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
5767
+ if (relo->kind == BPF_TYPE_ID_LOCAL) {
5768
+ targ_res.validate = true;
5769
+ targ_res.poison = false;
5770
+ targ_res.orig_val = local_spec.root_type_id;
5771
+ targ_res.new_val = local_spec.root_type_id;
5772
+ goto patch_insn;
5773
+ }
5774
+
5775
+ /* libbpf doesn't support candidate search for anonymous types */
5776
+ if (str_is_empty(spec_str)) {
5777
+ pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
5778
+ prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5779
+ return -EOPNOTSUPP;
5780
+ }
5781
+
5782
+ if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
5783
+ cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
5784
+ if (IS_ERR(cand_ids)) {
5785
+ pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
5786
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
5787
+ local_name, PTR_ERR(cand_ids));
5788
+ return PTR_ERR(cand_ids);
5789
+ }
5790
+ err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
5791
+ if (err) {
5792
+ bpf_core_free_cands(cand_ids);
5793
+ return err;
5794
+ }
5795
+ }
5796
+
5797
+ for (i = 0, j = 0; i < cand_ids->len; i++) {
5798
+ cand_id = cand_ids->data[i];
5799
+ err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
5800
+ if (err < 0) {
5801
+ pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
5802
+ prog->name, relo_idx, i);
5803
+ bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
5804
+ libbpf_print(LIBBPF_WARN, ": %d\n", err);
5805
+ return err;
5806
+ }
5807
+
5808
+ pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
5809
+ relo_idx, err == 0 ? "non-matching" : "matching", i);
5810
+ bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
5811
+ libbpf_print(LIBBPF_DEBUG, "\n");
5812
+
5813
+ if (err == 0)
5814
+ continue;
5815
+
5816
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
5817
+ if (err)
5818
+ return err;
5819
+
5820
+ if (j == 0) {
5821
+ targ_res = cand_res;
5822
+ targ_spec = cand_spec;
5823
+ } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
5824
+ /* if there are many field relo candidates, they
5825
+ * should all resolve to the same bit offset
5826
+ */
5827
+ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
5828
+ prog->name, relo_idx, cand_spec.bit_offset,
5829
+ targ_spec.bit_offset);
5830
+ return -EINVAL;
5831
+ } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
5832
+ /* all candidates should result in the same relocation
5833
+ * decision and value, otherwise it's dangerous to
5834
+ * proceed due to ambiguity
5835
+ */
5836
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
5837
+ prog->name, relo_idx,
5838
+ cand_res.poison ? "failure" : "success", cand_res.new_val,
5839
+ targ_res.poison ? "failure" : "success", targ_res.new_val);
5840
+ return -EINVAL;
5841
+ }
5842
+
5843
+ cand_ids->data[j++] = cand_spec.root_type_id;
5844
+ }
5845
+
5846
+ /*
5847
+ * For BPF_FIELD_EXISTS relo, or when the BPF program being used has field
5848
+ * existence checks or kernel version/config checks, it's expected
5849
+ * that we might not find any candidates. In this case, if field
5850
+ * wasn't found in any candidate, the list of candidates shouldn't
5851
+ * change at all, we'll just handle relocating appropriately,
5852
+ * depending on relo's kind.
5853
+ */
5854
+ if (j > 0)
5855
+ cand_ids->len = j;
5856
+
5857
+ /*
5858
+ * If no candidates were found, it might be either a programmer error
5859
+ * or an expected case, depending on whether the instruction with the
5860
+ * relocation is guarded in some way that makes it unreachable (dead
5861
+ * code) if relocation can't be resolved. This is handled in
5862
+ * bpf_core_patch_insn() uniformly by replacing that instruction with
5863
+ * BPF helper call insn (using invalid helper ID). If that instruction
5864
+ * is indeed unreachable, then it will be ignored and eliminated by
5865
+ * verifier. If it was an error, then verifier will complain and point
5866
+ * to a specific instruction number in its log.
5867
+ */
5868
+ if (j == 0) {
5869
+ pr_debug("prog '%s': relo #%d: no matching targets found\n",
5870
+ prog->name, relo_idx);
5871
+
5872
+ /* calculate single target relo result explicitly */
5873
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
5874
+ if (err)
5875
+ return err;
5876
+ }
5877
+
5878
+patch_insn:
5879
+ /* bpf_core_patch_insn() should know how to handle missing targ_spec */
5880
+ err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
5881
+ if (err) {
5882
+ pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
5883
+ prog->name, relo_idx, relo->insn_off, err);
5884
+ return -EINVAL;
5885
+ }
5886
+
5887
+ return 0;
5888
+}
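
As a sketch of how struct flavors from the comment above are used in practice (hypothetical field rename between kernel versions; only the flavor matching the running kernel's BTF will have its relocations succeed):

/* both flavors share the essential name "kern_struct"; candidate search
 * matches either one against the kernel's real struct kern_struct
 */
struct kern_struct___old {
	int old_name;
};

struct kern_struct {
	int new_name;
};

/* the program then reads whichever field exists, e.g. (assuming the
 * bpf_core_field_exists()/BPF_CORE_READ() helpers from bpf_core_read.h):
 *
 *	if (bpf_core_field_exists(((struct kern_struct___old *)p)->old_name))
 *		val = BPF_CORE_READ((struct kern_struct___old *)p, old_name);
 *	else
 *		val = BPF_CORE_READ((struct kern_struct *)p, new_name);
 */
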
5889
+
5890
+static int
5891
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5892
+{
5893
+ const struct btf_ext_info_sec *sec;
5894
+ const struct bpf_core_relo *rec;
5895
+ const struct btf_ext_info *seg;
5896
+ struct hashmap_entry *entry;
5897
+ struct hashmap *cand_cache = NULL;
5898
+ struct bpf_program *prog;
5899
+ struct btf *targ_btf;
5900
+ const char *sec_name;
5901
+ int i, err = 0, insn_idx, sec_idx;
5902
+
5903
+ if (obj->btf_ext->core_relo_info.len == 0)
5904
+ return 0;
5905
+
5906
+ if (targ_btf_path)
5907
+ targ_btf = btf__parse(targ_btf_path, NULL);
5908
+ else
5909
+ targ_btf = obj->btf_vmlinux;
5910
+ if (IS_ERR_OR_NULL(targ_btf)) {
5911
+ pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
5912
+ return PTR_ERR(targ_btf);
5913
+ }
5914
+
5915
+ cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5916
+ if (IS_ERR(cand_cache)) {
5917
+ err = PTR_ERR(cand_cache);
5918
+ goto out;
5919
+ }
5920
+
5921
+ seg = &obj->btf_ext->core_relo_info;
5922
+ for_each_btf_ext_sec(seg, sec) {
5923
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5924
+ if (str_is_empty(sec_name)) {
5925
+ err = -EINVAL;
5926
+ goto out;
5927
+ }
5928
+ /* bpf_object's ELF is gone by now so it's not easy to find
5929
+ * section index by section name, but we can find *any*
5930
+ * bpf_program within the desired section name and use its
5931
+ * prog->sec_idx to do a proper search by section index and
5932
+ * instruction offset
5933
+ */
5934
+ prog = NULL;
5935
+ for (i = 0; i < obj->nr_programs; i++) {
5936
+ if (strcmp(obj->programs[i].sec_name, sec_name) == 0) {
5937
+ prog = &obj->programs[i];
5938
+ break;
5939
+ }
5940
+ }
5941
+ if (!prog) {
5942
+ pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
5943
+ return -ENOENT;
5944
+ }
5945
+ sec_idx = prog->sec_idx;
5946
+
5947
+ pr_debug("sec '%s': found %d CO-RE relocations\n",
5948
+ sec_name, sec->num_info);
5949
+
5950
+ for_each_btf_ext_rec(seg, sec, i, rec) {
5951
+ insn_idx = rec->insn_off / BPF_INSN_SZ;
5952
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5953
+ if (!prog) {
5954
+ /* When __weak subprog is "overridden" by another instance
5955
+ * of the subprog from a different object file, linker still
5956
+ * appends all the .BTF.ext info that used to belong to that
5957
+ * eliminated subprogram.
5958
+ * This is similar to what x86-64 linker does for relocations.
5959
+ * So just ignore such relocations just like we ignore
5960
+ * subprog instructions when discovering subprograms.
5961
+ */
5962
+ pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5963
+ sec_name, i, insn_idx);
5964
+ continue;
5965
+ }
5966
+ /* no need to apply CO-RE relocation if the program is
5967
+ * not going to be loaded
5968
+ */
5969
+ if (!prog->load)
5970
+ continue;
5971
+
5972
+ err = bpf_core_apply_relo(prog, rec, i, obj->btf,
5973
+ targ_btf, cand_cache);
5974
+ if (err) {
5975
+ pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5976
+ prog->name, i, err);
5977
+ goto out;
5978
+ }
5979
+ }
5980
+ }
5981
+
5982
+out:
5983
+ /* obj->btf_vmlinux is freed at the end of object load phase */
5984
+ if (targ_btf != obj->btf_vmlinux)
5985
+ btf__free(targ_btf);
5986
+ if (!IS_ERR_OR_NULL(cand_cache)) {
5987
+ hashmap__for_each_entry(cand_cache, entry, i) {
5988
+ bpf_core_free_cands(entry->value);
5989
+ }
5990
+ hashmap__free(cand_cache);
5991
+ }
5992
+ return err;
5993
+}
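
For completeness, a hedged sketch of how an application supplies the custom target BTF that ends up as targ_btf_path here (assuming this libbpf version's bpf_object__load_xattr() API; error handling omitted):

#include <bpf/libbpf.h>

static int load_with_custom_btf(struct bpf_object *obj, const char *btf_path)
{
	struct bpf_object_load_attr load_attr = {
		.obj = obj,
		.target_btf_path = btf_path,	/* e.g. "/sys/kernel/btf/vmlinux" */
	};

	/* bpf_object__load_xattr() passes target_btf_path down to
	 * bpf_object__relocate_core() instead of using obj->btf_vmlinux
	 */
	return bpf_object__load_xattr(&load_attr);
}
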
5994
+
5995
+/* Relocate data references within program code:
5996
+ * - map references;
5997
+ * - global variable references;
5998
+ * - extern references.
5999
+ */
6000
+static int
6001
+bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
6002
+{
6003
+ int i;
6004
+
6005
+ for (i = 0; i < prog->nr_reloc; i++) {
6006
+ struct reloc_desc *relo = &prog->reloc_desc[i];
6007
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6008
+ struct extern_desc *ext;
6009
+
6010
+ switch (relo->type) {
6011
+ case RELO_LD64:
6012
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
6013
+ insn[0].imm = obj->maps[relo->map_idx].fd;
6014
+ relo->processed = true;
6015
+ break;
6016
+ case RELO_DATA:
6017
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6018
+ insn[1].imm = insn[0].imm + relo->sym_off;
6019
+ insn[0].imm = obj->maps[relo->map_idx].fd;
6020
+ relo->processed = true;
6021
+ break;
6022
+ case RELO_EXTERN:
6023
+ ext = &obj->externs[relo->sym_off];
6024
+ if (ext->type == EXT_KCFG) {
6025
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6026
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6027
+ insn[1].imm = ext->kcfg.data_off;
6028
+ } else /* EXT_KSYM */ {
6029
+ if (ext->ksym.type_id) { /* typed ksyms */
6030
+ insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6031
+ insn[0].imm = ext->ksym.vmlinux_btf_id;
6032
+ } else { /* typeless ksyms */
6033
+ insn[0].imm = (__u32)ext->ksym.addr;
6034
+ insn[1].imm = ext->ksym.addr >> 32;
6035
+ }
6036
+ }
6037
+ relo->processed = true;
6038
+ break;
6039
+ case RELO_CALL:
6040
+ /* will be handled as a follow up pass */
6041
+ break;
6042
+ default:
6043
+ pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6044
+ prog->name, i, relo->type);
6045
+ return -EINVAL;
6046
+ }
6047
+ }
6048
+
6049
+ return 0;
6050
+}
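
To illustrate what produces a RELO_LD64 handled above: a BPF-side reference to a map compiles into a two-instruction ldimm64 whose first imm is rewritten here to the map's fd (a minimal sketch, assuming the __uint()/__type()/SEC() helpers from bpf_helpers.h):

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 128);
	__type(key, __u32);
	__type(value, __u64);
} counts SEC(".maps");

/* a lookup such as
 *	__u64 *val = bpf_map_lookup_elem(&counts, &key);
 * references '&counts' via an ldimm64 insn; after relocation:
 *	insn[0].src_reg = BPF_PSEUDO_MAP_FD;
 *	insn[0].imm     = <fd of the 'counts' map>;
 */
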
6051
+
6052
+static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6053
+ const struct bpf_program *prog,
6054
+ const struct btf_ext_info *ext_info,
6055
+ void **prog_info, __u32 *prog_rec_cnt,
6056
+ __u32 *prog_rec_sz)
6057
+{
6058
+ void *copy_start = NULL, *copy_end = NULL;
6059
+ void *rec, *rec_end, *new_prog_info;
6060
+ const struct btf_ext_info_sec *sec;
6061
+ size_t old_sz, new_sz;
6062
+ const char *sec_name;
6063
+ int i, off_adj;
6064
+
6065
+ for_each_btf_ext_sec(ext_info, sec) {
6066
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6067
+ if (!sec_name)
6068
+ return -EINVAL;
6069
+ if (strcmp(sec_name, prog->sec_name) != 0)
6070
+ continue;
6071
+
6072
+ for_each_btf_ext_rec(ext_info, sec, i, rec) {
6073
+ __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6074
+
6075
+ if (insn_off < prog->sec_insn_off)
6076
+ continue;
6077
+ if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6078
+ break;
6079
+
6080
+ if (!copy_start)
6081
+ copy_start = rec;
6082
+ copy_end = rec + ext_info->rec_size;
6083
+ }
6084
+
6085
+ if (!copy_start)
6086
+ return -ENOENT;
6087
+
6088
+ /* append func/line info of a given (sub-)program to the main
6089
+ * program func/line info
6090
+ */
6091
+ old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6092
+ new_sz = old_sz + (copy_end - copy_start);
6093
+ new_prog_info = realloc(*prog_info, new_sz);
6094
+ if (!new_prog_info)
6095
+ return -ENOMEM;
6096
+ *prog_info = new_prog_info;
6097
+ *prog_rec_cnt = new_sz / ext_info->rec_size;
6098
+ memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6099
+
6100
+ /* Kernel instruction offsets are in units of 8-byte
6101
+ * instructions, while .BTF.ext instruction offsets generated
6102
+ * by Clang are in units of bytes. So convert Clang offsets
6103
+ * into kernel offsets and adjust the offset according to the program's
6104
+ * relocated position.
6105
+ */
6106
+ off_adj = prog->sub_insn_off - prog->sec_insn_off;
6107
+ rec = new_prog_info + old_sz;
6108
+ rec_end = new_prog_info + new_sz;
6109
+ for (; rec < rec_end; rec += ext_info->rec_size) {
6110
+ __u32 *insn_off = rec;
6111
+
6112
+ *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6113
+ }
6114
+ *prog_rec_sz = ext_info->rec_size;
6115
+ return 0;
6116
+ }
6117
+
6118
+ return -ENOENT;
6119
+}
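
A quick worked example of the unit conversion above (hypothetical numbers, plain C for illustration):

static unsigned int example_adjust_insn_off(void)
{
	unsigned int clang_off = 80;		/* byte offset recorded by Clang */
	unsigned int insn_off = clang_off / 8;	/* = 10, in 8-byte insn units */
	int off_adj = 30 - 10;			/* sub_insn_off - sec_insn_off */

	return insn_off + off_adj;		/* = 30, offset in relocated main prog */
}
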
6120
+
6121
+static int
6122
+reloc_prog_func_and_line_info(const struct bpf_object *obj,
6123
+ struct bpf_program *main_prog,
6124
+ const struct bpf_program *prog)
6125
+{
6126
+ int err;
6127
+
6128
+ /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6129
+ * support func/line info
6130
+ */
6131
+ if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
6132
+ return 0;
6133
+
6134
+ /* only attempt func info relocation if main program's func_info
6135
+ * relocation was successful
6136
+ */
6137
+ if (main_prog != prog && !main_prog->func_info)
6138
+ goto line_info;
6139
+
6140
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6141
+ &main_prog->func_info,
6142
+ &main_prog->func_info_cnt,
6143
+ &main_prog->func_info_rec_size);
6144
+ if (err) {
6145
+ if (err != -ENOENT) {
6146
+ pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
6147
+ prog->name, err);
6148
+ return err;
6149
+ }
6150
+ if (main_prog->func_info) {
6151
+ /*
6152
+ * Some info has already been found, but there was a problem
6153
+ * with the last btf_ext reloc. We must error out.
6154
+ */
6155
+ pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6156
+ return err;
6157
+ }
6158
+ /* Have problem loading the very first info. Ignore the rest. */
6159
+ pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6160
+ prog->name);
6161
+ }
6162
+
6163
+line_info:
6164
+ /* don't relocate line info if main program's relocation failed */
6165
+ if (main_prog != prog && !main_prog->line_info)
6166
+ return 0;
6167
+
6168
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6169
+ &main_prog->line_info,
6170
+ &main_prog->line_info_cnt,
6171
+ &main_prog->line_info_rec_size);
6172
+ if (err) {
6173
+ if (err != -ENOENT) {
6174
+ pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6175
+ prog->name, err);
6176
+ return err;
6177
+ }
6178
+ if (main_prog->line_info) {
6179
+ /*
6180
+ * Some info has already been found, but there was a problem
6181
+ * with the last btf_ext reloc. We must error out.
6182
+ */
6183
+ pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6184
+ return err;
6185
+ }
6186
+ /* Have problem loading the very first info. Ignore the rest. */
6187
+ pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6188
+ prog->name);
6189
+ }
6190
+ return 0;
6191
+}
6192
+
6193
+static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6194
+{
6195
+ size_t insn_idx = *(const size_t *)key;
6196
+ const struct reloc_desc *relo = elem;
6197
+
6198
+ if (insn_idx == relo->insn_idx)
6199
+ return 0;
6200
+ return insn_idx < relo->insn_idx ? -1 : 1;
6201
+}
6202
+
6203
+static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6204
+{
6205
+ return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6206
+ sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6207
+}
6208
+
6209
+static int
6210
+bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6211
+ struct bpf_program *prog)
6212
+{
6213
+ size_t sub_insn_idx, insn_idx, new_cnt;
6214
+ struct bpf_program *subprog;
6215
+ struct bpf_insn *insns, *insn;
6216
+ struct reloc_desc *relo;
6217
+ int err;
6218
+
6219
+ err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6220
+ if (err)
6221
+ return err;
6222
+
6223
+ for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6224
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6225
+ if (!insn_is_subprog_call(insn))
6226
+ continue;
6227
+
6228
+ relo = find_prog_insn_relo(prog, insn_idx);
6229
+ if (relo && relo->type != RELO_CALL) {
6230
+ pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6231
+ prog->name, insn_idx, relo->type);
6232
+ return -LIBBPF_ERRNO__RELOC;
6233
+ }
6234
+ if (relo) {
6235
+ /* sub-program instruction index is a combination of
6236
+ * an offset of a symbol pointed to by relocation and
6237
+ * call instruction's imm field; for global functions,
6238
+ * call always has imm = -1, but for static functions
6239
+ * relocation is against STT_SECTION and insn->imm
6240
+ * points to a start of a static function
6241
+ */
6242
+ sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6243
+ } else {
6244
+ /* if subprogram call is to a static function within
6245
+ * the same ELF section, there won't be any relocation
6246
+ * emitted, but it also means there is no additional
6247
+ * offset necessary, insns->imm is relative to
6248
+ * instruction's original position within the section
6249
+ */
6250
+ sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6251
+ }
6252
+
6253
+ /* we enforce that sub-programs should be in .text section */
6254
+ subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6255
+ if (!subprog) {
6256
+ pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6257
+ prog->name);
6258
+ return -LIBBPF_ERRNO__RELOC;
6259
+ }
6260
+
6261
+ /* if it's the first call instruction calling into this
6262
+ * subprogram (meaning this subprog hasn't been processed
6263
+ * yet) within the context of current main program:
6264
+ * - append it at the end of the main program's instruction block;
6265
+ * - process it recursively, while the current program is put on hold;
6266
+ * - if that subprogram calls some other not yet processed
6267
+ * subprogram, same thing will happen recursively until
6268
+ * there are no more unprocessed subprograms left to append
6269
+ * and relocate.
6270
+ */
6271
+ if (subprog->sub_insn_off == 0) {
6272
+ subprog->sub_insn_off = main_prog->insns_cnt;
6273
+
6274
+ new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6275
+ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6276
+ if (!insns) {
6277
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6278
+ return -ENOMEM;
6279
+ }
6280
+ main_prog->insns = insns;
6281
+ main_prog->insns_cnt = new_cnt;
6282
+
6283
+ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6284
+ subprog->insns_cnt * sizeof(*insns));
6285
+
6286
+ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6287
+ main_prog->name, subprog->insns_cnt, subprog->name);
6288
+
6289
+ err = bpf_object__reloc_code(obj, main_prog, subprog);
12486290 if (err)
12496291 return err;
12506292 }
6293
+
6294
+ /* main_prog->insns memory could have been re-allocated, so
6295
+ * calculate pointer again
6296
+ */
6297
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6298
+ /* calculate correct instruction position within current main
6299
+ * prog; each main prog can have a different set of
6300
+ * subprograms appended (potentially in different order as
6301
+ * well), so position of any subprog can be different for
6302
+ * different main programs */
6303
+ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6304
+
6305
+ if (relo)
6306
+ relo->processed = true;
6307
+
6308
+ pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6309
+ prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
12516310 }
12526311
1253
- zfree(&prog->reloc_desc);
1254
- prog->nr_reloc = 0;
12556312 return 0;
12566313 }
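
A worked example of the imm computation used above (hypothetical instruction counts): the call target is encoded relative to the instruction following the call.

static int example_call_imm(void)
{
	int sub_insn_off = 20;	/* subprog appended after 20 main-prog insns */
	int prog_off = 0;	/* caller's sub_insn_off within the main prog */
	int insn_idx = 5;	/* call instruction's index within the caller */

	return sub_insn_off - (prog_off + insn_idx) - 1;	/* = 14 */
}
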
12576314
6315
+/*
6316
+ * Relocate sub-program calls.
6317
+ *
6318
+ * Algorithm operates as follows. Each entry-point BPF program (referred to as
6319
+ * main prog) is processed separately. For each subprog (non-entry functions,
6320
+ * that can be called from either entry progs or other subprogs) gets their
6321
+ * sub_insn_off reset to zero. This serves as indicator that this subprogram
6322
+ * hasn't been yet appended and relocated within current main prog. Once its
6323
+ * relocated, sub_insn_off will point at the position within current main prog
6324
+ * where given subprog was appended. This will further be used to relocate all
6325
+ * the call instructions jumping into this subprog.
6326
+ *
6327
+ * We start with main program and process all call instructions. If the call
6328
+ * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6329
+ * is zero), subprog instructions are appended at the end of main program's
6330
+ * instruction array. Then main program is "put on hold" while we recursively
6331
+ * process newly appended subprogram. If that subprogram calls into another
6332
+ * subprogram that hasn't been appended, new subprogram is appended again to
6333
+ * the *main* prog's instructions (subprog's instructions are always left
6334
+ * untouched, as they need to be in unmodified state for subsequent main progs
6335
+ * and subprog instructions are always sent only as part of a main prog) and
6336
+ * the process continues recursively. Once all the subprogs called from a main
6337
+ * prog or any of its subprogs are appended (and relocated), all their
6338
+ * positions within finalized instructions array are known, so it's easy to
6339
+ * rewrite call instructions with correct relative offsets, corresponding to
6340
+ * desired target subprog.
6341
+ *
6342
+ * It's important to realize that some subprogs might not be called from some
6343
+ * main prog or any of its called/used subprogs. Those will keep their
6344
+ * subprog->sub_insn_off as zero at all times and won't be appended to current
6345
+ * main prog and won't be relocated within the context of current main prog.
6346
+ * They might still be used from other main progs later.
6347
+ *
6348
+ * Visually this process can be shown as below. Suppose we have two main
6349
+ * programs mainA and mainB and BPF object contains three subprogs: subA,
6350
+ * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6351
+ * subC both call subB:
6352
+ *
6353
+ * +--------+ +-------+
6354
+ * | v v |
6355
+ * +--+---+ +--+-+-+ +---+--+
6356
+ * | subA | | subB | | subC |
6357
+ * +--+---+ +------+ +---+--+
6358
+ * ^ ^
6359
+ * | |
6360
+ * +---+-------+ +------+----+
6361
+ * | mainA | | mainB |
6362
+ * +-----------+ +-----------+
6363
+ *
6364
+ * We'll start relocating mainA, will find subA, append it and start
6365
+ * processing subA recursively:
6366
+ *
6367
+ * +-----------+------+
6368
+ * | mainA | subA |
6369
+ * +-----------+------+
6370
+ *
6371
+ * At this point we notice that subB is used from subA, so we append it and
6372
+ * relocate (there are no further subcalls from subB):
6373
+ *
6374
+ * +-----------+------+------+
6375
+ * | mainA | subA | subB |
6376
+ * +-----------+------+------+
6377
+ *
6378
+ * At this point, we relocate subA calls, then go one level up and finish with
6379
+ * relocating mainA calls. mainA is done.
6380
+ *
6381
+ * For mainB, the process is similar but results in a different order. We start with
6382
+ * mainB and skip subA and subB, as mainB never calls them (at least
6383
+ * directly), but we see subC is needed, so we append and start processing it:
6384
+ *
6385
+ * +-----------+------+
6386
+ * | mainB | subC |
6387
+ * +-----------+------+
6388
+ * Now we see subC needs subB, so we go back to it, append and relocate it:
6389
+ *
6390
+ * +-----------+------+------+
6391
+ * | mainB | subC | subB |
6392
+ * +-----------+------+------+
6393
+ *
6394
+ * At this point we unwind recursion, relocate calls in subC, then in mainB.
6395
+ */
6396
+static int
6397
+bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6398
+{
6399
+ struct bpf_program *subprog;
6400
+ int i, j, err;
6401
+
6402
+ /* mark all subprogs as not relocated (yet) within the context of
6403
+ * current main program
6404
+ */
6405
+ for (i = 0; i < obj->nr_programs; i++) {
6406
+ subprog = &obj->programs[i];
6407
+ if (!prog_is_subprog(obj, subprog))
6408
+ continue;
6409
+
6410
+ subprog->sub_insn_off = 0;
6411
+ for (j = 0; j < subprog->nr_reloc; j++)
6412
+ if (subprog->reloc_desc[j].type == RELO_CALL)
6413
+ subprog->reloc_desc[j].processed = false;
6414
+ }
6415
+
6416
+ err = bpf_object__reloc_code(obj, prog, prog);
6417
+ if (err)
6418
+ return err;
6419
+
6420
+
6421
+ return 0;
6422
+}
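As a concrete illustration of the imm rewrite done in bpf_object__reloc_code() above (insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1), here is a small standalone sketch. The instruction counts and offsets are made-up illustrative numbers, not taken from the source.

#include <stdio.h>

/* Toy layout: mainA occupies instructions [0..9] (sub_insn_off == 0) and
 * subA has just been appended after it at sub_insn_off == 10. A call to
 * subA sitting at insn_idx == 5 inside mainA is encoded relative to the
 * *next* instruction, hence the trailing "- 1". */
static int call_imm(size_t subprog_off, size_t caller_off, size_t insn_idx)
{
	return (int)(subprog_off - (caller_off + insn_idx) - 1);
}

int main(void)
{
	/* prints "imm = 4": skip 4 instructions past the call to land on
	 * subA's first appended instruction at absolute index 10 */
	printf("imm = %d\n", call_imm(10, 0, 5));
	return 0;
}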
12586423
12596424 static int
1260
-bpf_object__relocate(struct bpf_object *obj)
6425
+bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
12616426 {
12626427 struct bpf_program *prog;
12636428 size_t i;
12646429 int err;
12656430
1266
- for (i = 0; i < obj->nr_programs; i++) {
1267
- prog = &obj->programs[i];
1268
-
1269
- err = bpf_program__relocate(prog, obj);
6431
+ if (obj->btf_ext) {
6432
+ err = bpf_object__relocate_core(obj, targ_btf_path);
12706433 if (err) {
1271
- pr_warning("failed to relocate '%s'\n",
1272
- prog->section_name);
6434
+ pr_warn("failed to perform CO-RE relocations: %d\n",
6435
+ err);
12736436 return err;
12746437 }
6438
+ }
6439
+ /* relocate data references first for all programs and sub-programs,
6440
+ * as they don't change relative to code locations, so subsequent
6441
+ * subprogram processing won't need to re-calculate any of them
6442
+ */
6443
+ for (i = 0; i < obj->nr_programs; i++) {
6444
+ prog = &obj->programs[i];
6445
+ err = bpf_object__relocate_data(obj, prog);
6446
+ if (err) {
6447
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
6448
+ prog->name, err);
6449
+ return err;
6450
+ }
6451
+ }
6452
+ /* now relocate subprogram calls and append used subprograms to main
6453
+ * programs; each copy of subprogram code needs to be relocated
6454
+ * differently for each main program, because its code location might
6455
+ * have changed
6456
+ */
6457
+ for (i = 0; i < obj->nr_programs; i++) {
6458
+ prog = &obj->programs[i];
6459
+ /* sub-program's sub-calls are relocated within the context of
6460
+ * its main program only
6461
+ */
6462
+ if (prog_is_subprog(obj, prog))
6463
+ continue;
6464
+
6465
+ err = bpf_object__relocate_calls(obj, prog);
6466
+ if (err) {
6467
+ pr_warn("prog '%s': failed to relocate calls: %d\n",
6468
+ prog->name, err);
6469
+ return err;
6470
+ }
6471
+ }
6472
+ /* free up relocation descriptors */
6473
+ for (i = 0; i < obj->nr_programs; i++) {
6474
+ prog = &obj->programs[i];
6475
+ zfree(&prog->reloc_desc);
6476
+ prog->nr_reloc = 0;
12756477 }
12766478 return 0;
12776479 }
12786480
1279
-static int bpf_object__collect_reloc(struct bpf_object *obj)
6481
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6482
+ GElf_Shdr *shdr, Elf_Data *data);
6483
+
6484
+static int bpf_object__collect_map_relos(struct bpf_object *obj,
6485
+ GElf_Shdr *shdr, Elf_Data *data)
12806486 {
1281
- int i, err;
6487
+ const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6488
+ int i, j, nrels, new_sz;
6489
+ const struct btf_var_secinfo *vi = NULL;
6490
+ const struct btf_type *sec, *var, *def;
6491
+ struct bpf_map *map = NULL, *targ_map;
6492
+ const struct btf_member *member;
6493
+ const char *name, *mname;
6494
+ Elf_Data *symbols;
6495
+ unsigned int moff;
6496
+ GElf_Sym sym;
6497
+ GElf_Rel rel;
6498
+ void *tmp;
12826499
1283
- if (!obj_elf_valid(obj)) {
1284
- pr_warning("Internal error: elf object is closed\n");
1285
- return -LIBBPF_ERRNO__INTERNAL;
1286
- }
6500
+ if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6501
+ return -EINVAL;
6502
+ sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6503
+ if (!sec)
6504
+ return -EINVAL;
12876505
1288
- for (i = 0; i < obj->efile.nr_reloc; i++) {
1289
- GElf_Shdr *shdr = &obj->efile.reloc[i].shdr;
1290
- Elf_Data *data = obj->efile.reloc[i].data;
1291
- int idx = shdr->sh_info;
1292
- struct bpf_program *prog;
1293
-
1294
- if (shdr->sh_type != SHT_REL) {
1295
- pr_warning("internal error at %d\n", __LINE__);
1296
- return -LIBBPF_ERRNO__INTERNAL;
6506
+ symbols = obj->efile.symbols;
6507
+ nrels = shdr->sh_size / shdr->sh_entsize;
6508
+ for (i = 0; i < nrels; i++) {
6509
+ if (!gelf_getrel(data, i, &rel)) {
6510
+ pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6511
+ return -LIBBPF_ERRNO__FORMAT;
12976512 }
1298
-
1299
- prog = bpf_object__find_prog_by_idx(obj, idx);
1300
- if (!prog) {
1301
- pr_warning("relocation failed: no section(%d)\n", idx);
6513
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
6514
+ pr_warn(".maps relo #%d: symbol %zx not found\n",
6515
+ i, (size_t)GELF_R_SYM(rel.r_info));
6516
+ return -LIBBPF_ERRNO__FORMAT;
6517
+ }
6518
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
6519
+ if (sym.st_shndx != obj->efile.btf_maps_shndx) {
6520
+ pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6521
+ i, name);
13026522 return -LIBBPF_ERRNO__RELOC;
13036523 }
13046524
1305
- err = bpf_program__collect_reloc(prog,
1306
- shdr, data,
1307
- obj);
6525
+ pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
6526
+ i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
6527
+ (size_t)rel.r_offset, sym.st_name, name);
6528
+
6529
+ for (j = 0; j < obj->nr_maps; j++) {
6530
+ map = &obj->maps[j];
6531
+ if (map->sec_idx != obj->efile.btf_maps_shndx)
6532
+ continue;
6533
+
6534
+ vi = btf_var_secinfos(sec) + map->btf_var_idx;
6535
+ if (vi->offset <= rel.r_offset &&
6536
+ rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6537
+ break;
6538
+ }
6539
+ if (j == obj->nr_maps) {
6540
+ pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
6541
+ i, name, (size_t)rel.r_offset);
6542
+ return -EINVAL;
6543
+ }
6544
+
6545
+ if (!bpf_map_type__is_map_in_map(map->def.type))
6546
+ return -EINVAL;
6547
+ if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6548
+ map->def.key_size != sizeof(int)) {
6549
+ pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6550
+ i, map->name, sizeof(int));
6551
+ return -EINVAL;
6552
+ }
6553
+
6554
+ targ_map = bpf_object__find_map_by_name(obj, name);
6555
+ if (!targ_map)
6556
+ return -ESRCH;
6557
+
6558
+ var = btf__type_by_id(obj->btf, vi->type);
6559
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6560
+ if (btf_vlen(def) == 0)
6561
+ return -EINVAL;
6562
+ member = btf_members(def) + btf_vlen(def) - 1;
6563
+ mname = btf__name_by_offset(obj->btf, member->name_off);
6564
+ if (strcmp(mname, "values"))
6565
+ return -EINVAL;
6566
+
6567
+ moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6568
+ if (rel.r_offset - vi->offset < moff)
6569
+ return -EINVAL;
6570
+
6571
+ moff = rel.r_offset - vi->offset - moff;
6572
+ /* here we use BPF pointer size, which is always 64 bit, as we
6573
+ * are parsing ELF that was built for BPF target
6574
+ */
6575
+ if (moff % bpf_ptr_sz)
6576
+ return -EINVAL;
6577
+ moff /= bpf_ptr_sz;
6578
+ if (moff >= map->init_slots_sz) {
6579
+ new_sz = moff + 1;
6580
+ tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6581
+ if (!tmp)
6582
+ return -ENOMEM;
6583
+ map->init_slots = tmp;
6584
+ memset(map->init_slots + map->init_slots_sz, 0,
6585
+ (new_sz - map->init_slots_sz) * host_ptr_sz);
6586
+ map->init_slots_sz = new_sz;
6587
+ }
6588
+ map->init_slots[moff] = targ_map;
6589
+
6590
+ pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
6591
+ i, map->name, moff, name);
6592
+ }
6593
+
6594
+ return 0;
6595
+}
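The relocation handling above is what makes BTF-defined map-in-map initializers work. Below is a hedged BPF-side sketch of such a declaration (the map names are illustrative); the &inner_a reference in the .values initializer is what shows up as a relocation against the "values" member and ends up in init_slots[0].

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* inner map, also used as the BTF type of the outer map's values */
struct inner_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, int);
} inner_a SEC(".maps");

/* outer map-in-map; slot 0 is pre-populated by libbpf from init_slots[] */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 2);
	__type(key, int);
	__array(values, struct inner_map);
} outer SEC(".maps") = {
	.values = { [0] = &inner_a },
};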
6596
+
6597
+static int cmp_relocs(const void *_a, const void *_b)
6598
+{
6599
+ const struct reloc_desc *a = _a;
6600
+ const struct reloc_desc *b = _b;
6601
+
6602
+ if (a->insn_idx != b->insn_idx)
6603
+ return a->insn_idx < b->insn_idx ? -1 : 1;
6604
+
6605
+ /* no two relocations should have the same insn_idx, but ... */
6606
+ if (a->type != b->type)
6607
+ return a->type < b->type ? -1 : 1;
6608
+
6609
+ return 0;
6610
+}
6611
+
6612
+static int bpf_object__collect_relos(struct bpf_object *obj)
6613
+{
6614
+ int i, err;
6615
+
6616
+ for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
6617
+ GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
6618
+ Elf_Data *data = obj->efile.reloc_sects[i].data;
6619
+ int idx = shdr->sh_info;
6620
+
6621
+ if (shdr->sh_type != SHT_REL) {
6622
+ pr_warn("internal error at %d\n", __LINE__);
6623
+ return -LIBBPF_ERRNO__INTERNAL;
6624
+ }
6625
+
6626
+ if (idx == obj->efile.st_ops_shndx)
6627
+ err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6628
+ else if (idx == obj->efile.btf_maps_shndx)
6629
+ err = bpf_object__collect_map_relos(obj, shdr, data);
6630
+ else
6631
+ err = bpf_object__collect_prog_relos(obj, shdr, data);
13086632 if (err)
13096633 return err;
6634
+ }
6635
+
6636
+ for (i = 0; i < obj->nr_programs; i++) {
6637
+ struct bpf_program *p = &obj->programs[i];
6638
+
6639
+ if (!p->nr_reloc)
6640
+ continue;
6641
+
6642
+ qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6643
+ }
6644
+ return 0;
6645
+}
6646
+
6647
+static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6648
+{
6649
+ if (BPF_CLASS(insn->code) == BPF_JMP &&
6650
+ BPF_OP(insn->code) == BPF_CALL &&
6651
+ BPF_SRC(insn->code) == BPF_K &&
6652
+ insn->src_reg == 0 &&
6653
+ insn->dst_reg == 0) {
6654
+ *func_id = insn->imm;
6655
+ return true;
6656
+ }
6657
+ return false;
6658
+}
6659
+
6660
+static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
6661
+{
6662
+ struct bpf_insn *insn = prog->insns;
6663
+ enum bpf_func_id func_id;
6664
+ int i;
6665
+
6666
+ for (i = 0; i < prog->insns_cnt; i++, insn++) {
6667
+ if (!insn_is_helper_call(insn, &func_id))
6668
+ continue;
6669
+
6670
+ /* on kernels that don't yet support
6671
+ * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6672
+ * to bpf_probe_read() which works well for old kernels
6673
+ */
6674
+ switch (func_id) {
6675
+ case BPF_FUNC_probe_read_kernel:
6676
+ case BPF_FUNC_probe_read_user:
6677
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
6678
+ insn->imm = BPF_FUNC_probe_read;
6679
+ break;
6680
+ case BPF_FUNC_probe_read_kernel_str:
6681
+ case BPF_FUNC_probe_read_user_str:
6682
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
6683
+ insn->imm = BPF_FUNC_probe_read_str;
6684
+ break;
6685
+ default:
6686
+ break;
6687
+ }
13106688 }
13116689 return 0;
13126690 }
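The fallback above lets one object file run on kernels that predate the split probe_read helpers. A hedged BPF-side sketch follows; the probed function, program name and licence string are illustrative, not from the source.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

SEC("kprobe/do_nanosleep")
int probe_read_demo(void *ctx)
{
	long val = 0;

	/* rewritten in place to BPF_FUNC_probe_read by
	 * bpf_object__sanitize_prog() above when the running kernel
	 * lacks FEAT_PROBE_READ_KERN */
	bpf_probe_read_kernel(&val, sizeof(val), ctx);
	return val != 0;
}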
13136691
13146692 static int
1315
-load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
1316
- const char *name, struct bpf_insn *insns, int insns_cnt,
1317
- char *license, u32 kern_version, int *pfd, int prog_ifindex)
6693
+load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
6694
+ char *license, __u32 kern_version, int *pfd)
13186695 {
13196696 struct bpf_load_program_attr load_attr;
13206697 char *cp, errmsg[STRERR_BUFSIZE];
1321
- char *log_buf;
1322
- int ret;
6698
+ size_t log_buf_size = 0;
6699
+ char *log_buf = NULL;
6700
+ int btf_fd, ret;
6701
+
6702
+ if (!insns || !insns_cnt)
6703
+ return -EINVAL;
13236704
13246705 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
1325
- load_attr.prog_type = type;
1326
- load_attr.expected_attach_type = expected_attach_type;
1327
- load_attr.name = name;
6706
+ load_attr.prog_type = prog->type;
6707
+ /* old kernels might not support specifying expected_attach_type */
6708
+ if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
6709
+ prog->sec_def->is_exp_attach_type_optional)
6710
+ load_attr.expected_attach_type = 0;
6711
+ else
6712
+ load_attr.expected_attach_type = prog->expected_attach_type;
6713
+ if (kernel_supports(FEAT_PROG_NAME))
6714
+ load_attr.name = prog->name;
13286715 load_attr.insns = insns;
13296716 load_attr.insns_cnt = insns_cnt;
13306717 load_attr.license = license;
1331
- load_attr.kern_version = kern_version;
1332
- load_attr.prog_ifindex = prog_ifindex;
6718
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
6719
+ prog->type == BPF_PROG_TYPE_LSM) {
6720
+ load_attr.attach_btf_id = prog->attach_btf_id;
6721
+ } else if (prog->type == BPF_PROG_TYPE_TRACING ||
6722
+ prog->type == BPF_PROG_TYPE_EXT) {
6723
+ load_attr.attach_prog_fd = prog->attach_prog_fd;
6724
+ load_attr.attach_btf_id = prog->attach_btf_id;
6725
+ } else {
6726
+ load_attr.kern_version = kern_version;
6727
+ load_attr.prog_ifindex = prog->prog_ifindex;
6728
+ }
6729
+ /* specify func_info/line_info only if kernel supports them */
6730
+ btf_fd = bpf_object__btf_fd(prog->obj);
6731
+ if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
6732
+ load_attr.prog_btf_fd = btf_fd;
6733
+ load_attr.func_info = prog->func_info;
6734
+ load_attr.func_info_rec_size = prog->func_info_rec_size;
6735
+ load_attr.func_info_cnt = prog->func_info_cnt;
6736
+ load_attr.line_info = prog->line_info;
6737
+ load_attr.line_info_rec_size = prog->line_info_rec_size;
6738
+ load_attr.line_info_cnt = prog->line_info_cnt;
6739
+ }
6740
+ load_attr.log_level = prog->log_level;
6741
+ load_attr.prog_flags = prog->prog_flags;
13336742
1334
- if (!load_attr.insns || !load_attr.insns_cnt)
1335
- return -EINVAL;
6743
+retry_load:
6744
+ if (log_buf_size) {
6745
+ log_buf = malloc(log_buf_size);
6746
+ if (!log_buf)
6747
+ return -ENOMEM;
13366748
1337
- log_buf = malloc(BPF_LOG_BUF_SIZE);
1338
- if (!log_buf)
1339
- pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
6749
+ *log_buf = 0;
6750
+ }
13406751
1341
- ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
6752
+ ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
13426753
13436754 if (ret >= 0) {
6755
+ if (log_buf && load_attr.log_level)
6756
+ pr_debug("verifier log:\n%s", log_buf);
6757
+
6758
+ if (prog->obj->rodata_map_idx >= 0 &&
6759
+ kernel_supports(FEAT_PROG_BIND_MAP)) {
6760
+ struct bpf_map *rodata_map =
6761
+ &prog->obj->maps[prog->obj->rodata_map_idx];
6762
+
6763
+ if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
6764
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6765
+ pr_warn("prog '%s': failed to bind .rodata map: %s\n",
6766
+ prog->name, cp);
6767
+ /* Don't fail hard if can't bind rodata. */
6768
+ }
6769
+ }
6770
+
13446771 *pfd = ret;
13456772 ret = 0;
13466773 goto out;
13476774 }
13486775
1349
- ret = -LIBBPF_ERRNO__LOAD;
1350
- cp = str_error(errno, errmsg, sizeof(errmsg));
1351
- pr_warning("load bpf program failed: %s\n", cp);
6776
+ if (!log_buf || errno == ENOSPC) {
6777
+ log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
6778
+ log_buf_size << 1);
6779
+
6780
+ free(log_buf);
6781
+ goto retry_load;
6782
+ }
6783
+ ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
6784
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6785
+ pr_warn("load bpf program failed: %s\n", cp);
6786
+ pr_perm_msg(ret);
13526787
13536788 if (log_buf && log_buf[0] != '\0') {
13546789 ret = -LIBBPF_ERRNO__VERIFY;
1355
- pr_warning("-- BEGIN DUMP LOG ---\n");
1356
- pr_warning("\n%s\n", log_buf);
1357
- pr_warning("-- END LOG --\n");
6790
+ pr_warn("-- BEGIN DUMP LOG ---\n");
6791
+ pr_warn("\n%s\n", log_buf);
6792
+ pr_warn("-- END LOG --\n");
13586793 } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
1359
- pr_warning("Program too large (%zu insns), at most %d insns\n",
1360
- load_attr.insns_cnt, BPF_MAXINSNS);
6794
+ pr_warn("Program too large (%zu insns), at most %d insns\n",
6795
+ load_attr.insns_cnt, BPF_MAXINSNS);
13616796 ret = -LIBBPF_ERRNO__PROG2BIG;
1362
- } else {
6797
+ } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
13636798 /* Wrong program type? */
1364
- if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
1365
- int fd;
6799
+ int fd;
13666800
1367
- load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
1368
- load_attr.expected_attach_type = 0;
1369
- fd = bpf_load_program_xattr(&load_attr, NULL, 0);
1370
- if (fd >= 0) {
1371
- close(fd);
1372
- ret = -LIBBPF_ERRNO__PROGTYPE;
1373
- goto out;
1374
- }
6801
+ load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
6802
+ load_attr.expected_attach_type = 0;
6803
+ fd = bpf_load_program_xattr(&load_attr, NULL, 0);
6804
+ if (fd >= 0) {
6805
+ close(fd);
6806
+ ret = -LIBBPF_ERRNO__PROGTYPE;
6807
+ goto out;
13756808 }
1376
-
1377
- if (log_buf)
1378
- ret = -LIBBPF_ERRNO__KVER;
13796809 }
13806810
13816811 out:
....@@ -1383,22 +6813,36 @@
13836813 return ret;
13846814 }
13856815
1386
-static int
1387
-bpf_program__load(struct bpf_program *prog,
1388
- char *license, u32 kern_version)
6816
+static int libbpf_find_attach_btf_id(struct bpf_program *prog);
6817
+
6818
+int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
13896819 {
1390
- int err = 0, fd, i;
6820
+ int err = 0, fd, i, btf_id;
6821
+
6822
+ if (prog->obj->loaded) {
6823
+ pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
6824
+ return -EINVAL;
6825
+ }
6826
+
6827
+ if ((prog->type == BPF_PROG_TYPE_TRACING ||
6828
+ prog->type == BPF_PROG_TYPE_LSM ||
6829
+ prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
6830
+ btf_id = libbpf_find_attach_btf_id(prog);
6831
+ if (btf_id <= 0)
6832
+ return btf_id;
6833
+ prog->attach_btf_id = btf_id;
6834
+ }
13916835
13926836 if (prog->instances.nr < 0 || !prog->instances.fds) {
13936837 if (prog->preprocessor) {
1394
- pr_warning("Internal error: can't load program '%s'\n",
1395
- prog->section_name);
6838
+ pr_warn("Internal error: can't load program '%s'\n",
6839
+ prog->name);
13966840 return -LIBBPF_ERRNO__INTERNAL;
13976841 }
13986842
13996843 prog->instances.fds = malloc(sizeof(int));
14006844 if (!prog->instances.fds) {
1401
- pr_warning("Not enough memory for BPF fds\n");
6845
+ pr_warn("Not enough memory for BPF fds\n");
14026846 return -ENOMEM;
14036847 }
14046848 prog->instances.nr = 1;
....@@ -1407,13 +6851,11 @@
14076851
14086852 if (!prog->preprocessor) {
14096853 if (prog->instances.nr != 1) {
1410
- pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
1411
- prog->section_name, prog->instances.nr);
6854
+ pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
6855
+ prog->name, prog->instances.nr);
14126856 }
1413
- err = load_program(prog->type, prog->expected_attach_type,
1414
- prog->name, prog->insns, prog->insns_cnt,
1415
- license, kern_version, &fd,
1416
- prog->prog_ifindex);
6857
+ err = load_program(prog, prog->insns, prog->insns_cnt,
6858
+ license, kern_ver, &fd);
14176859 if (!err)
14186860 prog->instances.fds[0] = fd;
14196861 goto out;
....@@ -1423,33 +6865,29 @@
14236865 struct bpf_prog_prep_result result;
14246866 bpf_program_prep_t preprocessor = prog->preprocessor;
14256867
1426
- bzero(&result, sizeof(result));
6868
+ memset(&result, 0, sizeof(result));
14276869 err = preprocessor(prog, i, prog->insns,
14286870 prog->insns_cnt, &result);
14296871 if (err) {
1430
- pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
1431
- i, prog->section_name);
6872
+ pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
6873
+ i, prog->name);
14326874 goto out;
14336875 }
14346876
14356877 if (!result.new_insn_ptr || !result.new_insn_cnt) {
14366878 pr_debug("Skip loading the %dth instance of program '%s'\n",
1437
- i, prog->section_name);
6879
+ i, prog->name);
14386880 prog->instances.fds[i] = -1;
14396881 if (result.pfd)
14406882 *result.pfd = -1;
14416883 continue;
14426884 }
14436885
1444
- err = load_program(prog->type, prog->expected_attach_type,
1445
- prog->name, result.new_insn_ptr,
1446
- result.new_insn_cnt,
1447
- license, kern_version, &fd,
1448
- prog->prog_ifindex);
1449
-
6886
+ err = load_program(prog, result.new_insn_ptr,
6887
+ result.new_insn_cnt, license, kern_ver, &fd);
14506888 if (err) {
1451
- pr_warning("Loading the %dth instance of program '%s' failed\n",
1452
- i, prog->section_name);
6889
+ pr_warn("Loading the %dth instance of program '%s' failed\n",
6890
+ i, prog->name);
14536891 goto out;
14546892 }
14556893
....@@ -1459,117 +6897,140 @@
14596897 }
14606898 out:
14616899 if (err)
1462
- pr_warning("failed to load program '%s'\n",
1463
- prog->section_name);
6900
+ pr_warn("failed to load program '%s'\n", prog->name);
14646901 zfree(&prog->insns);
14656902 prog->insns_cnt = 0;
14666903 return err;
14676904 }
14686905
1469
-static bool bpf_program__is_function_storage(struct bpf_program *prog,
1470
- struct bpf_object *obj)
1471
-{
1472
- return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
1473
-}
1474
-
14756906 static int
1476
-bpf_object__load_progs(struct bpf_object *obj)
6907
+bpf_object__load_progs(struct bpf_object *obj, int log_level)
14776908 {
6909
+ struct bpf_program *prog;
14786910 size_t i;
14796911 int err;
14806912
14816913 for (i = 0; i < obj->nr_programs; i++) {
1482
- if (bpf_program__is_function_storage(&obj->programs[i], obj))
6914
+ prog = &obj->programs[i];
6915
+ err = bpf_object__sanitize_prog(obj, prog);
6916
+ if (err)
6917
+ return err;
6918
+ }
6919
+
6920
+ for (i = 0; i < obj->nr_programs; i++) {
6921
+ prog = &obj->programs[i];
6922
+ if (prog_is_subprog(obj, prog))
14836923 continue;
1484
- err = bpf_program__load(&obj->programs[i],
1485
- obj->license,
1486
- obj->kern_version);
6924
+ if (!prog->load) {
6925
+ pr_debug("prog '%s': skipped loading\n", prog->name);
6926
+ continue;
6927
+ }
6928
+ prog->log_level |= log_level;
6929
+ err = bpf_program__load(prog, obj->license, obj->kern_version);
14876930 if (err)
14886931 return err;
14896932 }
14906933 return 0;
14916934 }
14926935
1493
-static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
1494
-{
1495
- switch (type) {
1496
- case BPF_PROG_TYPE_SOCKET_FILTER:
1497
- case BPF_PROG_TYPE_SCHED_CLS:
1498
- case BPF_PROG_TYPE_SCHED_ACT:
1499
- case BPF_PROG_TYPE_XDP:
1500
- case BPF_PROG_TYPE_CGROUP_SKB:
1501
- case BPF_PROG_TYPE_CGROUP_SOCK:
1502
- case BPF_PROG_TYPE_LWT_IN:
1503
- case BPF_PROG_TYPE_LWT_OUT:
1504
- case BPF_PROG_TYPE_LWT_XMIT:
1505
- case BPF_PROG_TYPE_LWT_SEG6LOCAL:
1506
- case BPF_PROG_TYPE_SOCK_OPS:
1507
- case BPF_PROG_TYPE_SK_SKB:
1508
- case BPF_PROG_TYPE_CGROUP_DEVICE:
1509
- case BPF_PROG_TYPE_SK_MSG:
1510
- case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1511
- case BPF_PROG_TYPE_LIRC_MODE2:
1512
- case BPF_PROG_TYPE_SK_REUSEPORT:
1513
- return false;
1514
- case BPF_PROG_TYPE_UNSPEC:
1515
- case BPF_PROG_TYPE_KPROBE:
1516
- case BPF_PROG_TYPE_TRACEPOINT:
1517
- case BPF_PROG_TYPE_PERF_EVENT:
1518
- case BPF_PROG_TYPE_RAW_TRACEPOINT:
1519
- default:
1520
- return true;
1521
- }
1522
-}
1523
-
1524
-static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
1525
-{
1526
- if (needs_kver && obj->kern_version == 0) {
1527
- pr_warning("%s doesn't provide kernel version\n",
1528
- obj->path);
1529
- return -LIBBPF_ERRNO__KVERSION;
1530
- }
1531
- return 0;
1532
-}
6936
+static const struct bpf_sec_def *find_sec_def(const char *sec_name);
15336937
15346938 static struct bpf_object *
1535
-__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
1536
- bool needs_kver)
6939
+__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
6940
+ const struct bpf_object_open_opts *opts)
15376941 {
6942
+ const char *obj_name, *kconfig;
6943
+ struct bpf_program *prog;
15386944 struct bpf_object *obj;
6945
+ char tmp_name[64];
15396946 int err;
15406947
15416948 if (elf_version(EV_CURRENT) == EV_NONE) {
1542
- pr_warning("failed to init libelf for %s\n", path);
6949
+ pr_warn("failed to init libelf for %s\n",
6950
+ path ? : "(mem buf)");
15436951 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
15446952 }
15456953
1546
- obj = bpf_object__new(path, obj_buf, obj_buf_sz);
6954
+ if (!OPTS_VALID(opts, bpf_object_open_opts))
6955
+ return ERR_PTR(-EINVAL);
6956
+
6957
+ obj_name = OPTS_GET(opts, object_name, NULL);
6958
+ if (obj_buf) {
6959
+ if (!obj_name) {
6960
+ snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
6961
+ (unsigned long)obj_buf,
6962
+ (unsigned long)obj_buf_sz);
6963
+ obj_name = tmp_name;
6964
+ }
6965
+ path = obj_name;
6966
+ pr_debug("loading object '%s' from buffer\n", obj_name);
6967
+ }
6968
+
6969
+ obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
15476970 if (IS_ERR(obj))
15486971 return obj;
15496972
1550
- CHECK_ERR(bpf_object__elf_init(obj), err, out);
1551
- CHECK_ERR(bpf_object__check_endianness(obj), err, out);
1552
- CHECK_ERR(bpf_object__elf_collect(obj), err, out);
1553
- CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
1554
- CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
6973
+ kconfig = OPTS_GET(opts, kconfig, NULL);
6974
+ if (kconfig) {
6975
+ obj->kconfig = strdup(kconfig);
6976
+ if (!obj->kconfig) {
6977
+ err = -ENOMEM;
6978
+ goto out;
6979
+ }
6980
+ }
15556981
6982
+ err = bpf_object__elf_init(obj);
6983
+ err = err ? : bpf_object__check_endianness(obj);
6984
+ err = err ? : bpf_object__elf_collect(obj);
6985
+ err = err ? : bpf_object__collect_externs(obj);
6986
+ err = err ? : bpf_object__finalize_btf(obj);
6987
+ err = err ? : bpf_object__init_maps(obj, opts);
6988
+ err = err ? : bpf_object__collect_relos(obj);
6989
+ if (err)
6990
+ goto out;
15566991 bpf_object__elf_finish(obj);
6992
+
6993
+ bpf_object__for_each_program(prog, obj) {
6994
+ prog->sec_def = find_sec_def(prog->sec_name);
6995
+ if (!prog->sec_def)
6996
+ /* couldn't guess, but user might manually specify */
6997
+ continue;
6998
+
6999
+ if (prog->sec_def->is_sleepable)
7000
+ prog->prog_flags |= BPF_F_SLEEPABLE;
7001
+ bpf_program__set_type(prog, prog->sec_def->prog_type);
7002
+ bpf_program__set_expected_attach_type(prog,
7003
+ prog->sec_def->expected_attach_type);
7004
+
7005
+ if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
7006
+ prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
7007
+ prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
7008
+ }
7009
+
15577010 return obj;
15587011 out:
15597012 bpf_object__close(obj);
15607013 return ERR_PTR(err);
15617014 }
15627015
1563
-struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
7016
+static struct bpf_object *
7017
+__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
15647018 {
7019
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7020
+ .relaxed_maps = flags & MAPS_RELAX_COMPAT,
7021
+ );
7022
+
15657023 /* param validation */
15667024 if (!attr->file)
15677025 return NULL;
15687026
15697027 pr_debug("loading %s\n", attr->file);
7028
+ return __bpf_object__open(attr->file, NULL, 0, &opts);
7029
+}
15707030
1571
- return __bpf_object__open(attr->file, NULL, 0,
1572
- bpf_prog_type__needs_kver(attr->prog_type));
7031
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
7032
+{
7033
+ return __bpf_object__open_xattr(attr, 0);
15737034 }
15747035
15757036 struct bpf_object *bpf_object__open(const char *path)
....@@ -1582,27 +7043,42 @@
15827043 return bpf_object__open_xattr(&attr);
15837044 }
15847045
1585
-struct bpf_object *bpf_object__open_buffer(void *obj_buf,
1586
- size_t obj_buf_sz,
1587
- const char *name)
7046
+struct bpf_object *
7047
+bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
15887048 {
1589
- char tmp_name[64];
7049
+ if (!path)
7050
+ return ERR_PTR(-EINVAL);
15907051
1591
- /* param validation */
1592
- if (!obj_buf || obj_buf_sz <= 0)
7052
+ pr_debug("loading %s\n", path);
7053
+
7054
+ return __bpf_object__open(path, NULL, 0, opts);
7055
+}
7056
+
7057
+struct bpf_object *
7058
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7059
+ const struct bpf_object_open_opts *opts)
7060
+{
7061
+ if (!obj_buf || obj_buf_sz == 0)
7062
+ return ERR_PTR(-EINVAL);
7063
+
7064
+ return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
7065
+}
7066
+
7067
+struct bpf_object *
7068
+bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
7069
+ const char *name)
7070
+{
7071
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7072
+ .object_name = name,
7073
+ /* wrong default, but backwards-compatible */
7074
+ .relaxed_maps = true,
7075
+ );
7076
+
7077
+ /* returning NULL is wrong, but backwards-compatible */
7078
+ if (!obj_buf || obj_buf_sz == 0)
15937079 return NULL;
15947080
1595
- if (!name) {
1596
- snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
1597
- (unsigned long)obj_buf,
1598
- (unsigned long)obj_buf_sz);
1599
- tmp_name[sizeof(tmp_name) - 1] = '\0';
1600
- name = tmp_name;
1601
- }
1602
- pr_debug("loading object '%s' from buffer\n",
1603
- name);
1604
-
1605
- return __bpf_object__open(name, obj_buf, obj_buf_sz, true);
7081
+ return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
16067082 }
16077083
16087084 int bpf_object__unload(struct bpf_object *obj)
....@@ -1612,8 +7088,11 @@
16127088 if (!obj)
16137089 return -EINVAL;
16147090
1615
- for (i = 0; i < obj->nr_maps; i++)
7091
+ for (i = 0; i < obj->nr_maps; i++) {
16167092 zclose(obj->maps[i].fd);
7093
+ if (obj->maps[i].st_ops)
7094
+ zfree(&obj->maps[i].st_ops->kern_vdata);
7095
+ }
16177096
16187097 for (i = 0; i < obj->nr_programs; i++)
16197098 bpf_program__unload(&obj->programs[i]);
....@@ -1621,28 +7100,294 @@
16217100 return 0;
16227101 }
16237102
1624
-int bpf_object__load(struct bpf_object *obj)
7103
+static int bpf_object__sanitize_maps(struct bpf_object *obj)
16257104 {
1626
- int err;
7105
+ struct bpf_map *m;
16277106
7107
+ bpf_object__for_each_map(m, obj) {
7108
+ if (!bpf_map__is_internal(m))
7109
+ continue;
7110
+ if (!kernel_supports(FEAT_GLOBAL_DATA)) {
7111
+ pr_warn("kernel doesn't support global data\n");
7112
+ return -ENOTSUP;
7113
+ }
7114
+ if (!kernel_supports(FEAT_ARRAY_MMAP))
7115
+ m->def.map_flags ^= BPF_F_MMAPABLE;
7116
+ }
7117
+
7118
+ return 0;
7119
+}
7120
+
7121
+static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7122
+{
7123
+ char sym_type, sym_name[500];
7124
+ unsigned long long sym_addr;
7125
+ struct extern_desc *ext;
7126
+ int ret, err = 0;
7127
+ FILE *f;
7128
+
7129
+ f = fopen("/proc/kallsyms", "r");
7130
+ if (!f) {
7131
+ err = -errno;
7132
+ pr_warn("failed to open /proc/kallsyms: %d\n", err);
7133
+ return err;
7134
+ }
7135
+
7136
+ while (true) {
7137
+ ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7138
+ &sym_addr, &sym_type, sym_name);
7139
+ if (ret == EOF && feof(f))
7140
+ break;
7141
+ if (ret != 3) {
7142
+ pr_warn("failed to read kallsyms entry: %d\n", ret);
7143
+ err = -EINVAL;
7144
+ goto out;
7145
+ }
7146
+
7147
+ ext = find_extern_by_name(obj, sym_name);
7148
+ if (!ext || ext->type != EXT_KSYM)
7149
+ continue;
7150
+
7151
+ if (ext->is_set && ext->ksym.addr != sym_addr) {
7152
+ pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
7153
+ sym_name, ext->ksym.addr, sym_addr);
7154
+ err = -EINVAL;
7155
+ goto out;
7156
+ }
7157
+ if (!ext->is_set) {
7158
+ ext->is_set = true;
7159
+ ext->ksym.addr = sym_addr;
7160
+ pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
7161
+ }
7162
+ }
7163
+
7164
+out:
7165
+ fclose(f);
7166
+ return err;
7167
+}
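For reference, the fscanf() format used above expects /proc/kallsyms lines of the form "<addr> <type> <name> [module]". A small standalone sketch (the sample line is made up) showing how that format splits such a line:

#include <stdio.h>

int main(void)
{
	const char *line = "ffffffffc0123450 t my_func\t[my_module]";
	unsigned long long sym_addr;
	char sym_type, sym_name[500];

	/* "%*[^\n]" discards everything after the symbol name, e.g. the
	 * optional "[module]" suffix */
	if (sscanf(line, "%llx %c %499s%*[^\n]\n",
		   &sym_addr, &sym_type, sym_name) == 3)
		printf("%llx %c %s\n", sym_addr, sym_type, sym_name);
	return 0;
}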
7168
+
7169
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7170
+{
7171
+ struct extern_desc *ext;
7172
+ int i, id;
7173
+
7174
+ for (i = 0; i < obj->nr_extern; i++) {
7175
+ const struct btf_type *targ_var, *targ_type;
7176
+ __u32 targ_type_id, local_type_id;
7177
+ const char *targ_var_name;
7178
+ int ret;
7179
+
7180
+ ext = &obj->externs[i];
7181
+ if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7182
+ continue;
7183
+
7184
+ id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
7185
+ BTF_KIND_VAR);
7186
+ if (id <= 0) {
7187
+ pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
7188
+ ext->name);
7189
+ return -ESRCH;
7190
+ }
7191
+
7192
+ /* find local type_id */
7193
+ local_type_id = ext->ksym.type_id;
7194
+
7195
+ /* find target type_id */
7196
+ targ_var = btf__type_by_id(obj->btf_vmlinux, id);
7197
+ targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
7198
+ targ_var->name_off);
7199
+ targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
7200
+ targ_var->type,
7201
+ &targ_type_id);
7202
+
7203
+ ret = bpf_core_types_are_compat(obj->btf, local_type_id,
7204
+ obj->btf_vmlinux, targ_type_id);
7205
+ if (ret <= 0) {
7206
+ const struct btf_type *local_type;
7207
+ const char *targ_name, *local_name;
7208
+
7209
+ local_type = btf__type_by_id(obj->btf, local_type_id);
7210
+ local_name = btf__name_by_offset(obj->btf,
7211
+ local_type->name_off);
7212
+ targ_name = btf__name_by_offset(obj->btf_vmlinux,
7213
+ targ_type->name_off);
7214
+
7215
+ pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7216
+ ext->name, local_type_id,
7217
+ btf_kind_str(local_type), local_name, targ_type_id,
7218
+ btf_kind_str(targ_type), targ_name);
7219
+ return -EINVAL;
7220
+ }
7221
+
7222
+ ext->is_set = true;
7223
+ ext->ksym.vmlinux_btf_id = id;
7224
+ pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
7225
+ ext->name, id, btf_kind_str(targ_var), targ_var_name);
7226
+ }
7227
+ return 0;
7228
+}
7229
+
7230
+static int bpf_object__resolve_externs(struct bpf_object *obj,
7231
+ const char *extra_kconfig)
7232
+{
7233
+ bool need_config = false, need_kallsyms = false;
7234
+ bool need_vmlinux_btf = false;
7235
+ struct extern_desc *ext;
7236
+ void *kcfg_data = NULL;
7237
+ int err, i;
7238
+
7239
+ if (obj->nr_extern == 0)
7240
+ return 0;
7241
+
7242
+ if (obj->kconfig_map_idx >= 0)
7243
+ kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
7244
+
7245
+ for (i = 0; i < obj->nr_extern; i++) {
7246
+ ext = &obj->externs[i];
7247
+
7248
+ if (ext->type == EXT_KCFG &&
7249
+ strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
7250
+ void *ext_val = kcfg_data + ext->kcfg.data_off;
7251
+ __u32 kver = get_kernel_version();
7252
+
7253
+ if (!kver) {
7254
+ pr_warn("failed to get kernel version\n");
7255
+ return -EINVAL;
7256
+ }
7257
+ err = set_kcfg_value_num(ext, ext_val, kver);
7258
+ if (err)
7259
+ return err;
7260
+ pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
7261
+ } else if (ext->type == EXT_KCFG &&
7262
+ strncmp(ext->name, "CONFIG_", 7) == 0) {
7263
+ need_config = true;
7264
+ } else if (ext->type == EXT_KSYM) {
7265
+ if (ext->ksym.type_id)
7266
+ need_vmlinux_btf = true;
7267
+ else
7268
+ need_kallsyms = true;
7269
+ } else {
7270
+ pr_warn("unrecognized extern '%s'\n", ext->name);
7271
+ return -EINVAL;
7272
+ }
7273
+ }
7274
+ if (need_config && extra_kconfig) {
7275
+ err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
7276
+ if (err)
7277
+ return -EINVAL;
7278
+ need_config = false;
7279
+ for (i = 0; i < obj->nr_extern; i++) {
7280
+ ext = &obj->externs[i];
7281
+ if (ext->type == EXT_KCFG && !ext->is_set) {
7282
+ need_config = true;
7283
+ break;
7284
+ }
7285
+ }
7286
+ }
7287
+ if (need_config) {
7288
+ err = bpf_object__read_kconfig_file(obj, kcfg_data);
7289
+ if (err)
7290
+ return -EINVAL;
7291
+ }
7292
+ if (need_kallsyms) {
7293
+ err = bpf_object__read_kallsyms_file(obj);
7294
+ if (err)
7295
+ return -EINVAL;
7296
+ }
7297
+ if (need_vmlinux_btf) {
7298
+ err = bpf_object__resolve_ksyms_btf_id(obj);
7299
+ if (err)
7300
+ return -EINVAL;
7301
+ }
7302
+ for (i = 0; i < obj->nr_extern; i++) {
7303
+ ext = &obj->externs[i];
7304
+
7305
+ if (!ext->is_set && !ext->is_weak) {
7306
+ pr_warn("extern %s (strong) not resolved\n", ext->name);
7307
+ return -ESRCH;
7308
+ } else if (!ext->is_set) {
7309
+ pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
7310
+ ext->name);
7311
+ }
7312
+ }
7313
+
7314
+ return 0;
7315
+}
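A BPF-side sketch of the extern kinds resolved above (symbol names are illustrative): __kconfig externs are filled from the kernel configuration, LINUX_KERNEL_VERSION is filled from the running kernel's version, and __ksym externs get their address from /proc/kallsyms, or their BTF ID from vmlinux BTF when they carry a type.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

extern unsigned int LINUX_KERNEL_VERSION __kconfig;
extern unsigned long CONFIG_HZ __kconfig __attribute__((weak));
extern const void bpf_prog_active __ksym;

SEC("raw_tracepoint/sys_enter")
int externs_demo(void *ctx)
{
	/* weak, unset kconfig externs default to zero, matching the debug
	 * message emitted by bpf_object__resolve_externs() above */
	if (CONFIG_HZ == 0)
		return 0;

	return LINUX_KERNEL_VERSION > 0 && &bpf_prog_active != NULL;
}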
7316
+
7317
+int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
7318
+{
7319
+ struct bpf_object *obj;
7320
+ int err, i;
7321
+
7322
+ if (!attr)
7323
+ return -EINVAL;
7324
+ obj = attr->obj;
16287325 if (!obj)
16297326 return -EINVAL;
16307327
16317328 if (obj->loaded) {
1632
- pr_warning("object should not be loaded twice\n");
7329
+ pr_warn("object '%s': load can't be attempted twice\n", obj->name);
16337330 return -EINVAL;
16347331 }
16357332
1636
- obj->loaded = true;
7333
+ err = bpf_object__probe_loading(obj);
7334
+ err = err ? : bpf_object__load_vmlinux_btf(obj);
7335
+ err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
7336
+ err = err ? : bpf_object__sanitize_and_load_btf(obj);
7337
+ err = err ? : bpf_object__sanitize_maps(obj);
7338
+ err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
7339
+ err = err ? : bpf_object__create_maps(obj);
7340
+ err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
7341
+ err = err ? : bpf_object__load_progs(obj, attr->log_level);
16377342
1638
- CHECK_ERR(bpf_object__create_maps(obj), err, out);
1639
- CHECK_ERR(bpf_object__relocate(obj), err, out);
1640
- CHECK_ERR(bpf_object__load_progs(obj), err, out);
7343
+ btf__free(obj->btf_vmlinux);
7344
+ obj->btf_vmlinux = NULL;
7345
+
7346
+ obj->loaded = true; /* doesn't matter whether it succeeded or not */
7347
+
7348
+ if (err)
7349
+ goto out;
16417350
16427351 return 0;
16437352 out:
7353
+ /* unpin any maps that were auto-pinned during load */
7354
+ for (i = 0; i < obj->nr_maps; i++)
7355
+ if (obj->maps[i].pinned && !obj->maps[i].reused)
7356
+ bpf_map__unpin(&obj->maps[i], NULL);
7357
+
16447358 bpf_object__unload(obj);
1645
- pr_warning("failed to load object '%s'\n", obj->path);
7359
+ pr_warn("failed to load object '%s'\n", obj->path);
7360
+ return err;
7361
+}
7362
+
7363
+int bpf_object__load(struct bpf_object *obj)
7364
+{
7365
+ struct bpf_object_load_attr attr = {
7366
+ .obj = obj,
7367
+ };
7368
+
7369
+ return bpf_object__load_xattr(&attr);
7370
+}
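A minimal user-space sketch of the open/load path implemented above; the object file name, object name and error handling style are illustrative assumptions, not part of the source.

#include <stdio.h>
#include <bpf/libbpf.h>

int load_example(void)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
		.object_name = "demo_obj",
	);
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open_file("prog.bpf.o", &opts);
	if (libbpf_get_error(obj))
		return -1;

	/* probes features, resolves externs, creates maps, relocates and
	 * loads all non-subprog programs, as orchestrated by
	 * bpf_object__load_xattr() above */
	err = bpf_object__load(obj);
	if (err)
		fprintf(stderr, "load failed: %d\n", err);

	bpf_object__close(obj);
	return err;
}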
7371
+
7372
+static int make_parent_dir(const char *path)
7373
+{
7374
+ char *cp, errmsg[STRERR_BUFSIZE];
7375
+ char *dname, *dir;
7376
+ int err = 0;
7377
+
7378
+ dname = strdup(path);
7379
+ if (dname == NULL)
7380
+ return -ENOMEM;
7381
+
7382
+ dir = dirname(dname);
7383
+ if (mkdir(dir, 0700) && errno != EEXIST)
7384
+ err = -errno;
7385
+
7386
+ free(dname);
7387
+ if (err) {
7388
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7389
+ pr_warn("failed to mkdir %s: %s\n", path, cp);
7390
+ }
16467391 return err;
16477392 }
16487393
....@@ -1662,14 +7407,14 @@
16627407
16637408 dir = dirname(dname);
16647409 if (statfs(dir, &st_fs)) {
1665
- cp = str_error(errno, errmsg, sizeof(errmsg));
1666
- pr_warning("failed to statfs %s: %s\n", dir, cp);
7410
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7411
+ pr_warn("failed to statfs %s: %s\n", dir, cp);
16677412 err = -errno;
16687413 }
16697414 free(dname);
16707415
16717416 if (!err && st_fs.f_type != BPF_FS_MAGIC) {
1672
- pr_warning("specified path %s is not on BPF FS\n", path);
7417
+ pr_warn("specified path %s is not on BPF FS\n", path);
16737418 err = -EINVAL;
16747419 }
16757420
....@@ -1682,47 +7427,131 @@
16827427 char *cp, errmsg[STRERR_BUFSIZE];
16837428 int err;
16847429
7430
+ err = make_parent_dir(path);
7431
+ if (err)
7432
+ return err;
7433
+
16857434 err = check_path(path);
16867435 if (err)
16877436 return err;
16887437
16897438 if (prog == NULL) {
1690
- pr_warning("invalid program pointer\n");
7439
+ pr_warn("invalid program pointer\n");
16917440 return -EINVAL;
16927441 }
16937442
16947443 if (instance < 0 || instance >= prog->instances.nr) {
1695
- pr_warning("invalid prog instance %d of prog %s (max %d)\n",
1696
- instance, prog->section_name, prog->instances.nr);
7444
+ pr_warn("invalid prog instance %d of prog %s (max %d)\n",
7445
+ instance, prog->name, prog->instances.nr);
16977446 return -EINVAL;
16987447 }
16997448
17007449 if (bpf_obj_pin(prog->instances.fds[instance], path)) {
1701
- cp = str_error(errno, errmsg, sizeof(errmsg));
1702
- pr_warning("failed to pin program: %s\n", cp);
1703
- return -errno;
7450
+ err = -errno;
7451
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
7452
+ pr_warn("failed to pin program: %s\n", cp);
7453
+ return err;
17047454 }
17057455 pr_debug("pinned program '%s'\n", path);
17067456
17077457 return 0;
17087458 }
17097459
1710
-static int make_dir(const char *path)
7460
+int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
7461
+ int instance)
17117462 {
1712
- char *cp, errmsg[STRERR_BUFSIZE];
1713
- int err = 0;
7463
+ int err;
17147464
1715
- if (mkdir(path, 0700) && errno != EEXIST)
1716
- err = -errno;
7465
+ err = check_path(path);
7466
+ if (err)
7467
+ return err;
17177468
1718
- if (err) {
1719
- cp = str_error(-err, errmsg, sizeof(errmsg));
1720
- pr_warning("failed to mkdir %s: %s\n", path, cp);
7469
+ if (prog == NULL) {
7470
+ pr_warn("invalid program pointer\n");
7471
+ return -EINVAL;
17217472 }
1722
- return err;
7473
+
7474
+ if (instance < 0 || instance >= prog->instances.nr) {
7475
+ pr_warn("invalid prog instance %d of prog %s (max %d)\n",
7476
+ instance, prog->name, prog->instances.nr);
7477
+ return -EINVAL;
7478
+ }
7479
+
7480
+ err = unlink(path);
7481
+ if (err != 0)
7482
+ return -errno;
7483
+ pr_debug("unpinned program '%s'\n", path);
7484
+
7485
+ return 0;
17237486 }
17247487
17257488 int bpf_program__pin(struct bpf_program *prog, const char *path)
7489
+{
7490
+ int i, err;
7491
+
7492
+ err = make_parent_dir(path);
7493
+ if (err)
7494
+ return err;
7495
+
7496
+ err = check_path(path);
7497
+ if (err)
7498
+ return err;
7499
+
7500
+ if (prog == NULL) {
7501
+ pr_warn("invalid program pointer\n");
7502
+ return -EINVAL;
7503
+ }
7504
+
7505
+ if (prog->instances.nr <= 0) {
7506
+ pr_warn("no instances of prog %s to pin\n", prog->name);
7507
+ return -EINVAL;
7508
+ }
7509
+
7510
+ if (prog->instances.nr == 1) {
7511
+ /* don't create subdirs when pinning single instance */
7512
+ return bpf_program__pin_instance(prog, path, 0);
7513
+ }
7514
+
7515
+ for (i = 0; i < prog->instances.nr; i++) {
7516
+ char buf[PATH_MAX];
7517
+ int len;
7518
+
7519
+ len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7520
+ if (len < 0) {
7521
+ err = -EINVAL;
7522
+ goto err_unpin;
7523
+ } else if (len >= PATH_MAX) {
7524
+ err = -ENAMETOOLONG;
7525
+ goto err_unpin;
7526
+ }
7527
+
7528
+ err = bpf_program__pin_instance(prog, buf, i);
7529
+ if (err)
7530
+ goto err_unpin;
7531
+ }
7532
+
7533
+ return 0;
7534
+
7535
+err_unpin:
7536
+ for (i = i - 1; i >= 0; i--) {
7537
+ char buf[PATH_MAX];
7538
+ int len;
7539
+
7540
+ len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7541
+ if (len < 0)
7542
+ continue;
7543
+ else if (len >= PATH_MAX)
7544
+ continue;
7545
+
7546
+ bpf_program__unpin_instance(prog, buf, i);
7547
+ }
7548
+
7549
+ rmdir(path);
7550
+
7551
+ return err;
7552
+}
7553
+
7554
+int bpf_program__unpin(struct bpf_program *prog, const char *path)
17267555 {
17277556 int i, err;
17287557
....@@ -1731,19 +7560,19 @@
17317560 return err;
17327561
17337562 if (prog == NULL) {
1734
- pr_warning("invalid program pointer\n");
7563
+ pr_warn("invalid program pointer\n");
17357564 return -EINVAL;
17367565 }
17377566
17387567 if (prog->instances.nr <= 0) {
1739
- pr_warning("no instances of prog %s to pin\n",
1740
- prog->section_name);
7568
+ pr_warn("no instances of prog %s to pin\n", prog->name);
17417569 return -EINVAL;
17427570 }
17437571
1744
- err = make_dir(path);
1745
- if (err)
1746
- return err;
7572
+ if (prog->instances.nr == 1) {
7573
+ /* don't create subdirs when pinning single instance */
7574
+ return bpf_program__unpin_instance(prog, path, 0);
7575
+ }
17477576
17487577 for (i = 0; i < prog->instances.nr; i++) {
17497578 char buf[PATH_MAX];
....@@ -1755,10 +7584,14 @@
17557584 else if (len >= PATH_MAX)
17567585 return -ENAMETOOLONG;
17577586
1758
- err = bpf_program__pin_instance(prog, buf, i);
7587
+ err = bpf_program__unpin_instance(prog, buf, i);
17597588 if (err)
17607589 return err;
17617590 }
7591
+
7592
+ err = rmdir(path);
7593
+ if (err)
7594
+ return -errno;
17627595
17637596 return 0;
17647597 }
....@@ -1768,28 +7601,135 @@
17687601 char *cp, errmsg[STRERR_BUFSIZE];
17697602 int err;
17707603
7604
+ if (map == NULL) {
7605
+ pr_warn("invalid map pointer\n");
7606
+ return -EINVAL;
7607
+ }
7608
+
7609
+ if (map->pin_path) {
7610
+ if (path && strcmp(path, map->pin_path)) {
7611
+ pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7612
+ bpf_map__name(map), map->pin_path, path);
7613
+ return -EINVAL;
7614
+ } else if (map->pinned) {
7615
+ pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
7616
+ bpf_map__name(map), map->pin_path);
7617
+ return 0;
7618
+ }
7619
+ } else {
7620
+ if (!path) {
7621
+ pr_warn("missing a path to pin map '%s' at\n",
7622
+ bpf_map__name(map));
7623
+ return -EINVAL;
7624
+ } else if (map->pinned) {
7625
+ pr_warn("map '%s' already pinned\n", bpf_map__name(map));
7626
+ return -EEXIST;
7627
+ }
7628
+
7629
+ map->pin_path = strdup(path);
7630
+ if (!map->pin_path) {
7631
+ err = -errno;
7632
+ goto out_err;
7633
+ }
7634
+ }
7635
+
7636
+ err = make_parent_dir(map->pin_path);
7637
+ if (err)
7638
+ return err;
7639
+
7640
+ err = check_path(map->pin_path);
7641
+ if (err)
7642
+ return err;
7643
+
7644
+ if (bpf_obj_pin(map->fd, map->pin_path)) {
7645
+ err = -errno;
7646
+ goto out_err;
7647
+ }
7648
+
7649
+ map->pinned = true;
7650
+ pr_debug("pinned map '%s'\n", map->pin_path);
7651
+
7652
+ return 0;
7653
+
7654
+out_err:
7655
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7656
+ pr_warn("failed to pin map: %s\n", cp);
7657
+ return err;
7658
+}
7659
+
7660
+int bpf_map__unpin(struct bpf_map *map, const char *path)
7661
+{
7662
+ int err;
7663
+
7664
+ if (map == NULL) {
7665
+ pr_warn("invalid map pointer\n");
7666
+ return -EINVAL;
7667
+ }
7668
+
7669
+ if (map->pin_path) {
7670
+ if (path && strcmp(path, map->pin_path)) {
7671
+ pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7672
+ bpf_map__name(map), map->pin_path, path);
7673
+ return -EINVAL;
7674
+ }
7675
+ path = map->pin_path;
7676
+ } else if (!path) {
7677
+ pr_warn("no path to unpin map '%s' from\n",
7678
+ bpf_map__name(map));
7679
+ return -EINVAL;
7680
+ }
7681
+
17717682 err = check_path(path);
17727683 if (err)
17737684 return err;
17747685
1775
- if (map == NULL) {
1776
- pr_warning("invalid map pointer\n");
1777
- return -EINVAL;
1778
- }
1779
-
1780
- if (bpf_obj_pin(map->fd, path)) {
1781
- cp = str_error(errno, errmsg, sizeof(errmsg));
1782
- pr_warning("failed to pin map: %s\n", cp);
7686
+ err = unlink(path);
7687
+ if (err != 0)
17837688 return -errno;
1784
- }
17857689
1786
- pr_debug("pinned map '%s'\n", path);
7690
+ map->pinned = false;
7691
+ pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
7692
+
17877693 return 0;
17887694 }
17897695
1790
-int bpf_object__pin(struct bpf_object *obj, const char *path)
7696
+int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
17917697 {
1792
- struct bpf_program *prog;
7698
+ char *new = NULL;
7699
+
7700
+ if (path) {
7701
+ new = strdup(path);
7702
+ if (!new)
7703
+ return -errno;
7704
+ }
7705
+
7706
+ free(map->pin_path);
7707
+ map->pin_path = new;
7708
+ return 0;
7709
+}
7710
+
7711
+const char *bpf_map__get_pin_path(const struct bpf_map *map)
7712
+{
7713
+ return map->pin_path;
7714
+}
7715
+
7716
+bool bpf_map__is_pinned(const struct bpf_map *map)
7717
+{
7718
+ return map->pinned;
7719
+}
7720
+
7721
+static void sanitize_pin_path(char *s)
7722
+{
7723
+ /* bpffs disallows periods in path names */
7724
+ while (*s) {
7725
+ if (*s == '.')
7726
+ *s = '_';
7727
+ s++;
7728
+ }
7729
+}
7730
+
7731
+int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
7732
+{
17937733 struct bpf_map *map;
17947734 int err;
17957735
....@@ -1797,42 +7737,78 @@
17977737 return -ENOENT;
17987738
17997739 if (!obj->loaded) {
1800
- pr_warning("object not yet loaded; load it first\n");
7740
+ pr_warn("object not yet loaded; load it first\n");
18017741 return -ENOENT;
18027742 }
18037743
1804
- err = make_dir(path);
1805
- if (err)
1806
- return err;
1807
-
1808
- bpf_map__for_each(map, obj) {
7744
+ bpf_object__for_each_map(map, obj) {
7745
+ char *pin_path = NULL;
18097746 char buf[PATH_MAX];
1810
- int len;
18117747
1812
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
1813
- bpf_map__name(map));
1814
- if (len < 0)
1815
- return -EINVAL;
1816
- else if (len >= PATH_MAX)
1817
- return -ENAMETOOLONG;
7748
+ if (path) {
7749
+ int len;
18187750
1819
- err = bpf_map__pin(map, buf);
7751
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7752
+ bpf_map__name(map));
7753
+ if (len < 0) {
7754
+ err = -EINVAL;
7755
+ goto err_unpin_maps;
7756
+ } else if (len >= PATH_MAX) {
7757
+ err = -ENAMETOOLONG;
7758
+ goto err_unpin_maps;
7759
+ }
7760
+ sanitize_pin_path(buf);
7761
+ pin_path = buf;
7762
+ } else if (!map->pin_path) {
7763
+ continue;
7764
+ }
7765
+
7766
+ err = bpf_map__pin(map, pin_path);
18207767 if (err)
1821
- return err;
7768
+ goto err_unpin_maps;
18227769 }
18237770
1824
- bpf_object__for_each_program(prog, obj) {
7771
+ return 0;
7772
+
7773
+err_unpin_maps:
7774
+ while ((map = bpf_map__prev(map, obj))) {
7775
+ if (!map->pin_path)
7776
+ continue;
7777
+
7778
+ bpf_map__unpin(map, NULL);
7779
+ }
7780
+
7781
+ return err;
7782
+}
7783
+
7784
+int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
7785
+{
7786
+ struct bpf_map *map;
7787
+ int err;
7788
+
7789
+ if (!obj)
7790
+ return -ENOENT;
7791
+
7792
+ bpf_object__for_each_map(map, obj) {
7793
+ char *pin_path = NULL;
18257794 char buf[PATH_MAX];
1826
- int len;
18277795
1828
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
1829
- prog->section_name);
1830
- if (len < 0)
1831
- return -EINVAL;
1832
- else if (len >= PATH_MAX)
1833
- return -ENAMETOOLONG;
7796
+ if (path) {
7797
+ int len;
18347798
1835
- err = bpf_program__pin(prog, buf);
7799
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7800
+ bpf_map__name(map));
7801
+ if (len < 0)
7802
+ return -EINVAL;
7803
+ else if (len >= PATH_MAX)
7804
+ return -ENAMETOOLONG;
7805
+ sanitize_pin_path(buf);
7806
+ pin_path = buf;
7807
+ } else if (!map->pin_path) {
7808
+ continue;
7809
+ }
7810
+
7811
+ err = bpf_map__unpin(map, pin_path);
18367812 if (err)
18377813 return err;
18387814 }
....@@ -1840,11 +7816,141 @@
18407816 return 0;
18417817 }
18427818
7819
+int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
7820
+{
7821
+ struct bpf_program *prog;
7822
+ int err;
7823
+
7824
+ if (!obj)
7825
+ return -ENOENT;
7826
+
7827
+ if (!obj->loaded) {
7828
+ pr_warn("object not yet loaded; load it first\n");
7829
+ return -ENOENT;
7830
+ }
7831
+
7832
+ bpf_object__for_each_program(prog, obj) {
7833
+ char buf[PATH_MAX];
7834
+ int len;
7835
+
7836
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7837
+ prog->pin_name);
7838
+ if (len < 0) {
7839
+ err = -EINVAL;
7840
+ goto err_unpin_programs;
7841
+ } else if (len >= PATH_MAX) {
7842
+ err = -ENAMETOOLONG;
7843
+ goto err_unpin_programs;
7844
+ }
7845
+
7846
+ err = bpf_program__pin(prog, buf);
7847
+ if (err)
7848
+ goto err_unpin_programs;
7849
+ }
7850
+
7851
+ return 0;
7852
+
7853
+err_unpin_programs:
7854
+ while ((prog = bpf_program__prev(prog, obj))) {
7855
+ char buf[PATH_MAX];
7856
+ int len;
7857
+
7858
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7859
+ prog->pin_name);
7860
+ if (len < 0)
7861
+ continue;
7862
+ else if (len >= PATH_MAX)
7863
+ continue;
7864
+
7865
+ bpf_program__unpin(prog, buf);
7866
+ }
7867
+
7868
+ return err;
7869
+}
7870
+
7871
+int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
7872
+{
7873
+ struct bpf_program *prog;
7874
+ int err;
7875
+
7876
+ if (!obj)
7877
+ return -ENOENT;
7878
+
7879
+ bpf_object__for_each_program(prog, obj) {
7880
+ char buf[PATH_MAX];
7881
+ int len;
7882
+
7883
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7884
+ prog->pin_name);
7885
+ if (len < 0)
7886
+ return -EINVAL;
7887
+ else if (len >= PATH_MAX)
7888
+ return -ENAMETOOLONG;
7889
+
7890
+ err = bpf_program__unpin(prog, buf);
7891
+ if (err)
7892
+ return err;
7893
+ }
7894
+
7895
+ return 0;
7896
+}
7897
+
7898
+int bpf_object__pin(struct bpf_object *obj, const char *path)
7899
+{
7900
+ int err;
7901
+
7902
+ err = bpf_object__pin_maps(obj, path);
7903
+ if (err)
7904
+ return err;
7905
+
7906
+ err = bpf_object__pin_programs(obj, path);
7907
+ if (err) {
7908
+ bpf_object__unpin_maps(obj, path);
7909
+ return err;
7910
+ }
7911
+
7912
+ return 0;
7913
+}
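Usage sketch for the pinning entry point above (the path is illustrative; bpffs must already be mounted there). bpf_object__pin() pins maps first and rolls the map pins back if pinning programs fails.

#include <bpf/libbpf.h>

int pin_example(struct bpf_object *obj)
{
	/* pins every map and program of a loaded object under this dir */
	return bpf_object__pin(obj, "/sys/fs/bpf/demo_obj");
}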
7914
+
7915
+static void bpf_map__destroy(struct bpf_map *map)
7916
+{
7917
+ if (map->clear_priv)
7918
+ map->clear_priv(map, map->priv);
7919
+ map->priv = NULL;
7920
+ map->clear_priv = NULL;
7921
+
7922
+ if (map->inner_map) {
7923
+ bpf_map__destroy(map->inner_map);
7924
+ zfree(&map->inner_map);
7925
+ }
7926
+
7927
+ zfree(&map->init_slots);
7928
+ map->init_slots_sz = 0;
7929
+
7930
+ if (map->mmaped) {
7931
+ munmap(map->mmaped, bpf_map_mmap_sz(map));
7932
+ map->mmaped = NULL;
7933
+ }
7934
+
7935
+ if (map->st_ops) {
7936
+ zfree(&map->st_ops->data);
7937
+ zfree(&map->st_ops->progs);
7938
+ zfree(&map->st_ops->kern_func_off);
7939
+ zfree(&map->st_ops);
7940
+ }
7941
+
7942
+ zfree(&map->name);
7943
+ zfree(&map->pin_path);
7944
+
7945
+ if (map->fd >= 0)
7946
+ zclose(map->fd);
7947
+}
7948
+
18437949 void bpf_object__close(struct bpf_object *obj)
18447950 {
18457951 size_t i;
18467952
1847
- if (!obj)
7953
+ if (IS_ERR_OR_NULL(obj))
18487954 return;
18497955
18507956 if (obj->clear_priv)
....@@ -1853,15 +7959,15 @@
18537959 bpf_object__elf_finish(obj);
18547960 bpf_object__unload(obj);
18557961 btf__free(obj->btf);
7962
+ btf_ext__free(obj->btf_ext);
18567963
1857
- for (i = 0; i < obj->nr_maps; i++) {
1858
- zfree(&obj->maps[i].name);
1859
- if (obj->maps[i].clear_priv)
1860
- obj->maps[i].clear_priv(&obj->maps[i],
1861
- obj->maps[i].priv);
1862
- obj->maps[i].priv = NULL;
1863
- obj->maps[i].clear_priv = NULL;
1864
- }
7964
+ for (i = 0; i < obj->nr_maps; i++)
7965
+ bpf_map__destroy(&obj->maps[i]);
7966
+
7967
+ zfree(&obj->kconfig);
7968
+ zfree(&obj->externs);
7969
+ obj->nr_extern = 0;
7970
+
18657971 zfree(&obj->maps);
18667972 obj->nr_maps = 0;
18677973
....@@ -1894,14 +8000,19 @@
18948000 return next;
18958001 }
18968002
1897
-const char *bpf_object__name(struct bpf_object *obj)
8003
+const char *bpf_object__name(const struct bpf_object *obj)
18988004 {
1899
- return obj ? obj->path : ERR_PTR(-EINVAL);
8005
+ return obj ? obj->name : ERR_PTR(-EINVAL);
19008006 }
19018007
1902
-unsigned int bpf_object__kversion(struct bpf_object *obj)
8008
+unsigned int bpf_object__kversion(const struct bpf_object *obj)
19038009 {
19048010 return obj ? obj->kern_version : 0;
8011
+}
8012
+
8013
+struct btf *bpf_object__btf(const struct bpf_object *obj)
8014
+{
8015
+ return obj ? obj->btf : NULL;
19058016 }
19068017
19078018 int bpf_object__btf_fd(const struct bpf_object *obj)
....@@ -1920,41 +8031,57 @@
19208031 return 0;
19218032 }
19228033
1923
-void *bpf_object__priv(struct bpf_object *obj)
8034
+void *bpf_object__priv(const struct bpf_object *obj)
19248035 {
19258036 return obj ? obj->priv : ERR_PTR(-EINVAL);
19268037 }
19278038
19288039 static struct bpf_program *
1929
-__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
8040
+__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8041
+ bool forward)
19308042 {
1931
- size_t idx;
8043
+ size_t nr_programs = obj->nr_programs;
8044
+ ssize_t idx;
19328045
1933
- if (!obj->programs)
8046
+ if (!nr_programs)
19348047 return NULL;
1935
- /* First handler */
1936
- if (prev == NULL)
1937
- return &obj->programs[0];
19388048
1939
- if (prev->obj != obj) {
1940
- pr_warning("error: program handler doesn't match object\n");
8049
+ if (!p)
8050
+ /* Iter from the beginning */
8051
+ return forward ? &obj->programs[0] :
8052
+ &obj->programs[nr_programs - 1];
8053
+
8054
+ if (p->obj != obj) {
8055
+ pr_warn("error: program handler doesn't match object\n");
19418056 return NULL;
19428057 }
19438058
1944
- idx = (prev - obj->programs) + 1;
1945
- if (idx >= obj->nr_programs)
8059
+ idx = (p - obj->programs) + (forward ? 1 : -1);
8060
+ if (idx >= obj->nr_programs || idx < 0)
19468061 return NULL;
19478062 return &obj->programs[idx];
19488063 }
19498064
19508065 struct bpf_program *
1951
-bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
8066
+bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
19528067 {
19538068 struct bpf_program *prog = prev;
19548069
19558070 do {
1956
- prog = __bpf_program__next(prog, obj);
1957
- } while (prog && bpf_program__is_function_storage(prog, obj));
8071
+ prog = __bpf_program__iter(prog, obj, true);
8072
+ } while (prog && prog_is_subprog(obj, prog));
8073
+
8074
+ return prog;
8075
+}
8076
+
8077
+struct bpf_program *
8078
+bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
8079
+{
8080
+ struct bpf_program *prog = next;
8081
+
8082
+ do {
8083
+ prog = __bpf_program__iter(prog, obj, false);
8084
+ } while (prog && prog_is_subprog(obj, prog));
19588085
19598086 return prog;
19608087 }
....@@ -1970,7 +8097,7 @@
19708097 return 0;
19718098 }
19728099
1973
-void *bpf_program__priv(struct bpf_program *prog)
8100
+void *bpf_program__priv(const struct bpf_program *prog)
19748101 {
19758102 return prog ? prog->priv : ERR_PTR(-EINVAL);
19768103 }
....@@ -1980,15 +8107,25 @@
19808107 prog->prog_ifindex = ifindex;
19818108 }
19828109
1983
-const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
8110
+const char *bpf_program__name(const struct bpf_program *prog)
8111
+{
8112
+ return prog->name;
8113
+}
8114
+
8115
+const char *bpf_program__section_name(const struct bpf_program *prog)
8116
+{
8117
+ return prog->sec_name;
8118
+}
8119
+
8120
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
19848121 {
19858122 const char *title;
19868123
1987
- title = prog->section_name;
8124
+ title = prog->sec_name;
19888125 if (needs_copy) {
19898126 title = strdup(title);
19908127 if (!title) {
1991
- pr_warning("failed to strdup program title\n");
8128
+ pr_warn("failed to strdup program title\n");
19928129 return ERR_PTR(-ENOMEM);
19938130 }
19948131 }
....@@ -1996,9 +8133,28 @@
19968133 return title;
19978134 }
19988135
1999
-int bpf_program__fd(struct bpf_program *prog)
8136
+bool bpf_program__autoload(const struct bpf_program *prog)
8137
+{
8138
+ return prog->load;
8139
+}
8140
+
8141
+int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8142
+{
8143
+ if (prog->obj->loaded)
8144
+ return -EINVAL;
8145
+
8146
+ prog->load = autoload;
8147
+ return 0;
8148
+}
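/* Editor's note: usage sketch, not part of this patch. Programs of an open
 * object can be iterated with bpf_object__for_each_program() and selectively
 * excluded from loading via the autoload flag above; "unused_prog" is a
 * placeholder name.
 *
 *	struct bpf_program *prog;
 *
 *	bpf_object__for_each_program(prog, obj) {
 *		if (!strcmp(bpf_program__name(prog), "unused_prog"))
 *			bpf_program__set_autoload(prog, false);
 *	}
 *	bpf_object__load(obj);
 */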
8149
+
8150
+int bpf_program__fd(const struct bpf_program *prog)
20008151 {
20018152 return bpf_program__nth_fd(prog, 0);
8153
+}
8154
+
8155
+size_t bpf_program__size(const struct bpf_program *prog)
8156
+{
8157
+ return prog->insns_cnt * BPF_INSN_SZ;
20028158 }
20038159
20048160 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
....@@ -2010,13 +8166,13 @@
20108166 return -EINVAL;
20118167
20128168 if (prog->instances.nr > 0 || prog->instances.fds) {
2013
- pr_warning("Can't set pre-processor after loading\n");
8169
+ pr_warn("Can't set pre-processor after loading\n");
20148170 return -EINVAL;
20158171 }
20168172
20178173 instances_fds = malloc(sizeof(int) * nr_instances);
20188174 if (!instances_fds) {
2019
- pr_warning("alloc memory failed for fds\n");
8175
+ pr_warn("alloc memory failed for fds\n");
20208176 return -ENOMEM;
20218177 }
20228178
....@@ -2029,7 +8185,7 @@
20298185 return 0;
20308186 }
20318187
2032
-int bpf_program__nth_fd(struct bpf_program *prog, int n)
8188
+int bpf_program__nth_fd(const struct bpf_program *prog, int n)
20338189 {
20348190 int fd;
20358191
....@@ -2037,19 +8193,24 @@
20378193 return -EINVAL;
20388194
20398195 if (n >= prog->instances.nr || n < 0) {
2040
- pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
2041
- n, prog->section_name, prog->instances.nr);
8196
+ pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
8197
+ n, prog->name, prog->instances.nr);
20428198 return -EINVAL;
20438199 }
20448200
20458201 fd = prog->instances.fds[n];
20468202 if (fd < 0) {
2047
- pr_warning("%dth instance of program '%s' is invalid\n",
2048
- n, prog->section_name);
8203
+ pr_warn("%dth instance of program '%s' is invalid\n",
8204
+ n, prog->name);
20498205 return -ENOENT;
20508206 }
20518207
20528208 return fd;
8209
+}
8210
+
8211
+enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
8212
+{
8213
+ return prog->type;
20538214 }
20548215
20558216 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
....@@ -2057,27 +8218,28 @@
20578218 prog->type = type;
20588219 }
20598220
2060
-static bool bpf_program__is_type(struct bpf_program *prog,
8221
+static bool bpf_program__is_type(const struct bpf_program *prog,
20618222 enum bpf_prog_type type)
20628223 {
20638224 return prog ? (prog->type == type) : false;
20648225 }
20658226
2066
-#define BPF_PROG_TYPE_FNS(NAME, TYPE) \
2067
-int bpf_program__set_##NAME(struct bpf_program *prog) \
2068
-{ \
2069
- if (!prog) \
2070
- return -EINVAL; \
2071
- bpf_program__set_type(prog, TYPE); \
2072
- return 0; \
2073
-} \
2074
- \
2075
-bool bpf_program__is_##NAME(struct bpf_program *prog) \
2076
-{ \
2077
- return bpf_program__is_type(prog, TYPE); \
2078
-} \
8227
+#define BPF_PROG_TYPE_FNS(NAME, TYPE) \
8228
+int bpf_program__set_##NAME(struct bpf_program *prog) \
8229
+{ \
8230
+ if (!prog) \
8231
+ return -EINVAL; \
8232
+ bpf_program__set_type(prog, TYPE); \
8233
+ return 0; \
8234
+} \
8235
+ \
8236
+bool bpf_program__is_##NAME(const struct bpf_program *prog) \
8237
+{ \
8238
+ return bpf_program__is_type(prog, TYPE); \
8239
+} \
20798240
20808241 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
8242
+BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
20818243 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
20828244 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
20838245 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
....@@ -2085,6 +8247,16 @@
20858247 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
20868248 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
20878249 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
8250
+BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
8251
+BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
8252
+BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
8253
+BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
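/* Editor's note: each BPF_PROG_TYPE_FNS() instantiation above expands into a
 * bpf_program__set_<name>() and a bpf_program__is_<name>() helper, so the
 * "xdp" entry, for example, yields (usage sketch, not part of this patch):
 *
 *	if (!bpf_program__is_xdp(prog))
 *		bpf_program__set_xdp(prog);
 *
 * which is equivalent to bpf_program__set_type(prog, BPF_PROG_TYPE_XDP).
 */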
8254
+
8255
+enum bpf_attach_type
8256
+bpf_program__get_expected_attach_type(struct bpf_program *prog)
8257
+{
8258
+ return prog->expected_attach_type;
8259
+}
20888260
20898261 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
20908262 enum bpf_attach_type type)
....@@ -2092,98 +8264,636 @@
20928264 prog->expected_attach_type = type;
20938265 }
20948266
2095
-#define BPF_PROG_SEC_FULL(string, ptype, atype) \
2096
- { string, sizeof(string) - 1, ptype, atype }
8267
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional, \
8268
+ attachable, attach_btf) \
8269
+ { \
8270
+ .sec = string, \
8271
+ .len = sizeof(string) - 1, \
8272
+ .prog_type = ptype, \
8273
+ .expected_attach_type = eatype, \
8274
+ .is_exp_attach_type_optional = eatype_optional, \
8275
+ .is_attachable = attachable, \
8276
+ .is_attach_btf = attach_btf, \
8277
+ }
20978278
2098
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
8279
+/* Programs that can NOT be attached. */
8280
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
20998281
2100
-#define BPF_S_PROG_SEC(string, ptype) \
2101
- BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
8282
+/* Programs that can be attached. */
8283
+#define BPF_APROG_SEC(string, ptype, atype) \
8284
+ BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
21028285
2103
-#define BPF_SA_PROG_SEC(string, ptype) \
2104
- BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
8286
+/* Programs that must specify expected attach type at load time. */
8287
+#define BPF_EAPROG_SEC(string, ptype, eatype) \
8288
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
21058289
2106
-static const struct {
2107
- const char *sec;
2108
- size_t len;
2109
- enum bpf_prog_type prog_type;
2110
- enum bpf_attach_type expected_attach_type;
2111
-} section_names[] = {
2112
- BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
2113
- BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
2114
- BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE),
2115
- BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
2116
- BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
2117
- BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
2118
- BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
2119
- BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
2120
- BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
2121
- BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
2122
- BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK),
2123
- BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE),
2124
- BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
2125
- BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
2126
- BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
2127
- BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL),
2128
- BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
2129
- BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
2130
- BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
2131
- BPF_PROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2),
2132
- BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
2133
- BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
2134
- BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
2135
- BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
2136
- BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG),
2137
- BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG),
2138
- BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
2139
- BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
8290
+/* Programs that use BTF to identify attach point */
8291
+#define BPF_PROG_BTF(string, ptype, eatype) \
8292
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
8293
+
8294
+/* Programs that can be attached but attach type can't be identified by section
8295
+ * name. Kept for backward compatibility.
8296
+ */
8297
+#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
8298
+
8299
+#define SEC_DEF(sec_pfx, ptype, ...) { \
8300
+ .sec = sec_pfx, \
8301
+ .len = sizeof(sec_pfx) - 1, \
8302
+ .prog_type = BPF_PROG_TYPE_##ptype, \
8303
+ __VA_ARGS__ \
8304
+}
8305
+
8306
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
8307
+ struct bpf_program *prog);
8308
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
8309
+ struct bpf_program *prog);
8310
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
8311
+ struct bpf_program *prog);
8312
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
8313
+ struct bpf_program *prog);
8314
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
8315
+ struct bpf_program *prog);
8316
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
8317
+ struct bpf_program *prog);
8318
+
8319
+static const struct bpf_sec_def section_defs[] = {
8320
+ BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
8321
+ BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT),
8322
+ SEC_DEF("kprobe/", KPROBE,
8323
+ .attach_fn = attach_kprobe),
8324
+ BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE),
8325
+ SEC_DEF("kretprobe/", KPROBE,
8326
+ .attach_fn = attach_kprobe),
8327
+ BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE),
8328
+ BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
8329
+ BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
8330
+ SEC_DEF("tracepoint/", TRACEPOINT,
8331
+ .attach_fn = attach_tp),
8332
+ SEC_DEF("tp/", TRACEPOINT,
8333
+ .attach_fn = attach_tp),
8334
+ SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
8335
+ .attach_fn = attach_raw_tp),
8336
+ SEC_DEF("raw_tp/", RAW_TRACEPOINT,
8337
+ .attach_fn = attach_raw_tp),
8338
+ SEC_DEF("tp_btf/", TRACING,
8339
+ .expected_attach_type = BPF_TRACE_RAW_TP,
8340
+ .is_attach_btf = true,
8341
+ .attach_fn = attach_trace),
8342
+ SEC_DEF("fentry/", TRACING,
8343
+ .expected_attach_type = BPF_TRACE_FENTRY,
8344
+ .is_attach_btf = true,
8345
+ .attach_fn = attach_trace),
8346
+ SEC_DEF("fmod_ret/", TRACING,
8347
+ .expected_attach_type = BPF_MODIFY_RETURN,
8348
+ .is_attach_btf = true,
8349
+ .attach_fn = attach_trace),
8350
+ SEC_DEF("fexit/", TRACING,
8351
+ .expected_attach_type = BPF_TRACE_FEXIT,
8352
+ .is_attach_btf = true,
8353
+ .attach_fn = attach_trace),
8354
+ SEC_DEF("fentry.s/", TRACING,
8355
+ .expected_attach_type = BPF_TRACE_FENTRY,
8356
+ .is_attach_btf = true,
8357
+ .is_sleepable = true,
8358
+ .attach_fn = attach_trace),
8359
+ SEC_DEF("fmod_ret.s/", TRACING,
8360
+ .expected_attach_type = BPF_MODIFY_RETURN,
8361
+ .is_attach_btf = true,
8362
+ .is_sleepable = true,
8363
+ .attach_fn = attach_trace),
8364
+ SEC_DEF("fexit.s/", TRACING,
8365
+ .expected_attach_type = BPF_TRACE_FEXIT,
8366
+ .is_attach_btf = true,
8367
+ .is_sleepable = true,
8368
+ .attach_fn = attach_trace),
8369
+ SEC_DEF("freplace/", EXT,
8370
+ .is_attach_btf = true,
8371
+ .attach_fn = attach_trace),
8372
+ SEC_DEF("lsm/", LSM,
8373
+ .is_attach_btf = true,
8374
+ .expected_attach_type = BPF_LSM_MAC,
8375
+ .attach_fn = attach_lsm),
8376
+ SEC_DEF("lsm.s/", LSM,
8377
+ .is_attach_btf = true,
8378
+ .is_sleepable = true,
8379
+ .expected_attach_type = BPF_LSM_MAC,
8380
+ .attach_fn = attach_lsm),
8381
+ SEC_DEF("iter/", TRACING,
8382
+ .expected_attach_type = BPF_TRACE_ITER,
8383
+ .is_attach_btf = true,
8384
+ .attach_fn = attach_iter),
8385
+ BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
8386
+ BPF_XDP_DEVMAP),
8387
+ BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
8388
+ BPF_XDP_CPUMAP),
8389
+ BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP,
8390
+ BPF_XDP),
8391
+ BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
8392
+ BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
8393
+ BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
8394
+ BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
8395
+ BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL),
8396
+ BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB,
8397
+ BPF_CGROUP_INET_INGRESS),
8398
+ BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB,
8399
+ BPF_CGROUP_INET_EGRESS),
8400
+ BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
8401
+ BPF_EAPROG_SEC("cgroup/sock_create", BPF_PROG_TYPE_CGROUP_SOCK,
8402
+ BPF_CGROUP_INET_SOCK_CREATE),
8403
+ BPF_EAPROG_SEC("cgroup/sock_release", BPF_PROG_TYPE_CGROUP_SOCK,
8404
+ BPF_CGROUP_INET_SOCK_RELEASE),
8405
+ BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK,
8406
+ BPF_CGROUP_INET_SOCK_CREATE),
8407
+ BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK,
8408
+ BPF_CGROUP_INET4_POST_BIND),
8409
+ BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK,
8410
+ BPF_CGROUP_INET6_POST_BIND),
8411
+ BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE,
8412
+ BPF_CGROUP_DEVICE),
8413
+ BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS,
8414
+ BPF_CGROUP_SOCK_OPS),
8415
+ BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB,
8416
+ BPF_SK_SKB_STREAM_PARSER),
8417
+ BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB,
8418
+ BPF_SK_SKB_STREAM_VERDICT),
8419
+ BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB),
8420
+ BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG,
8421
+ BPF_SK_MSG_VERDICT),
8422
+ BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2,
8423
+ BPF_LIRC_MODE2),
8424
+ BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR,
8425
+ BPF_FLOW_DISSECTOR),
8426
+ BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8427
+ BPF_CGROUP_INET4_BIND),
8428
+ BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8429
+ BPF_CGROUP_INET6_BIND),
8430
+ BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8431
+ BPF_CGROUP_INET4_CONNECT),
8432
+ BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8433
+ BPF_CGROUP_INET6_CONNECT),
8434
+ BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8435
+ BPF_CGROUP_UDP4_SENDMSG),
8436
+ BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8437
+ BPF_CGROUP_UDP6_SENDMSG),
8438
+ BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8439
+ BPF_CGROUP_UDP4_RECVMSG),
8440
+ BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8441
+ BPF_CGROUP_UDP6_RECVMSG),
8442
+ BPF_EAPROG_SEC("cgroup/getpeername4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8443
+ BPF_CGROUP_INET4_GETPEERNAME),
8444
+ BPF_EAPROG_SEC("cgroup/getpeername6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8445
+ BPF_CGROUP_INET6_GETPEERNAME),
8446
+ BPF_EAPROG_SEC("cgroup/getsockname4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8447
+ BPF_CGROUP_INET4_GETSOCKNAME),
8448
+ BPF_EAPROG_SEC("cgroup/getsockname6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8449
+ BPF_CGROUP_INET6_GETSOCKNAME),
8450
+ BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
8451
+ BPF_CGROUP_SYSCTL),
8452
+ BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
8453
+ BPF_CGROUP_GETSOCKOPT),
8454
+ BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
8455
+ BPF_CGROUP_SETSOCKOPT),
8456
+ BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS),
8457
+ BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP,
8458
+ BPF_SK_LOOKUP),
21408459 };
21418460
8461
+#undef BPF_PROG_SEC_IMPL
21428462 #undef BPF_PROG_SEC
2143
-#undef BPF_PROG_SEC_FULL
2144
-#undef BPF_S_PROG_SEC
2145
-#undef BPF_SA_PROG_SEC
8463
+#undef BPF_APROG_SEC
8464
+#undef BPF_EAPROG_SEC
8465
+#undef BPF_APROG_COMPAT
8466
+#undef SEC_DEF
8467
+
8468
+#define MAX_TYPE_NAME_SIZE 32
8469
+
8470
+static const struct bpf_sec_def *find_sec_def(const char *sec_name)
8471
+{
8472
+ int i, n = ARRAY_SIZE(section_defs);
8473
+
8474
+ for (i = 0; i < n; i++) {
8475
+ if (strncmp(sec_name,
8476
+ section_defs[i].sec, section_defs[i].len))
8477
+ continue;
8478
+ return &section_defs[i];
8479
+ }
8480
+ return NULL;
8481
+}
8482
+
8483
+static char *libbpf_get_type_names(bool attach_type)
8484
+{
8485
+ int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
8486
+ char *buf;
8487
+
8488
+ buf = malloc(len);
8489
+ if (!buf)
8490
+ return NULL;
8491
+
8492
+ buf[0] = '\0';
8493
+ /* Forge string buf with all available names */
8494
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8495
+ if (attach_type && !section_defs[i].is_attachable)
8496
+ continue;
8497
+
8498
+ if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
8499
+ free(buf);
8500
+ return NULL;
8501
+ }
8502
+ strcat(buf, " ");
8503
+ strcat(buf, section_defs[i].sec);
8504
+ }
8505
+
8506
+ return buf;
8507
+}
21468508
21478509 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
21488510 enum bpf_attach_type *expected_attach_type)
21498511 {
8512
+ const struct bpf_sec_def *sec_def;
8513
+ char *type_names;
8514
+
8515
+ if (!name)
8516
+ return -EINVAL;
8517
+
8518
+ sec_def = find_sec_def(name);
8519
+ if (sec_def) {
8520
+ *prog_type = sec_def->prog_type;
8521
+ *expected_attach_type = sec_def->expected_attach_type;
8522
+ return 0;
8523
+ }
8524
+
8525
+ pr_debug("failed to guess program type from ELF section '%s'\n", name);
8526
+ type_names = libbpf_get_type_names(false);
8527
+ if (type_names != NULL) {
8528
+ pr_debug("supported section(type) names are:%s\n", type_names);
8529
+ free(type_names);
8530
+ }
8531
+
8532
+ return -ESRCH;
8533
+}
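/* Editor's note: usage sketch, not part of this patch. The section name is
 * matched by prefix against section_defs[] above, so, for example,
 * "tracepoint/sched/sched_switch" resolves to BPF_PROG_TYPE_TRACEPOINT:
 *
 *	enum bpf_prog_type type;
 *	enum bpf_attach_type attach;
 *	int err = libbpf_prog_type_by_name("tracepoint/sched/sched_switch",
 *					   &type, &attach);
 *
 * An unknown section name yields -ESRCH plus a pr_debug() listing of all
 * supported prefixes.
 */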
8534
+
8535
+static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
8536
+ size_t offset)
8537
+{
8538
+ struct bpf_map *map;
8539
+ size_t i;
8540
+
8541
+ for (i = 0; i < obj->nr_maps; i++) {
8542
+ map = &obj->maps[i];
8543
+ if (!bpf_map__is_struct_ops(map))
8544
+ continue;
8545
+ if (map->sec_offset <= offset &&
8546
+ offset - map->sec_offset < map->def.value_size)
8547
+ return map;
8548
+ }
8549
+
8550
+ return NULL;
8551
+}
8552
+
8553
+/* Collect the reloc from ELF and populate the st_ops->progs[] */
8554
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
8555
+ GElf_Shdr *shdr, Elf_Data *data)
8556
+{
8557
+ const struct btf_member *member;
8558
+ struct bpf_struct_ops *st_ops;
8559
+ struct bpf_program *prog;
8560
+ unsigned int shdr_idx;
8561
+ const struct btf *btf;
8562
+ struct bpf_map *map;
8563
+ Elf_Data *symbols;
8564
+ unsigned int moff, insn_idx;
8565
+ const char *name;
8566
+ __u32 member_idx;
8567
+ GElf_Sym sym;
8568
+ GElf_Rel rel;
8569
+ int i, nrels;
8570
+
8571
+ symbols = obj->efile.symbols;
8572
+ btf = obj->btf;
8573
+ nrels = shdr->sh_size / shdr->sh_entsize;
8574
+ for (i = 0; i < nrels; i++) {
8575
+ if (!gelf_getrel(data, i, &rel)) {
8576
+ pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
8577
+ return -LIBBPF_ERRNO__FORMAT;
8578
+ }
8579
+
8580
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
8581
+ pr_warn("struct_ops reloc: symbol %zx not found\n",
8582
+ (size_t)GELF_R_SYM(rel.r_info));
8583
+ return -LIBBPF_ERRNO__FORMAT;
8584
+ }
8585
+
8586
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
8587
+ map = find_struct_ops_map_by_offset(obj, rel.r_offset);
8588
+ if (!map) {
8589
+ pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
8590
+ (size_t)rel.r_offset);
8591
+ return -EINVAL;
8592
+ }
8593
+
8594
+ moff = rel.r_offset - map->sec_offset;
8595
+ shdr_idx = sym.st_shndx;
8596
+ st_ops = map->st_ops;
8597
+ pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
8598
+ map->name,
8599
+ (long long)(rel.r_info >> 32),
8600
+ (long long)sym.st_value,
8601
+ shdr_idx, (size_t)rel.r_offset,
8602
+ map->sec_offset, sym.st_name, name);
8603
+
8604
+ if (shdr_idx >= SHN_LORESERVE) {
8605
+ pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
8606
+ map->name, (size_t)rel.r_offset, shdr_idx);
8607
+ return -LIBBPF_ERRNO__RELOC;
8608
+ }
8609
+ if (sym.st_value % BPF_INSN_SZ) {
8610
+ pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
8611
+ map->name, (unsigned long long)sym.st_value);
8612
+ return -LIBBPF_ERRNO__FORMAT;
8613
+ }
8614
+ insn_idx = sym.st_value / BPF_INSN_SZ;
8615
+
8616
+ member = find_member_by_offset(st_ops->type, moff * 8);
8617
+ if (!member) {
8618
+ pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
8619
+ map->name, moff);
8620
+ return -EINVAL;
8621
+ }
8622
+ member_idx = member - btf_members(st_ops->type);
8623
+ name = btf__name_by_offset(btf, member->name_off);
8624
+
8625
+ if (!resolve_func_ptr(btf, member->type, NULL)) {
8626
+ pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
8627
+ map->name, name);
8628
+ return -EINVAL;
8629
+ }
8630
+
8631
+ prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
8632
+ if (!prog) {
8633
+ pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
8634
+ map->name, shdr_idx, name);
8635
+ return -EINVAL;
8636
+ }
8637
+
8638
+ if (prog->type == BPF_PROG_TYPE_UNSPEC) {
8639
+ const struct bpf_sec_def *sec_def;
8640
+
8641
+ sec_def = find_sec_def(prog->sec_name);
8642
+ if (sec_def &&
8643
+ sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
8644
+ /* for pr_warn */
8645
+ prog->type = sec_def->prog_type;
8646
+ goto invalid_prog;
8647
+ }
8648
+
8649
+ prog->type = BPF_PROG_TYPE_STRUCT_OPS;
8650
+ prog->attach_btf_id = st_ops->type_id;
8651
+ prog->expected_attach_type = member_idx;
8652
+ } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
8653
+ prog->attach_btf_id != st_ops->type_id ||
8654
+ prog->expected_attach_type != member_idx) {
8655
+ goto invalid_prog;
8656
+ }
8657
+ st_ops->progs[member_idx] = prog;
8658
+ }
8659
+
8660
+ return 0;
8661
+
8662
+invalid_prog:
8663
+ pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
8664
+ map->name, prog->name, prog->sec_name, prog->type,
8665
+ prog->attach_btf_id, prog->expected_attach_type, name);
8666
+ return -EINVAL;
8667
+}
8668
+
8669
+#define BTF_TRACE_PREFIX "btf_trace_"
8670
+#define BTF_LSM_PREFIX "bpf_lsm_"
8671
+#define BTF_ITER_PREFIX "bpf_iter_"
8672
+#define BTF_MAX_NAME_SIZE 128
8673
+
8674
+static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
8675
+ const char *name, __u32 kind)
8676
+{
8677
+ char btf_type_name[BTF_MAX_NAME_SIZE];
8678
+ int ret;
8679
+
8680
+ ret = snprintf(btf_type_name, sizeof(btf_type_name),
8681
+ "%s%s", prefix, name);
8682
+ /* snprintf returns the number of characters written excluding the
8683
+	 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
8684
+ * indicates truncation.
8685
+ */
8686
+ if (ret < 0 || ret >= sizeof(btf_type_name))
8687
+ return -ENAMETOOLONG;
8688
+ return btf__find_by_name_kind(btf, btf_type_name, kind);
8689
+}
8690
+
8691
+static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
8692
+ enum bpf_attach_type attach_type)
8693
+{
8694
+ int err;
8695
+
8696
+ if (attach_type == BPF_TRACE_RAW_TP)
8697
+ err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
8698
+ BTF_KIND_TYPEDEF);
8699
+ else if (attach_type == BPF_LSM_MAC)
8700
+ err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
8701
+ BTF_KIND_FUNC);
8702
+ else if (attach_type == BPF_TRACE_ITER)
8703
+ err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
8704
+ BTF_KIND_FUNC);
8705
+ else
8706
+ err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
8707
+
8708
+ if (err <= 0)
8709
+ pr_warn("%s is not found in vmlinux BTF\n", name);
8710
+
8711
+ return err;
8712
+}
8713
+
8714
+int libbpf_find_vmlinux_btf_id(const char *name,
8715
+ enum bpf_attach_type attach_type)
8716
+{
8717
+ struct btf *btf;
8718
+ int err;
8719
+
8720
+ btf = libbpf_find_kernel_btf();
8721
+ if (IS_ERR(btf)) {
8722
+ pr_warn("vmlinux BTF is not found\n");
8723
+ return -EINVAL;
8724
+ }
8725
+
8726
+ err = __find_vmlinux_btf_id(btf, name, attach_type);
8727
+ btf__free(btf);
8728
+ return err;
8729
+}
8730
+
8731
+static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
8732
+{
8733
+ struct bpf_prog_info_linear *info_linear;
8734
+ struct bpf_prog_info *info;
8735
+ struct btf *btf = NULL;
8736
+ int err = -EINVAL;
8737
+
8738
+ info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
8739
+ if (IS_ERR_OR_NULL(info_linear)) {
8740
+ pr_warn("failed get_prog_info_linear for FD %d\n",
8741
+ attach_prog_fd);
8742
+ return -EINVAL;
8743
+ }
8744
+ info = &info_linear->info;
8745
+ if (!info->btf_id) {
8746
+ pr_warn("The target program doesn't have BTF\n");
8747
+ goto out;
8748
+ }
8749
+ if (btf__get_from_id(info->btf_id, &btf)) {
8750
+ pr_warn("Failed to get BTF of the program\n");
8751
+ goto out;
8752
+ }
8753
+ err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
8754
+ btf__free(btf);
8755
+ if (err <= 0) {
8756
+ pr_warn("%s is not found in prog's BTF\n", name);
8757
+ goto out;
8758
+ }
8759
+out:
8760
+ free(info_linear);
8761
+ return err;
8762
+}
8763
+
8764
+static int libbpf_find_attach_btf_id(struct bpf_program *prog)
8765
+{
8766
+ enum bpf_attach_type attach_type = prog->expected_attach_type;
8767
+ __u32 attach_prog_fd = prog->attach_prog_fd;
8768
+ const char *name = prog->sec_name;
8769
+ int i, err;
8770
+
8771
+ if (!name)
8772
+ return -EINVAL;
8773
+
8774
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8775
+ if (!section_defs[i].is_attach_btf)
8776
+ continue;
8777
+ if (strncmp(name, section_defs[i].sec, section_defs[i].len))
8778
+ continue;
8779
+ if (attach_prog_fd)
8780
+ err = libbpf_find_prog_btf_id(name + section_defs[i].len,
8781
+ attach_prog_fd);
8782
+ else
8783
+ err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
8784
+ name + section_defs[i].len,
8785
+ attach_type);
8786
+ return err;
8787
+ }
8788
+ pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
8789
+ return -ESRCH;
8790
+}
8791
+
8792
+int libbpf_attach_type_by_name(const char *name,
8793
+ enum bpf_attach_type *attach_type)
8794
+{
8795
+ char *type_names;
21508796 int i;
21518797
21528798 if (!name)
21538799 return -EINVAL;
21548800
2155
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
2156
- if (strncmp(name, section_names[i].sec, section_names[i].len))
8801
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8802
+ if (strncmp(name, section_defs[i].sec, section_defs[i].len))
21578803 continue;
2158
- *prog_type = section_names[i].prog_type;
2159
- *expected_attach_type = section_names[i].expected_attach_type;
8804
+ if (!section_defs[i].is_attachable)
8805
+ return -EINVAL;
8806
+ *attach_type = section_defs[i].expected_attach_type;
21608807 return 0;
21618808 }
8809
+ pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
8810
+ type_names = libbpf_get_type_names(true);
8811
+ if (type_names != NULL) {
8812
+ pr_debug("attachable section(type) names are:%s\n", type_names);
8813
+ free(type_names);
8814
+ }
8815
+
21628816 return -EINVAL;
21638817 }
21648818
2165
-static int
2166
-bpf_program__identify_section(struct bpf_program *prog,
2167
- enum bpf_prog_type *prog_type,
2168
- enum bpf_attach_type *expected_attach_type)
2169
-{
2170
- return libbpf_prog_type_by_name(prog->section_name, prog_type,
2171
- expected_attach_type);
2172
-}
2173
-
2174
-int bpf_map__fd(struct bpf_map *map)
8819
+int bpf_map__fd(const struct bpf_map *map)
21758820 {
21768821 return map ? map->fd : -EINVAL;
21778822 }
21788823
2179
-const struct bpf_map_def *bpf_map__def(struct bpf_map *map)
8824
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
21808825 {
21818826 return map ? &map->def : ERR_PTR(-EINVAL);
21828827 }
21838828
2184
-const char *bpf_map__name(struct bpf_map *map)
8829
+const char *bpf_map__name(const struct bpf_map *map)
21858830 {
21868831 return map ? map->name : NULL;
8832
+}
8833
+
8834
+enum bpf_map_type bpf_map__type(const struct bpf_map *map)
8835
+{
8836
+ return map->def.type;
8837
+}
8838
+
8839
+int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
8840
+{
8841
+ if (map->fd >= 0)
8842
+ return -EBUSY;
8843
+ map->def.type = type;
8844
+ return 0;
8845
+}
8846
+
8847
+__u32 bpf_map__map_flags(const struct bpf_map *map)
8848
+{
8849
+ return map->def.map_flags;
8850
+}
8851
+
8852
+int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
8853
+{
8854
+ if (map->fd >= 0)
8855
+ return -EBUSY;
8856
+ map->def.map_flags = flags;
8857
+ return 0;
8858
+}
8859
+
8860
+__u32 bpf_map__numa_node(const struct bpf_map *map)
8861
+{
8862
+ return map->numa_node;
8863
+}
8864
+
8865
+int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
8866
+{
8867
+ if (map->fd >= 0)
8868
+ return -EBUSY;
8869
+ map->numa_node = numa_node;
8870
+ return 0;
8871
+}
8872
+
8873
+__u32 bpf_map__key_size(const struct bpf_map *map)
8874
+{
8875
+ return map->def.key_size;
8876
+}
8877
+
8878
+int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
8879
+{
8880
+ if (map->fd >= 0)
8881
+ return -EBUSY;
8882
+ map->def.key_size = size;
8883
+ return 0;
8884
+}
8885
+
8886
+__u32 bpf_map__value_size(const struct bpf_map *map)
8887
+{
8888
+ return map->def.value_size;
8889
+}
8890
+
8891
+int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
8892
+{
8893
+ if (map->fd >= 0)
8894
+ return -EBUSY;
8895
+ map->def.value_size = size;
8896
+ return 0;
21878897 }
21888898
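/* Editor's note: usage sketch, not part of this patch. The setters above only
 * succeed before the map is created (fd < 0), i.e. between bpf_object__open()
 * and bpf_object__load(); afterwards they return -EBUSY. "my_map" is a
 * placeholder name.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");
 *
 *	if (map) {
 *		bpf_map__set_key_size(map, 8);
 *		bpf_map__set_value_size(map, 64);
 *	}
 */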
21898899 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
....@@ -2212,25 +8922,63 @@
22128922 return 0;
22138923 }
22148924
2215
-void *bpf_map__priv(struct bpf_map *map)
8925
+void *bpf_map__priv(const struct bpf_map *map)
22168926 {
22178927 return map ? map->priv : ERR_PTR(-EINVAL);
22188928 }
22198929
2220
-bool bpf_map__is_offload_neutral(struct bpf_map *map)
8930
+int bpf_map__set_initial_value(struct bpf_map *map,
8931
+ const void *data, size_t size)
8932
+{
8933
+ if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
8934
+ size != map->def.value_size || map->fd >= 0)
8935
+ return -EINVAL;
8936
+
8937
+ memcpy(map->mmaped, data, size);
8938
+ return 0;
8939
+}
8940
+
8941
+bool bpf_map__is_offload_neutral(const struct bpf_map *map)
22218942 {
22228943 return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
22238944 }
22248945
2225
-void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
8946
+bool bpf_map__is_internal(const struct bpf_map *map)
22268947 {
2227
- map->map_ifindex = ifindex;
8948
+ return map->libbpf_type != LIBBPF_MAP_UNSPEC;
22288949 }
22298950
2230
-struct bpf_map *
2231
-bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
8951
+__u32 bpf_map__ifindex(const struct bpf_map *map)
22328952 {
2233
- size_t idx;
8953
+ return map->map_ifindex;
8954
+}
8955
+
8956
+int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
8957
+{
8958
+ if (map->fd >= 0)
8959
+ return -EBUSY;
8960
+ map->map_ifindex = ifindex;
8961
+ return 0;
8962
+}
8963
+
8964
+int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
8965
+{
8966
+ if (!bpf_map_type__is_map_in_map(map->def.type)) {
8967
+ pr_warn("error: unsupported map type\n");
8968
+ return -EINVAL;
8969
+ }
8970
+ if (map->inner_map_fd != -1) {
8971
+ pr_warn("error: inner_map_fd already specified\n");
8972
+ return -EINVAL;
8973
+ }
8974
+ map->inner_map_fd = fd;
8975
+ return 0;
8976
+}
8977
+
8978
+static struct bpf_map *
8979
+__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
8980
+{
8981
+ ssize_t idx;
22348982 struct bpf_map *s, *e;
22358983
22368984 if (!obj || !obj->maps)
....@@ -2239,50 +8987,66 @@
22398987 s = obj->maps;
22408988 e = obj->maps + obj->nr_maps;
22418989
2242
- if (prev == NULL)
2243
- return s;
2244
-
2245
- if ((prev < s) || (prev >= e)) {
2246
- pr_warning("error in %s: map handler doesn't belong to object\n",
2247
- __func__);
8990
+ if ((m < s) || (m >= e)) {
8991
+ pr_warn("error in %s: map handler doesn't belong to object\n",
8992
+ __func__);
22488993 return NULL;
22498994 }
22508995
2251
- idx = (prev - obj->maps) + 1;
2252
- if (idx >= obj->nr_maps)
8996
+ idx = (m - obj->maps) + i;
8997
+ if (idx >= obj->nr_maps || idx < 0)
22538998 return NULL;
22548999 return &obj->maps[idx];
22559000 }
22569001
22579002 struct bpf_map *
2258
-bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
9003
+bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
9004
+{
9005
+ if (prev == NULL)
9006
+ return obj->maps;
9007
+
9008
+ return __bpf_map__iter(prev, obj, 1);
9009
+}
9010
+
9011
+struct bpf_map *
9012
+bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
9013
+{
9014
+ if (next == NULL) {
9015
+ if (!obj->nr_maps)
9016
+ return NULL;
9017
+ return obj->maps + obj->nr_maps - 1;
9018
+ }
9019
+
9020
+ return __bpf_map__iter(next, obj, -1);
9021
+}
9022
+
9023
+struct bpf_map *
9024
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
22599025 {
22609026 struct bpf_map *pos;
22619027
2262
- bpf_map__for_each(pos, obj) {
9028
+ bpf_object__for_each_map(pos, obj) {
22639029 if (pos->name && !strcmp(pos->name, name))
22649030 return pos;
22659031 }
22669032 return NULL;
22679033 }
22689034
9035
+int
9036
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
9037
+{
9038
+ return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
9039
+}
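/* Editor's note: usage sketch, not part of this patch. Once the object is
 * loaded, a map fd can be looked up by name and used with the low-level
 * wrappers from bpf.h; the map name, key and value are placeholders.
 *
 *	int fd = bpf_object__find_map_fd_by_name(obj, "my_map");
 *	__u32 key = 0, value = 1;
 *
 *	if (fd >= 0)
 *		bpf_map_update_elem(fd, &key, &value, BPF_ANY);
 */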
9040
+
22699041 struct bpf_map *
22709042 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
22719043 {
2272
- int i;
2273
-
2274
- for (i = 0; i < obj->nr_maps; i++) {
2275
- if (obj->maps[i].offset == offset)
2276
- return &obj->maps[i];
2277
- }
2278
- return ERR_PTR(-ENOENT);
9044
+ return ERR_PTR(-ENOTSUP);
22799045 }
22809046
22819047 long libbpf_get_error(const void *ptr)
22829048 {
2283
- if (IS_ERR(ptr))
2284
- return PTR_ERR(ptr);
2285
- return 0;
9049
+ return PTR_ERR_OR_ZERO(ptr);
22869050 }
22879051
22889052 int bpf_prog_load(const char *file, enum bpf_prog_type type,
....@@ -2303,8 +9067,6 @@
23039067 {
23049068 struct bpf_object_open_attr open_attr = {};
23059069 struct bpf_program *prog, *first_prog = NULL;
2306
- enum bpf_attach_type expected_attach_type;
2307
- enum bpf_prog_type prog_type;
23089070 struct bpf_object *obj;
23099071 struct bpf_map *map;
23109072 int err;
....@@ -2322,39 +9084,40 @@
23229084 return -ENOENT;
23239085
23249086 bpf_object__for_each_program(prog, obj) {
9087
+ enum bpf_attach_type attach_type = attr->expected_attach_type;
23259088 /*
2326
- * If type is not specified, try to guess it based on
2327
- * section name.
9089
+ * to preserve backwards compatibility, bpf_prog_load treats
9090
+ * attr->prog_type, if specified, as an override to whatever
9091
+ * bpf_object__open guessed
23289092 */
2329
- prog_type = attr->prog_type;
2330
- prog->prog_ifindex = attr->ifindex;
2331
- expected_attach_type = attr->expected_attach_type;
2332
- if (prog_type == BPF_PROG_TYPE_UNSPEC) {
2333
- err = bpf_program__identify_section(prog, &prog_type,
2334
- &expected_attach_type);
2335
- if (err < 0) {
2336
- pr_warning("failed to guess program type based on section name %s\n",
2337
- prog->section_name);
2338
- bpf_object__close(obj);
2339
- return -EINVAL;
2340
- }
9093
+ if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
9094
+ bpf_program__set_type(prog, attr->prog_type);
9095
+ bpf_program__set_expected_attach_type(prog,
9096
+ attach_type);
9097
+ }
9098
+ if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
9099
+ /*
9100
+ * we haven't guessed from section name and user
9101
+ * didn't provide a fallback type, too bad...
9102
+ */
9103
+ bpf_object__close(obj);
9104
+ return -EINVAL;
23419105 }
23429106
2343
- bpf_program__set_type(prog, prog_type);
2344
- bpf_program__set_expected_attach_type(prog,
2345
- expected_attach_type);
2346
-
2347
- if (!bpf_program__is_function_storage(prog, obj) && !first_prog)
9107
+ prog->prog_ifindex = attr->ifindex;
9108
+ prog->log_level = attr->log_level;
9109
+ prog->prog_flags |= attr->prog_flags;
9110
+ if (!first_prog)
23489111 first_prog = prog;
23499112 }
23509113
2351
- bpf_map__for_each(map, obj) {
9114
+ bpf_object__for_each_map(map, obj) {
23529115 if (!bpf_map__is_offload_neutral(map))
23539116 map->map_ifindex = attr->ifindex;
23549117 }
23559118
23569119 if (!first_prog) {
2357
- pr_warning("object file doesn't contain bpf program\n");
9120
+ pr_warn("object file doesn't contain bpf program\n");
23589121 bpf_object__close(obj);
23599122 return -ENOENT;
23609123 }
....@@ -2362,7 +9125,7 @@
23629125 err = bpf_object__load(obj);
23639126 if (err) {
23649127 bpf_object__close(obj);
2365
- return -EINVAL;
9128
+ return err;
23669129 }
23679130
23689131 *pobj = obj;
....@@ -2370,62 +9133,1817 @@
23709133 return 0;
23719134 }
23729135
2373
-enum bpf_perf_event_ret
2374
-bpf_perf_event_read_simple(void *mem, unsigned long size,
2375
- unsigned long page_size, void **buf, size_t *buf_len,
2376
- bpf_perf_event_print_t fn, void *priv)
9136
+struct bpf_link {
9137
+ int (*detach)(struct bpf_link *link);
9138
+ int (*destroy)(struct bpf_link *link);
9139
+ char *pin_path; /* NULL, if not pinned */
9140
+ int fd; /* hook FD, -1 if not applicable */
9141
+ bool disconnected;
9142
+};
9143
+
9144
+/* Replace link's underlying BPF program with the new one */
9145
+int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
23779146 {
2378
- volatile struct perf_event_mmap_page *header = mem;
9147
+ return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
9148
+}
9149
+
9150
+/* Release "ownership" of underlying BPF resource (typically, BPF program
9151
+ * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
9152
+ * link, when destructed through bpf_link__destroy() call won't attempt to
9153
+ * detach/unregister that BPF resource. This is useful in situations where,
9154
+ * say, attached BPF program has to outlive userspace program that attached it
9155
+ * in the system. Depending on type of BPF program, though, there might be
9156
+ * additional steps (like pinning BPF program in BPF FS) necessary to ensure
9157
+ * exit of userspace program doesn't trigger automatic detachment and clean up
9158
+ * inside the kernel.
9159
+ */
9160
+void bpf_link__disconnect(struct bpf_link *link)
9161
+{
9162
+ link->disconnected = true;
9163
+}
9164
+
9165
+int bpf_link__destroy(struct bpf_link *link)
9166
+{
9167
+ int err = 0;
9168
+
9169
+ if (IS_ERR_OR_NULL(link))
9170
+ return 0;
9171
+
9172
+ if (!link->disconnected && link->detach)
9173
+ err = link->detach(link);
9174
+ if (link->destroy)
9175
+ link->destroy(link);
9176
+ if (link->pin_path)
9177
+ free(link->pin_path);
9178
+ free(link);
9179
+
9180
+ return err;
9181
+}
9182
+
9183
+int bpf_link__fd(const struct bpf_link *link)
9184
+{
9185
+ return link->fd;
9186
+}
9187
+
9188
+const char *bpf_link__pin_path(const struct bpf_link *link)
9189
+{
9190
+ return link->pin_path;
9191
+}
9192
+
9193
+static int bpf_link__detach_fd(struct bpf_link *link)
9194
+{
9195
+ return close(link->fd);
9196
+}
9197
+
9198
+struct bpf_link *bpf_link__open(const char *path)
9199
+{
9200
+ struct bpf_link *link;
9201
+ int fd;
9202
+
9203
+ fd = bpf_obj_get(path);
9204
+ if (fd < 0) {
9205
+ fd = -errno;
9206
+ pr_warn("failed to open link at %s: %d\n", path, fd);
9207
+ return ERR_PTR(fd);
9208
+ }
9209
+
9210
+ link = calloc(1, sizeof(*link));
9211
+ if (!link) {
9212
+ close(fd);
9213
+ return ERR_PTR(-ENOMEM);
9214
+ }
9215
+ link->detach = &bpf_link__detach_fd;
9216
+ link->fd = fd;
9217
+
9218
+ link->pin_path = strdup(path);
9219
+ if (!link->pin_path) {
9220
+ bpf_link__destroy(link);
9221
+ return ERR_PTR(-ENOMEM);
9222
+ }
9223
+
9224
+ return link;
9225
+}
9226
+
9227
+int bpf_link__detach(struct bpf_link *link)
9228
+{
9229
+ return bpf_link_detach(link->fd) ? -errno : 0;
9230
+}
9231
+
9232
+int bpf_link__pin(struct bpf_link *link, const char *path)
9233
+{
9234
+ int err;
9235
+
9236
+ if (link->pin_path)
9237
+ return -EBUSY;
9238
+ err = make_parent_dir(path);
9239
+ if (err)
9240
+ return err;
9241
+ err = check_path(path);
9242
+ if (err)
9243
+ return err;
9244
+
9245
+ link->pin_path = strdup(path);
9246
+ if (!link->pin_path)
9247
+ return -ENOMEM;
9248
+
9249
+ if (bpf_obj_pin(link->fd, link->pin_path)) {
9250
+ err = -errno;
9251
+ zfree(&link->pin_path);
9252
+ return err;
9253
+ }
9254
+
9255
+ pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
9256
+ return 0;
9257
+}
9258
+
9259
+int bpf_link__unpin(struct bpf_link *link)
9260
+{
9261
+ int err;
9262
+
9263
+ if (!link->pin_path)
9264
+ return -EINVAL;
9265
+
9266
+ err = unlink(link->pin_path);
9267
+ if (err != 0)
9268
+ return -errno;
9269
+
9270
+ pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
9271
+ zfree(&link->pin_path);
9272
+ return 0;
9273
+}
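/* Editor's note: usage sketch, not part of this patch. For link-based attach
 * types a link can be pinned in BPF FS and then disconnected, so the
 * attachment outlives the loading process; the pin path is a placeholder and
 * a bpffs mount at /sys/fs/bpf is assumed.
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *
 *	if (!libbpf_get_error(link)) {
 *		bpf_link__pin(link, "/sys/fs/bpf/mylink");
 *		bpf_link__disconnect(link);
 *		bpf_link__destroy(link);
 *	}
 */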
9274
+
9275
+static int bpf_link__detach_perf_event(struct bpf_link *link)
9276
+{
9277
+ int err;
9278
+
9279
+ err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
9280
+ if (err)
9281
+ err = -errno;
9282
+
9283
+ close(link->fd);
9284
+ return err;
9285
+}
9286
+
9287
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
9288
+ int pfd)
9289
+{
9290
+ char errmsg[STRERR_BUFSIZE];
9291
+ struct bpf_link *link;
9292
+ int prog_fd, err;
9293
+
9294
+ if (pfd < 0) {
9295
+ pr_warn("prog '%s': invalid perf event FD %d\n",
9296
+ prog->name, pfd);
9297
+ return ERR_PTR(-EINVAL);
9298
+ }
9299
+ prog_fd = bpf_program__fd(prog);
9300
+ if (prog_fd < 0) {
9301
+ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
9302
+ prog->name);
9303
+ return ERR_PTR(-EINVAL);
9304
+ }
9305
+
9306
+ link = calloc(1, sizeof(*link));
9307
+ if (!link)
9308
+ return ERR_PTR(-ENOMEM);
9309
+ link->detach = &bpf_link__detach_perf_event;
9310
+ link->fd = pfd;
9311
+
9312
+ if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
9313
+ err = -errno;
9314
+ free(link);
9315
+ pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
9316
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9317
+ if (err == -EPROTO)
9318
+ pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
9319
+ prog->name, pfd);
9320
+ return ERR_PTR(err);
9321
+ }
9322
+ if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9323
+ err = -errno;
9324
+ free(link);
9325
+ pr_warn("prog '%s': failed to enable pfd %d: %s\n",
9326
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9327
+ return ERR_PTR(err);
9328
+ }
9329
+ return link;
9330
+}
9331
+
9332
+/*
9333
+ * this function is expected to parse integer in the range of [0, 2^31-1] from
9334
+ * given file using scanf format string fmt. If actual parsed value is
9335
+ * negative, the result might be indistinguishable from error
9336
+ */
9337
+static int parse_uint_from_file(const char *file, const char *fmt)
9338
+{
9339
+ char buf[STRERR_BUFSIZE];
9340
+ int err, ret;
9341
+ FILE *f;
9342
+
9343
+ f = fopen(file, "r");
9344
+ if (!f) {
9345
+ err = -errno;
9346
+ pr_debug("failed to open '%s': %s\n", file,
9347
+ libbpf_strerror_r(err, buf, sizeof(buf)));
9348
+ return err;
9349
+ }
9350
+ err = fscanf(f, fmt, &ret);
9351
+ if (err != 1) {
9352
+ err = err == EOF ? -EIO : -errno;
9353
+ pr_debug("failed to parse '%s': %s\n", file,
9354
+ libbpf_strerror_r(err, buf, sizeof(buf)));
9355
+ fclose(f);
9356
+ return err;
9357
+ }
9358
+ fclose(f);
9359
+ return ret;
9360
+}
9361
+
9362
+static int determine_kprobe_perf_type(void)
9363
+{
9364
+ const char *file = "/sys/bus/event_source/devices/kprobe/type";
9365
+
9366
+ return parse_uint_from_file(file, "%d\n");
9367
+}
9368
+
9369
+static int determine_uprobe_perf_type(void)
9370
+{
9371
+ const char *file = "/sys/bus/event_source/devices/uprobe/type";
9372
+
9373
+ return parse_uint_from_file(file, "%d\n");
9374
+}
9375
+
9376
+static int determine_kprobe_retprobe_bit(void)
9377
+{
9378
+ const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
9379
+
9380
+ return parse_uint_from_file(file, "config:%d\n");
9381
+}
9382
+
9383
+static int determine_uprobe_retprobe_bit(void)
9384
+{
9385
+ const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
9386
+
9387
+ return parse_uint_from_file(file, "config:%d\n");
9388
+}
9389
+
9390
+static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
9391
+ uint64_t offset, int pid)
9392
+{
9393
+ struct perf_event_attr attr = {};
9394
+ char errmsg[STRERR_BUFSIZE];
9395
+ int type, pfd, err;
9396
+
9397
+ type = uprobe ? determine_uprobe_perf_type()
9398
+ : determine_kprobe_perf_type();
9399
+ if (type < 0) {
9400
+ pr_warn("failed to determine %s perf type: %s\n",
9401
+ uprobe ? "uprobe" : "kprobe",
9402
+ libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
9403
+ return type;
9404
+ }
9405
+ if (retprobe) {
9406
+ int bit = uprobe ? determine_uprobe_retprobe_bit()
9407
+ : determine_kprobe_retprobe_bit();
9408
+
9409
+ if (bit < 0) {
9410
+ pr_warn("failed to determine %s retprobe bit: %s\n",
9411
+ uprobe ? "uprobe" : "kprobe",
9412
+ libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
9413
+ return bit;
9414
+ }
9415
+ attr.config |= 1 << bit;
9416
+ }
9417
+ attr.size = sizeof(attr);
9418
+ attr.type = type;
9419
+ attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
9420
+ attr.config2 = offset; /* kprobe_addr or probe_offset */
9421
+
9422
+ /* pid filter is meaningful only for uprobes */
9423
+ pfd = syscall(__NR_perf_event_open, &attr,
9424
+ pid < 0 ? -1 : pid /* pid */,
9425
+ pid == -1 ? 0 : -1 /* cpu */,
9426
+ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9427
+ if (pfd < 0) {
9428
+ err = -errno;
9429
+ pr_warn("%s perf_event_open() failed: %s\n",
9430
+ uprobe ? "uprobe" : "kprobe",
9431
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9432
+ return err;
9433
+ }
9434
+ return pfd;
9435
+}
9436
+
9437
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
9438
+ bool retprobe,
9439
+ const char *func_name)
9440
+{
9441
+ char errmsg[STRERR_BUFSIZE];
9442
+ struct bpf_link *link;
9443
+ int pfd, err;
9444
+
9445
+ pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
9446
+ 0 /* offset */, -1 /* pid */);
9447
+ if (pfd < 0) {
9448
+ pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
9449
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
9450
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9451
+ return ERR_PTR(pfd);
9452
+ }
9453
+ link = bpf_program__attach_perf_event(prog, pfd);
9454
+ if (IS_ERR(link)) {
9455
+ close(pfd);
9456
+ err = PTR_ERR(link);
9457
+ pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
9458
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
9459
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9460
+ return link;
9461
+ }
9462
+ return link;
9463
+}
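/* Editor's note: usage sketch, not part of this patch. Attaching a loaded
 * program to a kprobe and detaching again; "do_sys_open" is a placeholder
 * kernel function name, and passing true instead of false selects a
 * kretprobe.
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe(prog, false, "do_sys_open");
 *	if (!libbpf_get_error(link))
 *		bpf_link__destroy(link);
 */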
9464
+
9465
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
9466
+ struct bpf_program *prog)
9467
+{
9468
+ const char *func_name;
9469
+ bool retprobe;
9470
+
9471
+ func_name = prog->sec_name + sec->len;
9472
+ retprobe = strcmp(sec->sec, "kretprobe/") == 0;
9473
+
9474
+ return bpf_program__attach_kprobe(prog, retprobe, func_name);
9475
+}
9476
+
9477
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
9478
+ bool retprobe, pid_t pid,
9479
+ const char *binary_path,
9480
+ size_t func_offset)
9481
+{
9482
+ char errmsg[STRERR_BUFSIZE];
9483
+ struct bpf_link *link;
9484
+ int pfd, err;
9485
+
9486
+ pfd = perf_event_open_probe(true /* uprobe */, retprobe,
9487
+ binary_path, func_offset, pid);
9488
+ if (pfd < 0) {
9489
+ pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
9490
+ prog->name, retprobe ? "uretprobe" : "uprobe",
9491
+ binary_path, func_offset,
9492
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9493
+ return ERR_PTR(pfd);
9494
+ }
9495
+ link = bpf_program__attach_perf_event(prog, pfd);
9496
+ if (IS_ERR(link)) {
9497
+ close(pfd);
9498
+ err = PTR_ERR(link);
9499
+ pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
9500
+ prog->name, retprobe ? "uretprobe" : "uprobe",
9501
+ binary_path, func_offset,
9502
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9503
+ return link;
9504
+ }
9505
+ return link;
9506
+}
9507
+
9508
+static int determine_tracepoint_id(const char *tp_category,
9509
+ const char *tp_name)
9510
+{
9511
+ char file[PATH_MAX];
9512
+ int ret;
9513
+
9514
+ ret = snprintf(file, sizeof(file),
9515
+ "/sys/kernel/debug/tracing/events/%s/%s/id",
9516
+ tp_category, tp_name);
9517
+ if (ret < 0)
9518
+ return -errno;
9519
+ if (ret >= sizeof(file)) {
9520
+ pr_debug("tracepoint %s/%s path is too long\n",
9521
+ tp_category, tp_name);
9522
+ return -E2BIG;
9523
+ }
9524
+ return parse_uint_from_file(file, "%d\n");
9525
+}
9526
+
9527
+static int perf_event_open_tracepoint(const char *tp_category,
9528
+ const char *tp_name)
9529
+{
9530
+ struct perf_event_attr attr = {};
9531
+ char errmsg[STRERR_BUFSIZE];
9532
+ int tp_id, pfd, err;
9533
+
9534
+ tp_id = determine_tracepoint_id(tp_category, tp_name);
9535
+ if (tp_id < 0) {
9536
+ pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
9537
+ tp_category, tp_name,
9538
+ libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
9539
+ return tp_id;
9540
+ }
9541
+
9542
+ attr.type = PERF_TYPE_TRACEPOINT;
9543
+ attr.size = sizeof(attr);
9544
+ attr.config = tp_id;
9545
+
9546
+ pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
9547
+ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9548
+ if (pfd < 0) {
9549
+ err = -errno;
9550
+ pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
9551
+ tp_category, tp_name,
9552
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9553
+ return err;
9554
+ }
9555
+ return pfd;
9556
+}
9557
+
9558
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
9559
+ const char *tp_category,
9560
+ const char *tp_name)
9561
+{
9562
+ char errmsg[STRERR_BUFSIZE];
9563
+ struct bpf_link *link;
9564
+ int pfd, err;
9565
+
9566
+ pfd = perf_event_open_tracepoint(tp_category, tp_name);
9567
+ if (pfd < 0) {
9568
+ pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
9569
+ prog->name, tp_category, tp_name,
9570
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9571
+ return ERR_PTR(pfd);
9572
+ }
9573
+ link = bpf_program__attach_perf_event(prog, pfd);
9574
+ if (IS_ERR(link)) {
9575
+ close(pfd);
9576
+ err = PTR_ERR(link);
9577
+ pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
9578
+ prog->name, tp_category, tp_name,
9579
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9580
+ return link;
9581
+ }
9582
+ return link;
9583
+}
9584
+
9585
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
9586
+ struct bpf_program *prog)
9587
+{
9588
+ char *sec_name, *tp_cat, *tp_name;
9589
+ struct bpf_link *link;
9590
+
9591
+ sec_name = strdup(prog->sec_name);
9592
+ if (!sec_name)
9593
+ return ERR_PTR(-ENOMEM);
9594
+
9595
+ /* extract "tp/<category>/<name>" */
9596
+ tp_cat = sec_name + sec->len;
9597
+ tp_name = strchr(tp_cat, '/');
9598
+ if (!tp_name) {
9599
+ link = ERR_PTR(-EINVAL);
9600
+ goto out;
9601
+ }
9602
+ *tp_name = '\0';
9603
+ tp_name++;
9604
+
9605
+ link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
9606
+out:
9607
+ free(sec_name);
9608
+ return link;
9609
+}
9610
+
9611
+struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
9612
+ const char *tp_name)
9613
+{
9614
+ char errmsg[STRERR_BUFSIZE];
9615
+ struct bpf_link *link;
9616
+ int prog_fd, pfd;
9617
+
9618
+ prog_fd = bpf_program__fd(prog);
9619
+ if (prog_fd < 0) {
9620
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9621
+ return ERR_PTR(-EINVAL);
9622
+ }
9623
+
9624
+ link = calloc(1, sizeof(*link));
9625
+ if (!link)
9626
+ return ERR_PTR(-ENOMEM);
9627
+ link->detach = &bpf_link__detach_fd;
9628
+
9629
+ pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
9630
+ if (pfd < 0) {
9631
+ pfd = -errno;
9632
+ free(link);
9633
+ pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
9634
+ prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9635
+ return ERR_PTR(pfd);
9636
+ }
9637
+ link->fd = pfd;
9638
+ return link;
9639
+}
9640
+
9641
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
9642
+ struct bpf_program *prog)
9643
+{
9644
+ const char *tp_name = prog->sec_name + sec->len;
9645
+
9646
+ return bpf_program__attach_raw_tracepoint(prog, tp_name);
9647
+}
9648
+
9649
+/* Common logic for all BPF program types that attach to a btf_id */
9650
+static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
9651
+{
9652
+ char errmsg[STRERR_BUFSIZE];
9653
+ struct bpf_link *link;
9654
+ int prog_fd, pfd;
9655
+
9656
+ prog_fd = bpf_program__fd(prog);
9657
+ if (prog_fd < 0) {
9658
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9659
+ return ERR_PTR(-EINVAL);
9660
+ }
9661
+
9662
+ link = calloc(1, sizeof(*link));
9663
+ if (!link)
9664
+ return ERR_PTR(-ENOMEM);
9665
+ link->detach = &bpf_link__detach_fd;
9666
+
9667
+ pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
9668
+ if (pfd < 0) {
9669
+ pfd = -errno;
9670
+ free(link);
9671
+ pr_warn("prog '%s': failed to attach: %s\n",
9672
+ prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9673
+ return ERR_PTR(pfd);
9674
+ }
9675
+ link->fd = pfd;
9676
+ return (struct bpf_link *)link;
9677
+}
9678
+
9679
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
9680
+{
9681
+ return bpf_program__attach_btf_id(prog);
9682
+}
9683
+
9684
+struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
9685
+{
9686
+ return bpf_program__attach_btf_id(prog);
9687
+}
9688
+
9689
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
9690
+ struct bpf_program *prog)
9691
+{
9692
+ return bpf_program__attach_trace(prog);
9693
+}
9694
+
9695
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
9696
+ struct bpf_program *prog)
9697
+{
9698
+ return bpf_program__attach_lsm(prog);
9699
+}
9700
+
9701
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
9702
+ struct bpf_program *prog)
9703
+{
9704
+ return bpf_program__attach_iter(prog, NULL);
9705
+}
9706
+
9707
+static struct bpf_link *
9708
+bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
9709
+ const char *target_name)
9710
+{
9711
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
9712
+ .target_btf_id = btf_id);
9713
+ enum bpf_attach_type attach_type;
9714
+ char errmsg[STRERR_BUFSIZE];
9715
+ struct bpf_link *link;
9716
+ int prog_fd, link_fd;
9717
+
9718
+ prog_fd = bpf_program__fd(prog);
9719
+ if (prog_fd < 0) {
9720
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9721
+ return ERR_PTR(-EINVAL);
9722
+ }
9723
+
9724
+ link = calloc(1, sizeof(*link));
9725
+ if (!link)
9726
+ return ERR_PTR(-ENOMEM);
9727
+ link->detach = &bpf_link__detach_fd;
9728
+
9729
+ attach_type = bpf_program__get_expected_attach_type(prog);
9730
+ link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
9731
+ if (link_fd < 0) {
9732
+ link_fd = -errno;
9733
+ free(link);
9734
+ pr_warn("prog '%s': failed to attach to %s: %s\n",
9735
+ prog->name, target_name,
9736
+ libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
9737
+ return ERR_PTR(link_fd);
9738
+ }
9739
+ link->fd = link_fd;
9740
+ return link;
9741
+}
9742
+
9743
+struct bpf_link *
9744
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
9745
+{
9746
+ return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
9747
+}
9748
+
9749
+struct bpf_link *
9750
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
9751
+{
9752
+ return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
9753
+}
9754
+
9755
+struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
9756
+{
9757
+ /* target_fd/target_ifindex use the same field in LINK_CREATE */
9758
+ return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
9759
+}
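The cgroup, netns and XDP wrappers above all funnel into bpf_program__attach_fd() and BPF_LINK_CREATE; a sketch of the cgroup flavour, with a hypothetical cgroup path.

#include <fcntl.h>
#include <unistd.h>
#include "libbpf.h"

static struct bpf_link *attach_cgroup_example(struct bpf_program *prog)
{
	struct bpf_link *link;
	int cg_fd;

	cg_fd = open("/sys/fs/cgroup/unified/test.slice", O_RDONLY); /* hypothetical path */
	if (cg_fd < 0)
		return NULL;

	/* the program's expected_attach_type (e.g. BPF_CGROUP_INET_INGRESS) picks the hook */
	link = bpf_program__attach_cgroup(prog, cg_fd);
	close(cg_fd); /* the created kernel link keeps its own cgroup reference */
	return link;
}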
9760
+
9761
+struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
9762
+ int target_fd,
9763
+ const char *attach_func_name)
9764
+{
9765
+ int btf_id;
9766
+
9767
+ if (!!target_fd != !!attach_func_name) {
9768
+ pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
9769
+ prog->name);
9770
+ return ERR_PTR(-EINVAL);
9771
+ }
9772
+
9773
+ if (prog->type != BPF_PROG_TYPE_EXT) {
9774
+ pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
9775
+ prog->name);
9776
+ return ERR_PTR(-EINVAL);
9777
+ }
9778
+
9779
+ if (target_fd) {
9780
+ btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
9781
+ if (btf_id < 0)
9782
+ return ERR_PTR(btf_id);
9783
+
9784
+ return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
9785
+ } else {
9786
+ /* no target, so use raw_tracepoint_open for compatibility
9787
+ * with old kernels
9788
+ */
9789
+ return bpf_program__attach_trace(prog);
9790
+ }
9791
+}
9792
+
9793
+struct bpf_link *
9794
+bpf_program__attach_iter(struct bpf_program *prog,
9795
+ const struct bpf_iter_attach_opts *opts)
9796
+{
9797
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
9798
+ char errmsg[STRERR_BUFSIZE];
9799
+ struct bpf_link *link;
9800
+ int prog_fd, link_fd;
9801
+ __u32 target_fd = 0;
9802
+
9803
+ if (!OPTS_VALID(opts, bpf_iter_attach_opts))
9804
+ return ERR_PTR(-EINVAL);
9805
+
9806
+ link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
9807
+ link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
9808
+
9809
+ prog_fd = bpf_program__fd(prog);
9810
+ if (prog_fd < 0) {
9811
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9812
+ return ERR_PTR(-EINVAL);
9813
+ }
9814
+
9815
+ link = calloc(1, sizeof(*link));
9816
+ if (!link)
9817
+ return ERR_PTR(-ENOMEM);
9818
+ link->detach = &bpf_link__detach_fd;
9819
+
9820
+ link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
9821
+ &link_create_opts);
9822
+ if (link_fd < 0) {
9823
+ link_fd = -errno;
9824
+ free(link);
9825
+ pr_warn("prog '%s': failed to attach to iterator: %s\n",
9826
+ prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
9827
+ return ERR_PTR(link_fd);
9828
+ }
9829
+ link->fd = link_fd;
9830
+ return link;
9831
+}
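A sketch of driving a BPF iterator end-to-end with the API above plus bpf_iter_create() from bpf.h; the program is assumed to be a loaded SEC("iter/...") program.

#include <errno.h>
#include <unistd.h>
#include "libbpf.h"
#include "bpf.h"

static int read_iter_output_example(struct bpf_program *prog)
{
	struct bpf_link *link;
	char buf[4096];
	int iter_fd;
	ssize_t len;
	long err;

	link = bpf_program__attach_iter(prog, NULL);
	err = libbpf_get_error(link);
	if (err)
		return (int)err;

	/* each bpf_iter_create() starts one fresh iteration over the link */
	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0) {
		bpf_link__destroy(link);
		return -errno;
	}

	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
		; /* consume buf[0..len) produced by bpf_seq_printf() et al. */

	close(iter_fd);
	bpf_link__destroy(link);
	return 0;
}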
9832
+
9833
+struct bpf_link *bpf_program__attach(struct bpf_program *prog)
9834
+{
9835
+ const struct bpf_sec_def *sec_def;
9836
+
9837
+ sec_def = find_sec_def(prog->sec_name);
9838
+ if (!sec_def || !sec_def->attach_fn)
9839
+ return ERR_PTR(-ESRCH);
9840
+
9841
+ return sec_def->attach_fn(sec_def, prog);
9842
+}
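bpf_program__attach() dispatches on the program's SEC() name via find_sec_def(); a minimal sketch, with the section name assumed.

static struct bpf_link *auto_attach_example(struct bpf_program *prog)
{
	/* for SEC("tracepoint/sched/sched_switch") this resolves to attach_tp() above;
	 * -ESRCH means no auto-attach handler is known for the section name
	 */
	return bpf_program__attach(prog);
}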
9843
+
9844
+static int bpf_link__detach_struct_ops(struct bpf_link *link)
9845
+{
9846
+ __u32 zero = 0;
9847
+
9848
+ if (bpf_map_delete_elem(link->fd, &zero))
9849
+ return -errno;
9850
+
9851
+ return 0;
9852
+}
9853
+
9854
+struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
9855
+{
9856
+ struct bpf_struct_ops *st_ops;
9857
+ struct bpf_link *link;
9858
+ __u32 i, zero = 0;
9859
+ int err;
9860
+
9861
+ if (!bpf_map__is_struct_ops(map) || map->fd == -1)
9862
+ return ERR_PTR(-EINVAL);
9863
+
9864
+ link = calloc(1, sizeof(*link));
9865
+ if (!link)
9866
+ return ERR_PTR(-EINVAL);
9867
+
9868
+ st_ops = map->st_ops;
9869
+ for (i = 0; i < btf_vlen(st_ops->type); i++) {
9870
+ struct bpf_program *prog = st_ops->progs[i];
9871
+ void *kern_data;
9872
+ int prog_fd;
9873
+
9874
+ if (!prog)
9875
+ continue;
9876
+
9877
+ prog_fd = bpf_program__fd(prog);
9878
+ kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
9879
+ *(unsigned long *)kern_data = prog_fd;
9880
+ }
9881
+
9882
+ err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
9883
+ if (err) {
9884
+ err = -errno;
9885
+ free(link);
9886
+ return ERR_PTR(err);
9887
+ }
9888
+
9889
+ link->detach = bpf_link__detach_struct_ops;
9890
+ link->fd = map->fd;
9891
+
9892
+ return link;
9893
+}
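Registering a struct_ops map (for example a BPF TCP congestion-control implementation) becomes a one-call affair once the object is loaded; the map name below is a placeholder.

#include <errno.h>
#include "libbpf.h"

static int register_struct_ops_example(struct bpf_object *obj)
{
	struct bpf_map *map;
	struct bpf_link *link;
	long err;

	map = bpf_object__find_map_by_name(obj, "dctcp"); /* hypothetical struct_ops map */
	if (!map)
		return -ENOENT;

	link = bpf_map__attach_struct_ops(map);
	err = libbpf_get_error(link);
	if (err)
		return (int)err;

	/* destroying the link deletes the single kernel-side element again */
	bpf_link__destroy(link);
	return 0;
}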
9894
+
9895
+enum bpf_perf_event_ret
9896
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
9897
+ void **copy_mem, size_t *copy_size,
9898
+ bpf_perf_event_print_t fn, void *private_data)
9899
+{
9900
+ struct perf_event_mmap_page *header = mmap_mem;
9901
+ __u64 data_head = ring_buffer_read_head(header);
23799902 __u64 data_tail = header->data_tail;
2380
- __u64 data_head = header->data_head;
2381
- int ret = LIBBPF_PERF_EVENT_ERROR;
2382
- void *base, *begin, *end;
9903
+ void *base = ((__u8 *)header) + page_size;
9904
+ int ret = LIBBPF_PERF_EVENT_CONT;
9905
+ struct perf_event_header *ehdr;
9906
+ size_t ehdr_size;
23839907
2384
- asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
2385
- if (data_head == data_tail)
2386
- return LIBBPF_PERF_EVENT_CONT;
9908
+ while (data_head != data_tail) {
9909
+ ehdr = base + (data_tail & (mmap_size - 1));
9910
+ ehdr_size = ehdr->size;
23879911
2388
- base = ((char *)header) + page_size;
9912
+ if (((void *)ehdr) + ehdr_size > base + mmap_size) {
9913
+ void *copy_start = ehdr;
9914
+ size_t len_first = base + mmap_size - copy_start;
9915
+ size_t len_secnd = ehdr_size - len_first;
23899916
2390
- begin = base + data_tail % size;
2391
- end = base + data_head % size;
2392
-
2393
- while (begin != end) {
2394
- struct perf_event_header *ehdr;
2395
-
2396
- ehdr = begin;
2397
- if (begin + ehdr->size > base + size) {
2398
- long len = base + size - begin;
2399
-
2400
- if (*buf_len < ehdr->size) {
2401
- free(*buf);
2402
- *buf = malloc(ehdr->size);
2403
- if (!*buf) {
9917
+ if (*copy_size < ehdr_size) {
9918
+ free(*copy_mem);
9919
+ *copy_mem = malloc(ehdr_size);
9920
+ if (!*copy_mem) {
9921
+ *copy_size = 0;
24049922 ret = LIBBPF_PERF_EVENT_ERROR;
24059923 break;
24069924 }
2407
- *buf_len = ehdr->size;
9925
+ *copy_size = ehdr_size;
24089926 }
24099927
2410
- memcpy(*buf, begin, len);
2411
- memcpy(*buf + len, base, ehdr->size - len);
2412
- ehdr = (void *)*buf;
2413
- begin = base + ehdr->size - len;
2414
- } else if (begin + ehdr->size == base + size) {
2415
- begin = base;
2416
- } else {
2417
- begin += ehdr->size;
9928
+ memcpy(*copy_mem, copy_start, len_first);
9929
+ memcpy(*copy_mem + len_first, base, len_secnd);
9930
+ ehdr = *copy_mem;
24189931 }
24199932
2420
- ret = fn(ehdr, priv);
9933
+ ret = fn(ehdr, private_data);
9934
+ data_tail += ehdr_size;
24219935 if (ret != LIBBPF_PERF_EVENT_CONT)
24229936 break;
2423
-
2424
- data_tail += ehdr->size;
24259937 }
24269938
2427
- __sync_synchronize(); /* smp_mb() */
2428
- header->data_tail = data_tail;
2429
-
9939
+ ring_buffer_write_tail(header, data_tail);
24309940 return ret;
24319941 }
9942
+
9943
+struct perf_buffer;
9944
+
9945
+struct perf_buffer_params {
9946
+ struct perf_event_attr *attr;
9947
+ /* if event_cb is specified, it takes precedence */
9948
+ perf_buffer_event_fn event_cb;
9949
+ /* sample_cb and lost_cb are higher-level common-case callbacks */
9950
+ perf_buffer_sample_fn sample_cb;
9951
+ perf_buffer_lost_fn lost_cb;
9952
+ void *ctx;
9953
+ int cpu_cnt;
9954
+ int *cpus;
9955
+ int *map_keys;
9956
+};
9957
+
9958
+struct perf_cpu_buf {
9959
+ struct perf_buffer *pb;
9960
+ void *base; /* mmap()'ed memory */
9961
+ void *buf; /* for reconstructing segmented data */
9962
+ size_t buf_size;
9963
+ int fd;
9964
+ int cpu;
9965
+ int map_key;
9966
+};
9967
+
9968
+struct perf_buffer {
9969
+ perf_buffer_event_fn event_cb;
9970
+ perf_buffer_sample_fn sample_cb;
9971
+ perf_buffer_lost_fn lost_cb;
9972
+ void *ctx; /* passed into callbacks */
9973
+
9974
+ size_t page_size;
9975
+ size_t mmap_size;
9976
+ struct perf_cpu_buf **cpu_bufs;
9977
+ struct epoll_event *events;
9978
+ int cpu_cnt; /* number of allocated CPU buffers */
9979
+ int epoll_fd; /* perf event FD */
9980
+ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
9981
+};
9982
+
9983
+static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
9984
+ struct perf_cpu_buf *cpu_buf)
9985
+{
9986
+ if (!cpu_buf)
9987
+ return;
9988
+ if (cpu_buf->base &&
9989
+ munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
9990
+ pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
9991
+ if (cpu_buf->fd >= 0) {
9992
+ ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
9993
+ close(cpu_buf->fd);
9994
+ }
9995
+ free(cpu_buf->buf);
9996
+ free(cpu_buf);
9997
+}
9998
+
9999
+void perf_buffer__free(struct perf_buffer *pb)
10000
+{
10001
+ int i;
10002
+
10003
+ if (IS_ERR_OR_NULL(pb))
10004
+ return;
10005
+ if (pb->cpu_bufs) {
10006
+ for (i = 0; i < pb->cpu_cnt; i++) {
10007
+ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10008
+
10009
+ if (!cpu_buf)
10010
+ continue;
10011
+
10012
+ bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
10013
+ perf_buffer__free_cpu_buf(pb, cpu_buf);
10014
+ }
10015
+ free(pb->cpu_bufs);
10016
+ }
10017
+ if (pb->epoll_fd >= 0)
10018
+ close(pb->epoll_fd);
10019
+ free(pb->events);
10020
+ free(pb);
10021
+}
10022
+
10023
+static struct perf_cpu_buf *
10024
+perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
10025
+ int cpu, int map_key)
10026
+{
10027
+ struct perf_cpu_buf *cpu_buf;
10028
+ char msg[STRERR_BUFSIZE];
10029
+ int err;
10030
+
10031
+ cpu_buf = calloc(1, sizeof(*cpu_buf));
10032
+ if (!cpu_buf)
10033
+ return ERR_PTR(-ENOMEM);
10034
+
10035
+ cpu_buf->pb = pb;
10036
+ cpu_buf->cpu = cpu;
10037
+ cpu_buf->map_key = map_key;
10038
+
10039
+ cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
10040
+ -1, PERF_FLAG_FD_CLOEXEC);
10041
+ if (cpu_buf->fd < 0) {
10042
+ err = -errno;
10043
+ pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
10044
+ cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10045
+ goto error;
10046
+ }
10047
+
10048
+ cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
10049
+ PROT_READ | PROT_WRITE, MAP_SHARED,
10050
+ cpu_buf->fd, 0);
10051
+ if (cpu_buf->base == MAP_FAILED) {
10052
+ cpu_buf->base = NULL;
10053
+ err = -errno;
10054
+ pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
10055
+ cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10056
+ goto error;
10057
+ }
10058
+
10059
+ if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10060
+ err = -errno;
10061
+ pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
10062
+ cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10063
+ goto error;
10064
+ }
10065
+
10066
+ return cpu_buf;
10067
+
10068
+error:
10069
+ perf_buffer__free_cpu_buf(pb, cpu_buf);
10070
+ return (struct perf_cpu_buf *)ERR_PTR(err);
10071
+}
10072
+
10073
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
10074
+ struct perf_buffer_params *p);
10075
+
10076
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
10077
+ const struct perf_buffer_opts *opts)
10078
+{
10079
+ struct perf_buffer_params p = {};
10080
+ struct perf_event_attr attr = { 0, };
10081
+
10082
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
10083
+ attr.type = PERF_TYPE_SOFTWARE;
10084
+ attr.sample_type = PERF_SAMPLE_RAW;
10085
+ attr.sample_period = 1;
10086
+ attr.wakeup_events = 1;
10087
+
10088
+ p.attr = &attr;
10089
+ p.sample_cb = opts ? opts->sample_cb : NULL;
10090
+ p.lost_cb = opts ? opts->lost_cb : NULL;
10091
+ p.ctx = opts ? opts->ctx : NULL;
10092
+
10093
+ return __perf_buffer__new(map_fd, page_cnt, &p);
10094
+}
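A sketch of the common perf-buffer consumer built on the options form above; the map name and callback body are illustrative.

#include <stdio.h>
#include "libbpf.h"

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	/* data/size is the raw payload written by bpf_perf_event_output() */
	printf("cpu %d: %u bytes\n", cpu, size);
}

static int consume_events_example(struct bpf_object *obj)
{
	struct perf_buffer_opts pb_opts = { .sample_cb = on_sample };
	struct perf_buffer *pb;
	struct bpf_map *map;
	int err;

	map = bpf_object__find_map_by_name(obj, "events"); /* PERF_EVENT_ARRAY map, name hypothetical */
	if (!map)
		return -ENOENT;

	pb = perf_buffer__new(bpf_map__fd(map), 8 /* pages per CPU */, &pb_opts);
	err = (int)libbpf_get_error(pb);
	if (err)
		return err;

	while ((err = perf_buffer__poll(pb, 100 /* ms */)) >= 0)
		; /* sample/lost callbacks fire from inside poll */

	perf_buffer__free(pb);
	return err;
}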
10095
+
10096
+struct perf_buffer *
10097
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
10098
+ const struct perf_buffer_raw_opts *opts)
10099
+{
10100
+ struct perf_buffer_params p = {};
10101
+
10102
+ p.attr = opts->attr;
10103
+ p.event_cb = opts->event_cb;
10104
+ p.ctx = opts->ctx;
10105
+ p.cpu_cnt = opts->cpu_cnt;
10106
+ p.cpus = opts->cpus;
10107
+ p.map_keys = opts->map_keys;
10108
+
10109
+ return __perf_buffer__new(map_fd, page_cnt, &p);
10110
+}
10111
+
10112
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
10113
+ struct perf_buffer_params *p)
10114
+{
10115
+ const char *online_cpus_file = "/sys/devices/system/cpu/online";
10116
+ struct bpf_map_info map;
10117
+ char msg[STRERR_BUFSIZE];
10118
+ struct perf_buffer *pb;
10119
+ bool *online = NULL;
10120
+ __u32 map_info_len;
10121
+ int err, i, j, n;
10122
+
10123
+ if (page_cnt & (page_cnt - 1)) {
10124
+ pr_warn("page count should be power of two, but is %zu\n",
10125
+ page_cnt);
10126
+ return ERR_PTR(-EINVAL);
10127
+ }
10128
+
10129
+ /* best-effort sanity checks */
10130
+ memset(&map, 0, sizeof(map));
10131
+ map_info_len = sizeof(map);
10132
+ err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
10133
+ if (err) {
10134
+ err = -errno;
10135
+ /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
10136
+ * -EBADFD, -EFAULT, or -E2BIG on real error
10137
+ */
10138
+ if (err != -EINVAL) {
10139
+ pr_warn("failed to get map info for map FD %d: %s\n",
10140
+ map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
10141
+ return ERR_PTR(err);
10142
+ }
10143
+ pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
10144
+ map_fd);
10145
+ } else {
10146
+ if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
10147
+ pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
10148
+ map.name);
10149
+ return ERR_PTR(-EINVAL);
10150
+ }
10151
+ }
10152
+
10153
+ pb = calloc(1, sizeof(*pb));
10154
+ if (!pb)
10155
+ return ERR_PTR(-ENOMEM);
10156
+
10157
+ pb->event_cb = p->event_cb;
10158
+ pb->sample_cb = p->sample_cb;
10159
+ pb->lost_cb = p->lost_cb;
10160
+ pb->ctx = p->ctx;
10161
+
10162
+ pb->page_size = getpagesize();
10163
+ pb->mmap_size = pb->page_size * page_cnt;
10164
+ pb->map_fd = map_fd;
10165
+
10166
+ pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
10167
+ if (pb->epoll_fd < 0) {
10168
+ err = -errno;
10169
+ pr_warn("failed to create epoll instance: %s\n",
10170
+ libbpf_strerror_r(err, msg, sizeof(msg)));
10171
+ goto error;
10172
+ }
10173
+
10174
+ if (p->cpu_cnt > 0) {
10175
+ pb->cpu_cnt = p->cpu_cnt;
10176
+ } else {
10177
+ pb->cpu_cnt = libbpf_num_possible_cpus();
10178
+ if (pb->cpu_cnt < 0) {
10179
+ err = pb->cpu_cnt;
10180
+ goto error;
10181
+ }
10182
+ if (map.max_entries && map.max_entries < pb->cpu_cnt)
10183
+ pb->cpu_cnt = map.max_entries;
10184
+ }
10185
+
10186
+ pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
10187
+ if (!pb->events) {
10188
+ err = -ENOMEM;
10189
+ pr_warn("failed to allocate events: out of memory\n");
10190
+ goto error;
10191
+ }
10192
+ pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
10193
+ if (!pb->cpu_bufs) {
10194
+ err = -ENOMEM;
10195
+ pr_warn("failed to allocate buffers: out of memory\n");
10196
+ goto error;
10197
+ }
10198
+
10199
+ err = parse_cpu_mask_file(online_cpus_file, &online, &n);
10200
+ if (err) {
10201
+ pr_warn("failed to get online CPU mask: %d\n", err);
10202
+ goto error;
10203
+ }
10204
+
10205
+ for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
10206
+ struct perf_cpu_buf *cpu_buf;
10207
+ int cpu, map_key;
10208
+
10209
+ cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
10210
+ map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
10211
+
10212
+ /* in case user didn't explicitly request particular CPUs to
10213
+ * be attached to, skip offline/not present CPUs
10214
+ */
10215
+ if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
10216
+ continue;
10217
+
10218
+ cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
10219
+ if (IS_ERR(cpu_buf)) {
10220
+ err = PTR_ERR(cpu_buf);
10221
+ goto error;
10222
+ }
10223
+
10224
+ pb->cpu_bufs[j] = cpu_buf;
10225
+
10226
+ err = bpf_map_update_elem(pb->map_fd, &map_key,
10227
+ &cpu_buf->fd, 0);
10228
+ if (err) {
10229
+ err = -errno;
10230
+ pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
10231
+ cpu, map_key, cpu_buf->fd,
10232
+ libbpf_strerror_r(err, msg, sizeof(msg)));
10233
+ goto error;
10234
+ }
10235
+
10236
+ pb->events[j].events = EPOLLIN;
10237
+ pb->events[j].data.ptr = cpu_buf;
10238
+ if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
10239
+ &pb->events[j]) < 0) {
10240
+ err = -errno;
10241
+ pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
10242
+ cpu, cpu_buf->fd,
10243
+ libbpf_strerror_r(err, msg, sizeof(msg)));
10244
+ goto error;
10245
+ }
10246
+ j++;
10247
+ }
10248
+ pb->cpu_cnt = j;
10249
+ free(online);
10250
+
10251
+ return pb;
10252
+
10253
+error:
10254
+ free(online);
10255
+ if (pb)
10256
+ perf_buffer__free(pb);
10257
+ return ERR_PTR(err);
10258
+}
10259
+
10260
+struct perf_sample_raw {
10261
+ struct perf_event_header header;
10262
+ uint32_t size;
10263
+ char data[];
10264
+};
10265
+
10266
+struct perf_sample_lost {
10267
+ struct perf_event_header header;
10268
+ uint64_t id;
10269
+ uint64_t lost;
10270
+ uint64_t sample_id;
10271
+};
10272
+
10273
+static enum bpf_perf_event_ret
10274
+perf_buffer__process_record(struct perf_event_header *e, void *ctx)
10275
+{
10276
+ struct perf_cpu_buf *cpu_buf = ctx;
10277
+ struct perf_buffer *pb = cpu_buf->pb;
10278
+ void *data = e;
10279
+
10280
+ /* user wants full control over parsing perf event */
10281
+ if (pb->event_cb)
10282
+ return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
10283
+
10284
+ switch (e->type) {
10285
+ case PERF_RECORD_SAMPLE: {
10286
+ struct perf_sample_raw *s = data;
10287
+
10288
+ if (pb->sample_cb)
10289
+ pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
10290
+ break;
10291
+ }
10292
+ case PERF_RECORD_LOST: {
10293
+ struct perf_sample_lost *s = data;
10294
+
10295
+ if (pb->lost_cb)
10296
+ pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
10297
+ break;
10298
+ }
10299
+ default:
10300
+ pr_warn("unknown perf sample type %d\n", e->type);
10301
+ return LIBBPF_PERF_EVENT_ERROR;
10302
+ }
10303
+ return LIBBPF_PERF_EVENT_CONT;
10304
+}
10305
+
10306
+static int perf_buffer__process_records(struct perf_buffer *pb,
10307
+ struct perf_cpu_buf *cpu_buf)
10308
+{
10309
+ enum bpf_perf_event_ret ret;
10310
+
10311
+ ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
10312
+ pb->page_size, &cpu_buf->buf,
10313
+ &cpu_buf->buf_size,
10314
+ perf_buffer__process_record, cpu_buf);
10315
+ if (ret != LIBBPF_PERF_EVENT_CONT)
10316
+ return ret;
10317
+ return 0;
10318
+}
10319
+
10320
+int perf_buffer__epoll_fd(const struct perf_buffer *pb)
10321
+{
10322
+ return pb->epoll_fd;
10323
+}
10324
+
10325
+int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
10326
+{
10327
+ int i, cnt, err;
10328
+
10329
+ cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
10330
+ for (i = 0; i < cnt; i++) {
10331
+ struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
10332
+
10333
+ err = perf_buffer__process_records(pb, cpu_buf);
10334
+ if (err) {
10335
+ pr_warn("error while processing records: %d\n", err);
10336
+ return err;
10337
+ }
10338
+ }
10339
+ return cnt < 0 ? -errno : cnt;
10340
+}
10341
+
10342
+/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
10343
+ * manager.
10344
+ */
10345
+size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
10346
+{
10347
+ return pb->cpu_cnt;
10348
+}
10349
+
10350
+/*
10351
+ * Return perf_event FD of a ring buffer in *buf_idx* slot of
10352
+ * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
10353
+ * select()/poll()/epoll() Linux syscalls.
10354
+ */
10355
+int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
10356
+{
10357
+ struct perf_cpu_buf *cpu_buf;
10358
+
10359
+ if (buf_idx >= pb->cpu_cnt)
10360
+ return -EINVAL;
10361
+
10362
+ cpu_buf = pb->cpu_bufs[buf_idx];
10363
+ if (!cpu_buf)
10364
+ return -ENOENT;
10365
+
10366
+ return cpu_buf->fd;
10367
+}
10368
+
10369
+/*
10370
+ * Consume data from perf ring buffer corresponding to slot *buf_idx* in
10371
+ * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
10372
+ * consume, do nothing and return success.
10373
+ * Returns:
10374
+ * - 0 on success;
10375
+ * - <0 on failure.
10376
+ */
10377
+int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
10378
+{
10379
+ struct perf_cpu_buf *cpu_buf;
10380
+
10381
+ if (buf_idx >= pb->cpu_cnt)
10382
+ return -EINVAL;
10383
+
10384
+ cpu_buf = pb->cpu_bufs[buf_idx];
10385
+ if (!cpu_buf)
10386
+ return -ENOENT;
10387
+
10388
+ return perf_buffer__process_records(pb, cpu_buf);
10389
+}
10390
+
10391
+int perf_buffer__consume(struct perf_buffer *pb)
10392
+{
10393
+ int i, err;
10394
+
10395
+ for (i = 0; i < pb->cpu_cnt; i++) {
10396
+ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10397
+
10398
+ if (!cpu_buf)
10399
+ continue;
10400
+
10401
+ err = perf_buffer__process_records(pb, cpu_buf);
10402
+ if (err) {
10403
+ pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
10404
+ return err;
10405
+ }
10406
+ }
10407
+ return 0;
10408
+}
10409
+
10410
+struct bpf_prog_info_array_desc {
10411
+ int array_offset; /* e.g. offset of jited_prog_insns */
10412
+ int count_offset; /* e.g. offset of jited_prog_len */
10413
+ int size_offset; /* > 0: offset of rec size,
10414
+ * < 0: fix size of -size_offset
10415
+ */
10416
+};
10417
+
10418
+static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
10419
+ [BPF_PROG_INFO_JITED_INSNS] = {
10420
+ offsetof(struct bpf_prog_info, jited_prog_insns),
10421
+ offsetof(struct bpf_prog_info, jited_prog_len),
10422
+ -1,
10423
+ },
10424
+ [BPF_PROG_INFO_XLATED_INSNS] = {
10425
+ offsetof(struct bpf_prog_info, xlated_prog_insns),
10426
+ offsetof(struct bpf_prog_info, xlated_prog_len),
10427
+ -1,
10428
+ },
10429
+ [BPF_PROG_INFO_MAP_IDS] = {
10430
+ offsetof(struct bpf_prog_info, map_ids),
10431
+ offsetof(struct bpf_prog_info, nr_map_ids),
10432
+ -(int)sizeof(__u32),
10433
+ },
10434
+ [BPF_PROG_INFO_JITED_KSYMS] = {
10435
+ offsetof(struct bpf_prog_info, jited_ksyms),
10436
+ offsetof(struct bpf_prog_info, nr_jited_ksyms),
10437
+ -(int)sizeof(__u64),
10438
+ },
10439
+ [BPF_PROG_INFO_JITED_FUNC_LENS] = {
10440
+ offsetof(struct bpf_prog_info, jited_func_lens),
10441
+ offsetof(struct bpf_prog_info, nr_jited_func_lens),
10442
+ -(int)sizeof(__u32),
10443
+ },
10444
+ [BPF_PROG_INFO_FUNC_INFO] = {
10445
+ offsetof(struct bpf_prog_info, func_info),
10446
+ offsetof(struct bpf_prog_info, nr_func_info),
10447
+ offsetof(struct bpf_prog_info, func_info_rec_size),
10448
+ },
10449
+ [BPF_PROG_INFO_LINE_INFO] = {
10450
+ offsetof(struct bpf_prog_info, line_info),
10451
+ offsetof(struct bpf_prog_info, nr_line_info),
10452
+ offsetof(struct bpf_prog_info, line_info_rec_size),
10453
+ },
10454
+ [BPF_PROG_INFO_JITED_LINE_INFO] = {
10455
+ offsetof(struct bpf_prog_info, jited_line_info),
10456
+ offsetof(struct bpf_prog_info, nr_jited_line_info),
10457
+ offsetof(struct bpf_prog_info, jited_line_info_rec_size),
10458
+ },
10459
+ [BPF_PROG_INFO_PROG_TAGS] = {
10460
+ offsetof(struct bpf_prog_info, prog_tags),
10461
+ offsetof(struct bpf_prog_info, nr_prog_tags),
10462
+ -(int)sizeof(__u8) * BPF_TAG_SIZE,
10463
+ },
10464
+
10465
+};
10466
+
10467
+static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
10468
+ int offset)
10469
+{
10470
+ __u32 *array = (__u32 *)info;
10471
+
10472
+ if (offset >= 0)
10473
+ return array[offset / sizeof(__u32)];
10474
+ return -(int)offset;
10475
+}
10476
+
10477
+static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
10478
+ int offset)
10479
+{
10480
+ __u64 *array = (__u64 *)info;
10481
+
10482
+ if (offset >= 0)
10483
+ return array[offset / sizeof(__u64)];
10484
+ return -(int)offset;
10485
+}
10486
+
10487
+static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
10488
+ __u32 val)
10489
+{
10490
+ __u32 *array = (__u32 *)info;
10491
+
10492
+ if (offset >= 0)
10493
+ array[offset / sizeof(__u32)] = val;
10494
+}
10495
+
10496
+static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
10497
+ __u64 val)
10498
+{
10499
+ __u64 *array = (__u64 *)info;
10500
+
10501
+ if (offset >= 0)
10502
+ array[offset / sizeof(__u64)] = val;
10503
+}
10504
+
10505
+struct bpf_prog_info_linear *
10506
+bpf_program__get_prog_info_linear(int fd, __u64 arrays)
10507
+{
10508
+ struct bpf_prog_info_linear *info_linear;
10509
+ struct bpf_prog_info info = {};
10510
+ __u32 info_len = sizeof(info);
10511
+ __u32 data_len = 0;
10512
+ int i, err;
10513
+ void *ptr;
10514
+
10515
+ if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
10516
+ return ERR_PTR(-EINVAL);
10517
+
10518
+ /* step 1: get array dimensions */
10519
+ err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
10520
+ if (err) {
10521
+ pr_debug("can't get prog info: %s", strerror(errno));
10522
+ return ERR_PTR(-EFAULT);
10523
+ }
10524
+
10525
+ /* step 2: calculate total size of all arrays */
10526
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10527
+ bool include_array = (arrays & (1UL << i)) > 0;
10528
+ struct bpf_prog_info_array_desc *desc;
10529
+ __u32 count, size;
10530
+
10531
+ desc = bpf_prog_info_array_desc + i;
10532
+
10533
+ /* kernel is too old to support this field */
10534
+ if (info_len < desc->array_offset + sizeof(__u32) ||
10535
+ info_len < desc->count_offset + sizeof(__u32) ||
10536
+ (desc->size_offset > 0 && info_len < desc->size_offset))
10537
+ include_array = false;
10538
+
10539
+ if (!include_array) {
10540
+ arrays &= ~(1UL << i); /* clear the bit */
10541
+ continue;
10542
+ }
10543
+
10544
+ count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10545
+ size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10546
+
10547
+ data_len += count * size;
10548
+ }
10549
+
10550
+ /* step 3: allocate continuous memory */
10551
+ data_len = roundup(data_len, sizeof(__u64));
10552
+ info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
10553
+ if (!info_linear)
10554
+ return ERR_PTR(-ENOMEM);
10555
+
10556
+ /* step 4: fill data to info_linear->info */
10557
+ info_linear->arrays = arrays;
10558
+ memset(&info_linear->info, 0, sizeof(info));
10559
+ ptr = info_linear->data;
10560
+
10561
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10562
+ struct bpf_prog_info_array_desc *desc;
10563
+ __u32 count, size;
10564
+
10565
+ if ((arrays & (1UL << i)) == 0)
10566
+ continue;
10567
+
10568
+ desc = bpf_prog_info_array_desc + i;
10569
+ count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10570
+ size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10571
+ bpf_prog_info_set_offset_u32(&info_linear->info,
10572
+ desc->count_offset, count);
10573
+ bpf_prog_info_set_offset_u32(&info_linear->info,
10574
+ desc->size_offset, size);
10575
+ bpf_prog_info_set_offset_u64(&info_linear->info,
10576
+ desc->array_offset,
10577
+ ptr_to_u64(ptr));
10578
+ ptr += count * size;
10579
+ }
10580
+
10581
+ /* step 5: call syscall again to get required arrays */
10582
+ err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
10583
+ if (err) {
10584
+ pr_debug("can't get prog info: %s", strerror(errno));
10585
+ free(info_linear);
10586
+ return ERR_PTR(-EFAULT);
10587
+ }
10588
+
10589
+ /* step 6: verify the data */
10590
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10591
+ struct bpf_prog_info_array_desc *desc;
10592
+ __u32 v1, v2;
10593
+
10594
+ if ((arrays & (1UL << i)) == 0)
10595
+ continue;
10596
+
10597
+ desc = bpf_prog_info_array_desc + i;
10598
+ v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10599
+ v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
10600
+ desc->count_offset);
10601
+ if (v1 != v2)
10602
+ pr_warn("%s: mismatch in element count\n", __func__);
10603
+
10604
+ v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10605
+ v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
10606
+ desc->size_offset);
10607
+ if (v1 != v2)
10608
+ pr_warn("%s: mismatch in rec size\n", __func__);
10609
+ }
10610
+
10611
+ /* step 7: update info_len and data_len */
10612
+ info_linear->info_len = sizeof(struct bpf_prog_info);
10613
+ info_linear->data_len = data_len;
10614
+
10615
+ return info_linear;
10616
+}
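A sketch of how a consumer (perf does something similar) can request a single array via the helper above; error handling is minimal and the output format is illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "libbpf.h"

static void dump_jited_ksyms_example(int prog_fd)
{
	struct bpf_prog_info_linear *info_linear;
	__u64 *ksyms;
	__u32 i;

	info_linear = bpf_program__get_prog_info_linear(prog_fd,
							1UL << BPF_PROG_INFO_JITED_KSYMS);
	if (libbpf_get_error(info_linear))
		return;

	/* jited_ksyms points into info_linear->data after the call */
	ksyms = (__u64 *)(uintptr_t)info_linear->info.jited_ksyms;
	for (i = 0; i < info_linear->info.nr_jited_ksyms; i++)
		printf("ksym #%u: 0x%llx\n", i, (unsigned long long)ksyms[i]);

	free(info_linear);
}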
10617
+
10618
+void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
10619
+{
10620
+ int i;
10621
+
10622
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10623
+ struct bpf_prog_info_array_desc *desc;
10624
+ __u64 addr, offs;
10625
+
10626
+ if ((info_linear->arrays & (1UL << i)) == 0)
10627
+ continue;
10628
+
10629
+ desc = bpf_prog_info_array_desc + i;
10630
+ addr = bpf_prog_info_read_offset_u64(&info_linear->info,
10631
+ desc->array_offset);
10632
+ offs = addr - ptr_to_u64(info_linear->data);
10633
+ bpf_prog_info_set_offset_u64(&info_linear->info,
10634
+ desc->array_offset, offs);
10635
+ }
10636
+}
10637
+
10638
+void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
10639
+{
10640
+ int i;
10641
+
10642
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10643
+ struct bpf_prog_info_array_desc *desc;
10644
+ __u64 addr, offs;
10645
+
10646
+ if ((info_linear->arrays & (1UL << i)) == 0)
10647
+ continue;
10648
+
10649
+ desc = bpf_prog_info_array_desc + i;
10650
+ offs = bpf_prog_info_read_offset_u64(&info_linear->info,
10651
+ desc->array_offset);
10652
+ addr = offs + ptr_to_u64(info_linear->data);
10653
+ bpf_prog_info_set_offset_u64(&info_linear->info,
10654
+ desc->array_offset, addr);
10655
+ }
10656
+}
10657
+
10658
+int bpf_program__set_attach_target(struct bpf_program *prog,
10659
+ int attach_prog_fd,
10660
+ const char *attach_func_name)
10661
+{
10662
+ int btf_id;
10663
+
10664
+ if (!prog || attach_prog_fd < 0 || !attach_func_name)
10665
+ return -EINVAL;
10666
+
10667
+ if (attach_prog_fd)
10668
+ btf_id = libbpf_find_prog_btf_id(attach_func_name,
10669
+ attach_prog_fd);
10670
+ else
10671
+ btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
10672
+ prog->expected_attach_type);
10673
+
10674
+ if (btf_id < 0)
10675
+ return btf_id;
10676
+
10677
+ prog->attach_btf_id = btf_id;
10678
+ prog->attach_prog_fd = attach_prog_fd;
10679
+ return 0;
10680
+}
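This setter has to run after open but before load, since attach_btf_id feeds into program loading; a sketch retargeting an fentry program to a vmlinux function, with placeholder names.

#include <errno.h>
#include "libbpf.h"

static int retarget_fentry_example(struct bpf_object *obj)
{
	struct bpf_program *prog;
	int err;

	/* obj was opened (not yet loaded); a SEC("fentry/...") program is assumed */
	prog = bpf_object__find_program_by_name(obj, "trace_enter");
	if (!prog)
		return -ENOENT;

	/* attach_prog_fd == 0 means resolve the function in vmlinux BTF */
	err = bpf_program__set_attach_target(prog, 0, "do_unlinkat");
	if (err)
		return err;

	return bpf_object__load(obj);
}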
10681
+
10682
+int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
10683
+{
10684
+ int err = 0, n, len, start, end = -1;
10685
+ bool *tmp;
10686
+
10687
+ *mask = NULL;
10688
+ *mask_sz = 0;
10689
+
10690
+ /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
10691
+ while (*s) {
10692
+ if (*s == ',' || *s == '\n') {
10693
+ s++;
10694
+ continue;
10695
+ }
10696
+ n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
10697
+ if (n <= 0 || n > 2) {
10698
+ pr_warn("Failed to get CPU range %s: %d\n", s, n);
10699
+ err = -EINVAL;
10700
+ goto cleanup;
10701
+ } else if (n == 1) {
10702
+ end = start;
10703
+ }
10704
+ if (start < 0 || start > end) {
10705
+ pr_warn("Invalid CPU range [%d,%d] in %s\n",
10706
+ start, end, s);
10707
+ err = -EINVAL;
10708
+ goto cleanup;
10709
+ }
10710
+ tmp = realloc(*mask, end + 1);
10711
+ if (!tmp) {
10712
+ err = -ENOMEM;
10713
+ goto cleanup;
10714
+ }
10715
+ *mask = tmp;
10716
+ memset(tmp + *mask_sz, 0, start - *mask_sz);
10717
+ memset(tmp + start, 1, end - start + 1);
10718
+ *mask_sz = end + 1;
10719
+ s += len;
10720
+ }
10721
+ if (!*mask_sz) {
10722
+ pr_warn("Empty CPU range\n");
10723
+ return -EINVAL;
10724
+ }
10725
+ return 0;
10726
+cleanup:
10727
+ free(*mask);
10728
+ *mask = NULL;
10729
+ return err;
10730
+}
10731
+
10732
+int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
10733
+{
10734
+ int fd, err = 0, len;
10735
+ char buf[128];
10736
+
10737
+ fd = open(fcpu, O_RDONLY);
10738
+ if (fd < 0) {
10739
+ err = -errno;
10740
+ pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
10741
+ return err;
10742
+ }
10743
+ len = read(fd, buf, sizeof(buf));
10744
+ close(fd);
10745
+ if (len <= 0) {
10746
+ err = len ? -errno : -EINVAL;
10747
+ pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
10748
+ return err;
10749
+ }
10750
+ if (len >= sizeof(buf)) {
10751
+ pr_warn("CPU mask is too big in file %s\n", fcpu);
10752
+ return -E2BIG;
10753
+ }
10754
+ buf[len] = '\0';
10755
+
10756
+ return parse_cpu_mask_str(buf, mask, mask_sz);
10757
+}
10758
+
10759
+int libbpf_num_possible_cpus(void)
10760
+{
10761
+ static const char *fcpu = "/sys/devices/system/cpu/possible";
10762
+ static int cpus;
10763
+ int err, n, i, tmp_cpus;
10764
+ bool *mask;
10765
+
10766
+ tmp_cpus = READ_ONCE(cpus);
10767
+ if (tmp_cpus > 0)
10768
+ return tmp_cpus;
10769
+
10770
+ err = parse_cpu_mask_file(fcpu, &mask, &n);
10771
+ if (err)
10772
+ return err;
10773
+
10774
+ tmp_cpus = 0;
10775
+ for (i = 0; i < n; i++) {
10776
+ if (mask[i])
10777
+ tmp_cpus++;
10778
+ }
10779
+ free(mask);
10780
+
10781
+ WRITE_ONCE(cpus, tmp_cpus);
10782
+ return tmp_cpus;
10783
+}
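Typical use of the helper above is sizing userspace buffers for per-CPU map values; a short sketch.

#include <stdlib.h>
#include "libbpf.h"

static void *alloc_percpu_values_example(size_t value_sz)
{
	int n = libbpf_num_possible_cpus();

	if (n < 0)
		return NULL; /* n carries a -errno style error code */

	/* lookups on per-CPU maps fill one value slot per possible CPU */
	return calloc(n, value_sz);
}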
10784
+
10785
+int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
10786
+ const struct bpf_object_open_opts *opts)
10787
+{
10788
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
10789
+ .object_name = s->name,
10790
+ );
10791
+ struct bpf_object *obj;
10792
+ int i;
10793
+
10794
+ /* Attempt to preserve opts->object_name, unless overridden by user
10795
+ * explicitly. Overwriting object name for skeletons is discouraged,
10796
+ * as it breaks global data maps, because they contain object name
10797
+ * prefix as their own map name prefix. When skeleton is generated,
10798
+ * bpftool is making an assumption that this name will stay the same.
10799
+ */
10800
+ if (opts) {
10801
+ memcpy(&skel_opts, opts, sizeof(*opts));
10802
+ if (!opts->object_name)
10803
+ skel_opts.object_name = s->name;
10804
+ }
10805
+
10806
+ obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
10807
+ if (IS_ERR(obj)) {
10808
+ pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
10809
+ s->name, PTR_ERR(obj));
10810
+ return PTR_ERR(obj);
10811
+ }
10812
+
10813
+ *s->obj = obj;
10814
+
10815
+ for (i = 0; i < s->map_cnt; i++) {
10816
+ struct bpf_map **map = s->maps[i].map;
10817
+ const char *name = s->maps[i].name;
10818
+ void **mmaped = s->maps[i].mmaped;
10819
+
10820
+ *map = bpf_object__find_map_by_name(obj, name);
10821
+ if (!*map) {
10822
+ pr_warn("failed to find skeleton map '%s'\n", name);
10823
+ return -ESRCH;
10824
+ }
10825
+
10826
+ /* externs shouldn't be pre-setup from user code */
10827
+ if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
10828
+ *mmaped = (*map)->mmaped;
10829
+ }
10830
+
10831
+ for (i = 0; i < s->prog_cnt; i++) {
10832
+ struct bpf_program **prog = s->progs[i].prog;
10833
+ const char *name = s->progs[i].name;
10834
+
10835
+ *prog = bpf_object__find_program_by_name(obj, name);
10836
+ if (!*prog) {
10837
+ pr_warn("failed to find skeleton program '%s'\n", name);
10838
+ return -ESRCH;
10839
+ }
10840
+ }
10841
+
10842
+ return 0;
10843
+}
10844
+
10845
+int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
10846
+{
10847
+ int i, err;
10848
+
10849
+ err = bpf_object__load(*s->obj);
10850
+ if (err) {
10851
+ pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
10852
+ return err;
10853
+ }
10854
+
10855
+ for (i = 0; i < s->map_cnt; i++) {
10856
+ struct bpf_map *map = *s->maps[i].map;
10857
+ size_t mmap_sz = bpf_map_mmap_sz(map);
10858
+ int prot, map_fd = bpf_map__fd(map);
10859
+ void **mmaped = s->maps[i].mmaped;
10860
+
10861
+ if (!mmaped)
10862
+ continue;
10863
+
10864
+ if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
10865
+ *mmaped = NULL;
10866
+ continue;
10867
+ }
10868
+
10869
+ if (map->def.map_flags & BPF_F_RDONLY_PROG)
10870
+ prot = PROT_READ;
10871
+ else
10872
+ prot = PROT_READ | PROT_WRITE;
10873
+
10874
+ /* Remap anonymous mmap()-ed "map initialization image" as
10875
+ * a BPF map-backed mmap()-ed memory, but preserving the same
10876
+ * memory address. This will cause kernel to change process'
10877
+ * page table to point to a different piece of kernel memory,
10878
+ * but from userspace point of view memory address (and its
10879
+ * contents, being identical at this point) will stay the
10880
+ * same. This mapping will be released by bpf_object__close()
10881
+ * as per normal clean up procedure, so we don't need to worry
10882
+ * about it from skeleton's clean up perspective.
10883
+ */
10884
+ *mmaped = mmap(map->mmaped, mmap_sz, prot,
10885
+ MAP_SHARED | MAP_FIXED, map_fd, 0);
10886
+ if (*mmaped == MAP_FAILED) {
10887
+ err = -errno;
10888
+ *mmaped = NULL;
10889
+ pr_warn("failed to re-mmap() map '%s': %d\n",
10890
+ bpf_map__name(map), err);
10891
+ return err;
10892
+ }
10893
+ }
10894
+
10895
+ return 0;
10896
+}
10897
+
10898
+int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
10899
+{
10900
+ int i;
10901
+
10902
+ for (i = 0; i < s->prog_cnt; i++) {
10903
+ struct bpf_program *prog = *s->progs[i].prog;
10904
+ struct bpf_link **link = s->progs[i].link;
10905
+ const struct bpf_sec_def *sec_def;
10906
+
10907
+ if (!prog->load)
10908
+ continue;
10909
+
10910
+ sec_def = find_sec_def(prog->sec_name);
10911
+ if (!sec_def || !sec_def->attach_fn)
10912
+ continue;
10913
+
10914
+ *link = sec_def->attach_fn(sec_def, prog);
10915
+ if (IS_ERR(*link)) {
10916
+ pr_warn("failed to auto-attach program '%s': %ld\n",
10917
+ bpf_program__name(prog), PTR_ERR(*link));
10918
+ return PTR_ERR(*link);
10919
+ }
10920
+ }
10921
+
10922
+ return 0;
10923
+}
10924
+
10925
+void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
10926
+{
10927
+ int i;
10928
+
10929
+ for (i = 0; i < s->prog_cnt; i++) {
10930
+ struct bpf_link **link = s->progs[i].link;
10931
+
10932
+ bpf_link__destroy(*link);
10933
+ *link = NULL;
10934
+ }
10935
+}
10936
+
10937
+void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
10938
+{
10939
+ if (!s)
10940
+ return;
10941
+
10942
+ if (s->progs)
10943
+ bpf_object__detach_skeleton(s);
10944
+ if (s->obj)
10945
+ bpf_object__close(*s->obj);
10946
+ free(s->maps);
10947
+ free(s->progs);
10948
+ free(s);
10949
+}
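The four skeleton entry points above are normally driven by code that `bpftool gen skeleton` emits; a sketch of that generated-caller side, with `myprog` standing in for the skeleton name (header and function names are hypothetical).

#include <errno.h>
#include "myprog.skel.h" /* hypothetical bpftool-generated skeleton header */

/* myprog_bpf__open()/__load()/__attach()/__destroy() are what the generated
 * header would provide; internally they call bpf_object__open_skeleton(),
 * bpf_object__load_skeleton(), bpf_object__attach_skeleton() and
 * bpf_object__destroy_skeleton() defined above.
 */
int run_skeleton_example(void)
{
	struct myprog_bpf *skel;
	int err;

	skel = myprog_bpf__open();
	if (!skel)
		return -ENOMEM;

	err = myprog_bpf__load(skel);       /* bpf_object__load_skeleton() */
	if (!err)
		err = myprog_bpf__attach(skel); /* bpf_object__attach_skeleton() */

	/* ... use skel->bss / skel->maps / skel->links while attached ... */

	myprog_bpf__destroy(skel);          /* detach, close and free everything */
	return err;
}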