2024-05-10 23fa18eaa71266feff7ba8d83022d9e1cc83c65a
kernel/tools/lib/bpf/libbpf.c
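Among other changes, the hunks below rework libbpf's logging interface: the three separate warn/info/debug callbacks are collapsed into a single libbpf_set_print() hook that takes one libbpf_print_fn_t and returns the previously installed callback, and all messages flow through libbpf_print() with an explicit enum libbpf_print_level. As a caller-side illustration (a minimal sketch, not part of this commit; it assumes only the LIBBPF_DEBUG level constant and the installed <bpf/libbpf.h> header):

#include <stdarg.h>
#include <stdio.h>
#include <bpf/libbpf.h>

/* Example print callback matching the new libbpf_print_fn_t signature:
 * drop debug-level output, send everything else to stderr.
 */
static int my_print(enum libbpf_print_level level, const char *fmt, va_list args)
{
	if (level == LIBBPF_DEBUG)
		return 0;
	return vfprintf(stderr, fmt, args);
}

int main(void)
{
	/* libbpf_set_print() now returns the previously installed callback,
	 * so it can be restored later if needed.
	 */
	libbpf_print_fn_t old_fn = libbpf_set_print(my_print);

	/* ... open and load BPF objects here ... */

	libbpf_set_print(old_fn);
	return 0;
}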
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
 /*
  * Common eBPF ELF object loading operations.
@@ -7,19 +7,7 @@
  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
  * Copyright (C) 2015 Huawei Inc.
  * Copyright (C) 2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
+ * Copyright (C) 2019 Isovalent, Inc.
  */
 
 #ifndef _GNU_SOURCE
@@ -30,29 +18,42 @@
 #include <stdarg.h>
 #include <libgen.h>
 #include <inttypes.h>
+#include <limits.h>
 #include <string.h>
 #include <unistd.h>
+#include <endian.h>
 #include <fcntl.h>
 #include <errno.h>
-#include <perf-sys.h>
+#include <ctype.h>
 #include <asm/unistd.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/bpf.h>
 #include <linux/btf.h>
+#include <linux/filter.h>
 #include <linux/list.h>
 #include <linux/limits.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <linux/version.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
-#include <tools/libc_compat.h>
+#include <sys/utsname.h>
+#include <sys/resource.h>
 #include <libelf.h>
 #include <gelf.h>
+#include <zlib.h>
 
 #include "libbpf.h"
 #include "bpf.h"
 #include "btf.h"
 #include "str_error.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
 
 #ifndef EM_BPF
 #define EM_BPF 247
@@ -62,51 +63,78 @@
 #define BPF_FS_MAGIC 0xcafe4a11
 #endif
 
+#define BPF_INSN_SZ (sizeof(struct bpf_insn))
+
+/* vsprintf() in __base_pr() uses nonliteral format string. It may break
+ * compilation if user enables corresponding warning. Disable it explicitly.
+ */
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+
 #define __printf(a, b) __attribute__((format(printf, a, b)))
 
-__printf(1, 2)
-static int __base_pr(const char *format, ...)
-{
-	va_list args;
-	int err;
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
 
-	va_start(args, format);
-	err = vfprintf(stderr, format, args);
-	va_end(args);
-	return err;
+static int __base_pr(enum libbpf_print_level level, const char *format,
+		     va_list args)
+{
+	if (level == LIBBPF_DEBUG)
+		return 0;
+
+	return vfprintf(stderr, format, args);
 }
 
-static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr;
-static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr;
-static __printf(1, 2) libbpf_print_fn_t __pr_debug;
+static libbpf_print_fn_t __libbpf_pr = __base_pr;
 
-#define __pr(func, fmt, ...) \
-do { \
-	if ((func)) \
-		(func)("libbpf: " fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__)
-#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__)
-#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__)
-
-void libbpf_set_print(libbpf_print_fn_t warn,
-		      libbpf_print_fn_t info,
-		      libbpf_print_fn_t debug)
+libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
 {
-	__pr_warning = warn;
-	__pr_info = info;
-	__pr_debug = debug;
+	libbpf_print_fn_t old_print_fn = __libbpf_pr;
+
+	__libbpf_pr = fn;
+	return old_print_fn;
+}
+
+__printf(2, 3)
+void libbpf_print(enum libbpf_print_level level, const char *format, ...)
+{
+	va_list args;
+
+	if (!__libbpf_pr)
+		return;
+
+	va_start(args, format);
+	__libbpf_pr(level, format, args);
+	va_end(args);
+}
+
+static void pr_perm_msg(int err)
+{
+	struct rlimit limit;
+	char buf[100];
+
+	if (err != -EPERM || geteuid() != 0)
+		return;
+
+	err = getrlimit(RLIMIT_MEMLOCK, &limit);
+	if (err)
+		return;
+
+	if (limit.rlim_cur == RLIM_INFINITY)
+		return;
+
+	if (limit.rlim_cur < 1024)
+		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
+	else if (limit.rlim_cur < 1024*1024)
+		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
+	else
+		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
+
+	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
+		buf);
 }
 
 #define STRERR_BUFSIZE 128
-
-#define CHECK_ERR(action, err, out) do { \
-	err = action; \
-	if (err) \
-		goto out; \
-} while(0)
-
 
 /* Copied from tools/perf/util/util.h */
 #ifndef zfree
@@ -122,38 +150,117 @@
 	___err; })
 #endif
 
-#ifdef HAVE_LIBELF_MMAP_SUPPORT
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
-#else
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
-#endif
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
+enum kern_feature_id {
+	/* v4.14: kernel support for program & map names. */
+	FEAT_PROG_NAME,
+	/* v5.2: kernel support for global data sections. */
+	FEAT_GLOBAL_DATA,
+	/* BTF support */
+	FEAT_BTF,
+	/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
+	FEAT_BTF_FUNC,
+	/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
+	FEAT_BTF_DATASEC,
+	/* BTF_FUNC_GLOBAL is supported */
+	FEAT_BTF_GLOBAL_FUNC,
+	/* BPF_F_MMAPABLE is supported for arrays */
+	FEAT_ARRAY_MMAP,
+	/* kernel support for expected_attach_type in BPF_PROG_LOAD */
+	FEAT_EXP_ATTACH_TYPE,
+	/* bpf_probe_read_{kernel,user}[_str] helpers */
+	FEAT_PROBE_READ_KERN,
+	/* BPF_PROG_BIND_MAP is supported */
+	FEAT_PROG_BIND_MAP,
+	__FEAT_CNT,
+};
+
+static bool kernel_supports(enum kern_feature_id feat_id);
+
+enum reloc_type {
+	RELO_LD64,
+	RELO_CALL,
+	RELO_DATA,
+	RELO_EXTERN,
+};
+
+struct reloc_desc {
+	enum reloc_type type;
+	int insn_idx;
+	int map_idx;
+	int sym_off;
+	bool processed;
+};
+
+struct bpf_sec_def;
+
+typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
+					struct bpf_program *prog);
+
+struct bpf_sec_def {
+	const char *sec;
+	size_t len;
+	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
+	bool is_exp_attach_type_optional;
+	bool is_attachable;
+	bool is_attach_btf;
+	bool is_sleepable;
+	attach_fn_t attach_fn;
+};
 
 /*
  * bpf_prog should be a better name but it has been used in
  * linux/filter.h.
  */
 struct bpf_program {
-	/* Index in elf obj file, for relocation use. */
-	int idx;
-	char *name;
-	int prog_ifindex;
-	char *section_name;
-	struct bpf_insn *insns;
-	size_t insns_cnt, main_prog_cnt;
-	enum bpf_prog_type type;
+	const struct bpf_sec_def *sec_def;
+	char *sec_name;
+	size_t sec_idx;
+	/* this program's instruction offset (in number of instructions)
+	 * within its containing ELF section
+	 */
+	size_t sec_insn_off;
+	/* number of original instructions in ELF section belonging to this
+	 * program, not taking into account subprogram instructions possible
+	 * appended later during relocation
+	 */
+	size_t sec_insn_cnt;
+	/* Offset (in number of instructions) of the start of instruction
+	 * belonging to this BPF program within its containing main BPF
+	 * program. For the entry-point (main) BPF program, this is always
+	 * zero. For a sub-program, this gets reset before each of main BPF
+	 * programs are processed and relocated and is used to determined
+	 * whether sub-program was already appended to the main program, and
+	 * if yes, at which instruction offset.
+	 */
+	size_t sub_insn_off;
 
-	struct reloc_desc {
-		enum {
-			RELO_LD64,
-			RELO_CALL,
-		} type;
-		int insn_idx;
-		union {
-			int map_idx;
-			int text_off;
-		};
-	} *reloc_desc;
+	char *name;
+	/* sec_name with / replaced by _; makes recursive pinning
+	 * in bpf_object__pin_programs easier
+	 */
+	char *pin_name;
+
+	/* instructions that belong to BPF program; insns[0] is located at
+	 * sec_insn_off instruction within its ELF section in ELF file, so
+	 * when mapping ELF file instruction index to the local instruction,
+	 * one needs to subtract sec_insn_off; and vice versa.
+	 */
+	struct bpf_insn *insns;
+	/* actual number of instruction in this BPF program's image; for
+	 * entry-point BPF programs this includes the size of main program
+	 * itself plus all the used sub-programs, appended at the end
+	 */
+	size_t insns_cnt;
+
+	struct reloc_desc *reloc_desc;
 	int nr_reloc;
+	int log_level;
 
 	struct {
 		int nr;
@@ -165,34 +272,155 @@
 	void *priv;
 	bpf_program_clear_priv_t clear_priv;
 
+	bool load;
+	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
+	int prog_ifindex;
+	__u32 attach_btf_id;
+	__u32 attach_prog_fd;
+	void *func_info;
+	__u32 func_info_rec_size;
+	__u32 func_info_cnt;
+
+	void *line_info;
+	__u32 line_info_rec_size;
+	__u32 line_info_cnt;
+	__u32 prog_flags;
+};
+
+struct bpf_struct_ops {
+	const char *tname;
+	const struct btf_type *type;
+	struct bpf_program **progs;
+	__u32 *kern_func_off;
+	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
+	void *data;
+	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
+	 * btf_vmlinux's format.
+	 * struct bpf_struct_ops_tcp_congestion_ops {
+	 *	[... some other kernel fields ...]
+	 *	struct tcp_congestion_ops data;
+	 * }
+	 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
+	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
+	 * from "data".
+	 */
+	void *kern_vdata;
+	__u32 type_id;
+};
+
+#define DATA_SEC ".data"
+#define BSS_SEC ".bss"
+#define RODATA_SEC ".rodata"
+#define KCONFIG_SEC ".kconfig"
+#define KSYMS_SEC ".ksyms"
+#define STRUCT_OPS_SEC ".struct_ops"
+
+enum libbpf_map_type {
+	LIBBPF_MAP_UNSPEC,
+	LIBBPF_MAP_DATA,
+	LIBBPF_MAP_BSS,
+	LIBBPF_MAP_RODATA,
+	LIBBPF_MAP_KCONFIG,
+};
+
+static const char * const libbpf_type_to_btf_name[] = {
+	[LIBBPF_MAP_DATA] = DATA_SEC,
+	[LIBBPF_MAP_BSS] = BSS_SEC,
+	[LIBBPF_MAP_RODATA] = RODATA_SEC,
+	[LIBBPF_MAP_KCONFIG] = KCONFIG_SEC,
 };
 
 struct bpf_map {
-	int fd;
 	char *name;
-	size_t offset;
+	int fd;
+	int sec_idx;
+	size_t sec_offset;
 	int map_ifindex;
+	int inner_map_fd;
 	struct bpf_map_def def;
+	__u32 numa_node;
+	__u32 btf_var_idx;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
+	__u32 btf_vmlinux_value_type_id;
 	void *priv;
 	bpf_map_clear_priv_t clear_priv;
+	enum libbpf_map_type libbpf_type;
+	void *mmaped;
+	struct bpf_struct_ops *st_ops;
+	struct bpf_map *inner_map;
+	void **init_slots;
+	int init_slots_sz;
+	char *pin_path;
+	bool pinned;
+	bool reused;
+};
+
+enum extern_type {
+	EXT_UNKNOWN,
+	EXT_KCFG,
+	EXT_KSYM,
+};
+
+enum kcfg_type {
+	KCFG_UNKNOWN,
+	KCFG_CHAR,
+	KCFG_BOOL,
+	KCFG_INT,
+	KCFG_TRISTATE,
+	KCFG_CHAR_ARR,
+};
+
+struct extern_desc {
+	enum extern_type type;
+	int sym_idx;
+	int btf_id;
+	int sec_btf_id;
+	const char *name;
+	bool is_set;
+	bool is_weak;
+	union {
+		struct {
+			enum kcfg_type type;
+			int sz;
+			int align;
+			int data_off;
+			bool is_signed;
+		} kcfg;
+		struct {
+			unsigned long long addr;
+
+			/* target btf_id of the corresponding kernel var. */
+			int vmlinux_btf_id;
+
+			/* local btf_id of the ksym extern's type. */
+			__u32 type_id;
+		} ksym;
+	};
 };
 
 static LIST_HEAD(bpf_objects_list);
 
 struct bpf_object {
+	char name[BPF_OBJ_NAME_LEN];
 	char license[64];
-	u32 kern_version;
+	__u32 kern_version;
 
 	struct bpf_program *programs;
 	size_t nr_programs;
 	struct bpf_map *maps;
 	size_t nr_maps;
+	size_t maps_cap;
+
+	char *kconfig;
+	struct extern_desc *externs;
+	int nr_extern;
+	int kconfig_map_idx;
+	int rodata_map_idx;
 
 	bool loaded;
-	bool has_pseudo_calls;
+	bool has_subcalls;
 
 	/*
 	 * Information when doing elf related work. Only valid if fd
@@ -200,19 +428,31 @@
 	 */
 	struct {
 		int fd;
-		void *obj_buf;
+		const void *obj_buf;
 		size_t obj_buf_sz;
 		Elf *elf;
 		GElf_Ehdr ehdr;
 		Elf_Data *symbols;
+		Elf_Data *data;
+		Elf_Data *rodata;
+		Elf_Data *bss;
+		Elf_Data *st_ops_data;
+		size_t shstrndx; /* section index for section name strings */
 		size_t strtabidx;
 		struct {
 			GElf_Shdr shdr;
			Elf_Data *data;
-		} *reloc;
-		int nr_reloc;
+		} *reloc_sects;
+		int nr_reloc_sects;
 		int maps_shndx;
+		int btf_maps_shndx;
+		__u32 btf_maps_sec_btf_id;
 		int text_shndx;
+		int symbols_shndx;
+		int data_shndx;
+		int rodata_shndx;
+		int bss_shndx;
+		int st_ops_shndx;
 	} efile;
 	/*
 	 * All loaded bpf_object is linked in a list, which is
@@ -222,6 +462,11 @@
 	struct list_head list;
 
 	struct btf *btf;
+	/* Parse and load BTF vmlinux if any of the programs in the object need
+	 * it at load time.
+	 */
+	struct btf *btf_vmlinux;
+	struct btf_ext *btf_ext;
 
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
@@ -230,7 +475,17 @@
 };
 #define obj_elf_valid(o) ((o)->efile.elf)
 
-static void bpf_program__unload(struct bpf_program *prog)
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+			      size_t off, __u32 sym_type, GElf_Sym *sym);
+
+void bpf_program__unload(struct bpf_program *prog)
 {
 	int i;
 
@@ -245,12 +500,15 @@
 		for (i = 0; i < prog->instances.nr; i++)
 			zclose(prog->instances.fds[i]);
 	} else if (prog->instances.nr != -1) {
-		pr_warning("Internal error: instances.nr is %d\n",
-			   prog->instances.nr);
+		pr_warn("Internal error: instances.nr is %d\n",
+			prog->instances.nr);
 	}
 
 	prog->instances.nr = -1;
 	zfree(&prog->instances.fds);
+
+	zfree(&prog->func_info);
+	zfree(&prog->line_info);
 }
 
 static void bpf_program__exit(struct bpf_program *prog)
@@ -266,158 +524,539 @@
266524
267525 bpf_program__unload(prog);
268526 zfree(&prog->name);
269
- zfree(&prog->section_name);
527
+ zfree(&prog->sec_name);
528
+ zfree(&prog->pin_name);
270529 zfree(&prog->insns);
271530 zfree(&prog->reloc_desc);
272531
273532 prog->nr_reloc = 0;
274533 prog->insns_cnt = 0;
275
- prog->idx = -1;
534
+ prog->sec_idx = -1;
535
+}
536
+
537
+static char *__bpf_program__pin_name(struct bpf_program *prog)
538
+{
539
+ char *name, *p;
540
+
541
+ name = p = strdup(prog->sec_name);
542
+ while ((p = strchr(p, '/')))
543
+ *p = '_';
544
+
545
+ return name;
546
+}
547
+
548
+static bool insn_is_subprog_call(const struct bpf_insn *insn)
549
+{
550
+ return BPF_CLASS(insn->code) == BPF_JMP &&
551
+ BPF_OP(insn->code) == BPF_CALL &&
552
+ BPF_SRC(insn->code) == BPF_K &&
553
+ insn->src_reg == BPF_PSEUDO_CALL &&
554
+ insn->dst_reg == 0 &&
555
+ insn->off == 0;
276556 }
277557
278558 static int
279
-bpf_program__init(void *data, size_t size, char *section_name, int idx,
280
- struct bpf_program *prog)
559
+bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
560
+ const char *name, size_t sec_idx, const char *sec_name,
561
+ size_t sec_off, void *insn_data, size_t insn_data_sz)
281562 {
282
- if (size < sizeof(struct bpf_insn)) {
283
- pr_warning("corrupted section '%s'\n", section_name);
563
+ if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
564
+ pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
565
+ sec_name, name, sec_off, insn_data_sz);
284566 return -EINVAL;
285567 }
286568
287
- bzero(prog, sizeof(*prog));
569
+ memset(prog, 0, sizeof(*prog));
570
+ prog->obj = obj;
288571
289
- prog->section_name = strdup(section_name);
290
- if (!prog->section_name) {
291
- pr_warning("failed to alloc name for prog under section(%d) %s\n",
292
- idx, section_name);
293
- goto errout;
294
- }
572
+ prog->sec_idx = sec_idx;
573
+ prog->sec_insn_off = sec_off / BPF_INSN_SZ;
574
+ prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
575
+ /* insns_cnt can later be increased by appending used subprograms */
576
+ prog->insns_cnt = prog->sec_insn_cnt;
295577
296
- prog->insns = malloc(size);
297
- if (!prog->insns) {
298
- pr_warning("failed to alloc insns for prog under section %s\n",
299
- section_name);
300
- goto errout;
301
- }
302
- prog->insns_cnt = size / sizeof(struct bpf_insn);
303
- memcpy(prog->insns, data,
304
- prog->insns_cnt * sizeof(struct bpf_insn));
305
- prog->idx = idx;
578
+ prog->type = BPF_PROG_TYPE_UNSPEC;
579
+ prog->load = true;
580
+
306581 prog->instances.fds = NULL;
307582 prog->instances.nr = -1;
308
- prog->type = BPF_PROG_TYPE_KPROBE;
583
+
584
+ prog->sec_name = strdup(sec_name);
585
+ if (!prog->sec_name)
586
+ goto errout;
587
+
588
+ prog->name = strdup(name);
589
+ if (!prog->name)
590
+ goto errout;
591
+
592
+ prog->pin_name = __bpf_program__pin_name(prog);
593
+ if (!prog->pin_name)
594
+ goto errout;
595
+
596
+ prog->insns = malloc(insn_data_sz);
597
+ if (!prog->insns)
598
+ goto errout;
599
+ memcpy(prog->insns, insn_data, insn_data_sz);
309600
310601 return 0;
311602 errout:
603
+ pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
312604 bpf_program__exit(prog);
313605 return -ENOMEM;
314606 }
315607
316608 static int
317
-bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
318
- char *section_name, int idx)
609
+bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
610
+ const char *sec_name, int sec_idx)
319611 {
320
- struct bpf_program prog, *progs;
612
+ struct bpf_program *prog, *progs;
613
+ void *data = sec_data->d_buf;
614
+ size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
321615 int nr_progs, err;
322
-
323
- err = bpf_program__init(data, size, section_name, idx, &prog);
324
- if (err)
325
- return err;
616
+ const char *name;
617
+ GElf_Sym sym;
326618
327619 progs = obj->programs;
328620 nr_progs = obj->nr_programs;
621
+ sec_off = 0;
329622
330
- progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
331
- if (!progs) {
332
- /*
333
- * In this case the original obj->programs
334
- * is still valid, so don't need special treat for
335
- * bpf_close_object().
336
- */
337
- pr_warning("failed to alloc a new program under section '%s'\n",
338
- section_name);
339
- bpf_program__exit(&prog);
340
- return -ENOMEM;
623
+ while (sec_off < sec_sz) {
624
+ if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
625
+ pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
626
+ sec_name, sec_off);
627
+ return -LIBBPF_ERRNO__FORMAT;
628
+ }
629
+
630
+ prog_sz = sym.st_size;
631
+
632
+ name = elf_sym_str(obj, sym.st_name);
633
+ if (!name) {
634
+ pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
635
+ sec_name, sec_off);
636
+ return -LIBBPF_ERRNO__FORMAT;
637
+ }
638
+
639
+ if (sec_off + prog_sz > sec_sz) {
640
+ pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
641
+ sec_name, sec_off);
642
+ return -LIBBPF_ERRNO__FORMAT;
643
+ }
644
+
645
+ pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
646
+ sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
647
+
648
+ progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
649
+ if (!progs) {
650
+ /*
651
+ * In this case the original obj->programs
652
+ * is still valid, so don't need special treat for
653
+ * bpf_close_object().
654
+ */
655
+ pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
656
+ sec_name, name);
657
+ return -ENOMEM;
658
+ }
659
+ obj->programs = progs;
660
+
661
+ prog = &progs[nr_progs];
662
+
663
+ err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
664
+ sec_off, data + sec_off, prog_sz);
665
+ if (err)
666
+ return err;
667
+
668
+ nr_progs++;
669
+ obj->nr_programs = nr_progs;
670
+
671
+ sec_off += prog_sz;
341672 }
342673
343
- pr_debug("found program %s\n", prog.section_name);
344
- obj->programs = progs;
345
- obj->nr_programs = nr_progs + 1;
346
- prog.obj = obj;
347
- progs[nr_progs] = prog;
348674 return 0;
349675 }
350676
351
-static int
352
-bpf_object__init_prog_names(struct bpf_object *obj)
677
+static __u32 get_kernel_version(void)
353678 {
354
- Elf_Data *symbols = obj->efile.symbols;
355
- struct bpf_program *prog;
356
- size_t pi, si;
679
+ __u32 major, minor, patch;
680
+ struct utsname info;
357681
358
- for (pi = 0; pi < obj->nr_programs; pi++) {
359
- const char *name = NULL;
682
+ uname(&info);
683
+ if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
684
+ return 0;
685
+ return KERNEL_VERSION(major, minor, patch);
686
+}
360687
361
- prog = &obj->programs[pi];
688
+static const struct btf_member *
689
+find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
690
+{
691
+ struct btf_member *m;
692
+ int i;
362693
363
- for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
364
- si++) {
365
- GElf_Sym sym;
694
+ for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
695
+ if (btf_member_bit_offset(t, i) == bit_offset)
696
+ return m;
697
+ }
366698
367
- if (!gelf_getsym(symbols, si, &sym))
368
- continue;
369
- if (sym.st_shndx != prog->idx)
370
- continue;
371
- if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
372
- continue;
699
+ return NULL;
700
+}
373701
374
- name = elf_strptr(obj->efile.elf,
375
- obj->efile.strtabidx,
376
- sym.st_name);
377
- if (!name) {
378
- pr_warning("failed to get sym name string for prog %s\n",
379
- prog->section_name);
380
- return -LIBBPF_ERRNO__LIBELF;
381
- }
702
+static const struct btf_member *
703
+find_member_by_name(const struct btf *btf, const struct btf_type *t,
704
+ const char *name)
705
+{
706
+ struct btf_member *m;
707
+ int i;
708
+
709
+ for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
710
+ if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
711
+ return m;
712
+ }
713
+
714
+ return NULL;
715
+}
716
+
717
+#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
718
+static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
719
+ const char *name, __u32 kind);
720
+
721
+static int
722
+find_struct_ops_kern_types(const struct btf *btf, const char *tname,
723
+ const struct btf_type **type, __u32 *type_id,
724
+ const struct btf_type **vtype, __u32 *vtype_id,
725
+ const struct btf_member **data_member)
726
+{
727
+ const struct btf_type *kern_type, *kern_vtype;
728
+ const struct btf_member *kern_data_member;
729
+ __s32 kern_vtype_id, kern_type_id;
730
+ __u32 i;
731
+
732
+ kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
733
+ if (kern_type_id < 0) {
734
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
735
+ tname);
736
+ return kern_type_id;
737
+ }
738
+ kern_type = btf__type_by_id(btf, kern_type_id);
739
+
740
+ /* Find the corresponding "map_value" type that will be used
741
+ * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
742
+ * find "struct bpf_struct_ops_tcp_congestion_ops" from the
743
+ * btf_vmlinux.
744
+ */
745
+ kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
746
+ tname, BTF_KIND_STRUCT);
747
+ if (kern_vtype_id < 0) {
748
+ pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
749
+ STRUCT_OPS_VALUE_PREFIX, tname);
750
+ return kern_vtype_id;
751
+ }
752
+ kern_vtype = btf__type_by_id(btf, kern_vtype_id);
753
+
754
+ /* Find "struct tcp_congestion_ops" from
755
+ * struct bpf_struct_ops_tcp_congestion_ops {
756
+ * [ ... ]
757
+ * struct tcp_congestion_ops data;
758
+ * }
759
+ */
760
+ kern_data_member = btf_members(kern_vtype);
761
+ for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
762
+ if (kern_data_member->type == kern_type_id)
763
+ break;
764
+ }
765
+ if (i == btf_vlen(kern_vtype)) {
766
+ pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
767
+ tname, STRUCT_OPS_VALUE_PREFIX, tname);
768
+ return -EINVAL;
769
+ }
770
+
771
+ *type = kern_type;
772
+ *type_id = kern_type_id;
773
+ *vtype = kern_vtype;
774
+ *vtype_id = kern_vtype_id;
775
+ *data_member = kern_data_member;
776
+
777
+ return 0;
778
+}
779
+
780
+static bool bpf_map__is_struct_ops(const struct bpf_map *map)
781
+{
782
+ return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
783
+}
784
+
785
+/* Init the map's fields that depend on kern_btf */
786
+static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
787
+ const struct btf *btf,
788
+ const struct btf *kern_btf)
789
+{
790
+ const struct btf_member *member, *kern_member, *kern_data_member;
791
+ const struct btf_type *type, *kern_type, *kern_vtype;
792
+ __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
793
+ struct bpf_struct_ops *st_ops;
794
+ void *data, *kern_data;
795
+ const char *tname;
796
+ int err;
797
+
798
+ st_ops = map->st_ops;
799
+ type = st_ops->type;
800
+ tname = st_ops->tname;
801
+ err = find_struct_ops_kern_types(kern_btf, tname,
802
+ &kern_type, &kern_type_id,
803
+ &kern_vtype, &kern_vtype_id,
804
+ &kern_data_member);
805
+ if (err)
806
+ return err;
807
+
808
+ pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
809
+ map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
810
+
811
+ map->def.value_size = kern_vtype->size;
812
+ map->btf_vmlinux_value_type_id = kern_vtype_id;
813
+
814
+ st_ops->kern_vdata = calloc(1, kern_vtype->size);
815
+ if (!st_ops->kern_vdata)
816
+ return -ENOMEM;
817
+
818
+ data = st_ops->data;
819
+ kern_data_off = kern_data_member->offset / 8;
820
+ kern_data = st_ops->kern_vdata + kern_data_off;
821
+
822
+ member = btf_members(type);
823
+ for (i = 0; i < btf_vlen(type); i++, member++) {
824
+ const struct btf_type *mtype, *kern_mtype;
825
+ __u32 mtype_id, kern_mtype_id;
826
+ void *mdata, *kern_mdata;
827
+ __s64 msize, kern_msize;
828
+ __u32 moff, kern_moff;
829
+ __u32 kern_member_idx;
830
+ const char *mname;
831
+
832
+ mname = btf__name_by_offset(btf, member->name_off);
833
+ kern_member = find_member_by_name(kern_btf, kern_type, mname);
834
+ if (!kern_member) {
835
+ pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
836
+ map->name, mname);
837
+ return -ENOTSUP;
382838 }
383839
384
- if (!name && prog->idx == obj->efile.text_shndx)
385
- name = ".text";
840
+ kern_member_idx = kern_member - btf_members(kern_type);
841
+ if (btf_member_bitfield_size(type, i) ||
842
+ btf_member_bitfield_size(kern_type, kern_member_idx)) {
843
+ pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
844
+ map->name, mname);
845
+ return -ENOTSUP;
846
+ }
386847
387
- if (!name) {
388
- pr_warning("failed to find sym for prog %s\n",
389
- prog->section_name);
848
+ moff = member->offset / 8;
849
+ kern_moff = kern_member->offset / 8;
850
+
851
+ mdata = data + moff;
852
+ kern_mdata = kern_data + kern_moff;
853
+
854
+ mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
855
+ kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
856
+ &kern_mtype_id);
857
+ if (BTF_INFO_KIND(mtype->info) !=
858
+ BTF_INFO_KIND(kern_mtype->info)) {
859
+ pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
860
+ map->name, mname, BTF_INFO_KIND(mtype->info),
861
+ BTF_INFO_KIND(kern_mtype->info));
862
+ return -ENOTSUP;
863
+ }
864
+
865
+ if (btf_is_ptr(mtype)) {
866
+ struct bpf_program *prog;
867
+
868
+ prog = st_ops->progs[i];
869
+ if (!prog)
870
+ continue;
871
+
872
+ kern_mtype = skip_mods_and_typedefs(kern_btf,
873
+ kern_mtype->type,
874
+ &kern_mtype_id);
875
+
876
+ /* mtype->type must be a func_proto which was
877
+ * guaranteed in bpf_object__collect_st_ops_relos(),
878
+ * so only check kern_mtype for func_proto here.
879
+ */
880
+ if (!btf_is_func_proto(kern_mtype)) {
881
+ pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
882
+ map->name, mname);
883
+ return -ENOTSUP;
884
+ }
885
+
886
+ prog->attach_btf_id = kern_type_id;
887
+ prog->expected_attach_type = kern_member_idx;
888
+
889
+ st_ops->kern_func_off[i] = kern_data_off + kern_moff;
890
+
891
+ pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
892
+ map->name, mname, prog->name, moff,
893
+ kern_moff);
894
+
895
+ continue;
896
+ }
897
+
898
+ msize = btf__resolve_size(btf, mtype_id);
899
+ kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
900
+ if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
901
+ pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
902
+ map->name, mname, (ssize_t)msize,
903
+ (ssize_t)kern_msize);
904
+ return -ENOTSUP;
905
+ }
906
+
907
+ pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
908
+ map->name, mname, (unsigned int)msize,
909
+ moff, kern_moff);
910
+ memcpy(kern_mdata, mdata, msize);
911
+ }
912
+
913
+ return 0;
914
+}
915
+
916
+static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
917
+{
918
+ struct bpf_map *map;
919
+ size_t i;
920
+ int err;
921
+
922
+ for (i = 0; i < obj->nr_maps; i++) {
923
+ map = &obj->maps[i];
924
+
925
+ if (!bpf_map__is_struct_ops(map))
926
+ continue;
927
+
928
+ err = bpf_map__init_kern_struct_ops(map, obj->btf,
929
+ obj->btf_vmlinux);
930
+ if (err)
931
+ return err;
932
+ }
933
+
934
+ return 0;
935
+}
936
+
937
+static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
938
+{
939
+ const struct btf_type *type, *datasec;
940
+ const struct btf_var_secinfo *vsi;
941
+ struct bpf_struct_ops *st_ops;
942
+ const char *tname, *var_name;
943
+ __s32 type_id, datasec_id;
944
+ const struct btf *btf;
945
+ struct bpf_map *map;
946
+ __u32 i;
947
+
948
+ if (obj->efile.st_ops_shndx == -1)
949
+ return 0;
950
+
951
+ btf = obj->btf;
952
+ datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
953
+ BTF_KIND_DATASEC);
954
+ if (datasec_id < 0) {
955
+ pr_warn("struct_ops init: DATASEC %s not found\n",
956
+ STRUCT_OPS_SEC);
957
+ return -EINVAL;
958
+ }
959
+
960
+ datasec = btf__type_by_id(btf, datasec_id);
961
+ vsi = btf_var_secinfos(datasec);
962
+ for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
963
+ type = btf__type_by_id(obj->btf, vsi->type);
964
+ var_name = btf__name_by_offset(obj->btf, type->name_off);
965
+
966
+ type_id = btf__resolve_type(obj->btf, vsi->type);
967
+ if (type_id < 0) {
968
+ pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
969
+ vsi->type, STRUCT_OPS_SEC);
390970 return -EINVAL;
391971 }
392972
393
- prog->name = strdup(name);
394
- if (!prog->name) {
395
- pr_warning("failed to allocate memory for prog sym %s\n",
396
- name);
397
- return -ENOMEM;
973
+ type = btf__type_by_id(obj->btf, type_id);
974
+ tname = btf__name_by_offset(obj->btf, type->name_off);
975
+ if (!tname[0]) {
976
+ pr_warn("struct_ops init: anonymous type is not supported\n");
977
+ return -ENOTSUP;
398978 }
979
+ if (!btf_is_struct(type)) {
980
+ pr_warn("struct_ops init: %s is not a struct\n", tname);
981
+ return -EINVAL;
982
+ }
983
+
984
+ map = bpf_object__add_map(obj);
985
+ if (IS_ERR(map))
986
+ return PTR_ERR(map);
987
+
988
+ map->sec_idx = obj->efile.st_ops_shndx;
989
+ map->sec_offset = vsi->offset;
990
+ map->name = strdup(var_name);
991
+ if (!map->name)
992
+ return -ENOMEM;
993
+
994
+ map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
995
+ map->def.key_size = sizeof(int);
996
+ map->def.value_size = type->size;
997
+ map->def.max_entries = 1;
998
+
999
+ map->st_ops = calloc(1, sizeof(*map->st_ops));
1000
+ if (!map->st_ops)
1001
+ return -ENOMEM;
1002
+ st_ops = map->st_ops;
1003
+ st_ops->data = malloc(type->size);
1004
+ st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1005
+ st_ops->kern_func_off = malloc(btf_vlen(type) *
1006
+ sizeof(*st_ops->kern_func_off));
1007
+ if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1008
+ return -ENOMEM;
1009
+
1010
+ if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
1011
+ pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1012
+ var_name, STRUCT_OPS_SEC);
1013
+ return -EINVAL;
1014
+ }
1015
+
1016
+ memcpy(st_ops->data,
1017
+ obj->efile.st_ops_data->d_buf + vsi->offset,
1018
+ type->size);
1019
+ st_ops->tname = tname;
1020
+ st_ops->type = type;
1021
+ st_ops->type_id = type_id;
1022
+
1023
+ pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1024
+ tname, type_id, var_name, vsi->offset);
3991025 }
4001026
4011027 return 0;
4021028 }
4031029
4041030 static struct bpf_object *bpf_object__new(const char *path,
405
- void *obj_buf,
406
- size_t obj_buf_sz)
1031
+ const void *obj_buf,
1032
+ size_t obj_buf_sz,
1033
+ const char *obj_name)
4071034 {
4081035 struct bpf_object *obj;
1036
+ char *end;
4091037
4101038 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
4111039 if (!obj) {
412
- pr_warning("alloc memory failed for %s\n", path);
1040
+ pr_warn("alloc memory failed for %s\n", path);
4131041 return ERR_PTR(-ENOMEM);
4141042 }
4151043
4161044 strcpy(obj->path, path);
417
- obj->efile.fd = -1;
1045
+ if (obj_name) {
1046
+ strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
1047
+ obj->name[sizeof(obj->name) - 1] = 0;
1048
+ } else {
1049
+ /* Using basename() GNU version which doesn't modify arg. */
1050
+ strncpy(obj->name, basename((void *)path),
1051
+ sizeof(obj->name) - 1);
1052
+ end = strchr(obj->name, '.');
1053
+ if (end)
1054
+ *end = 0;
1055
+ }
4181056
1057
+ obj->efile.fd = -1;
4191058 /*
420
- * Caller of this function should also calls
1059
+ * Caller of this function should also call
4211060 * bpf_object__elf_finish() after data collection to return
4221061 * obj_buf to user. If not, we should duplicate the buffer to
4231062 * avoid user freeing them before elf finish.
@@ -425,7 +1064,15 @@
4251064 obj->efile.obj_buf = obj_buf;
4261065 obj->efile.obj_buf_sz = obj_buf_sz;
4271066 obj->efile.maps_shndx = -1;
1067
+ obj->efile.btf_maps_shndx = -1;
1068
+ obj->efile.data_shndx = -1;
1069
+ obj->efile.rodata_shndx = -1;
1070
+ obj->efile.bss_shndx = -1;
1071
+ obj->efile.st_ops_shndx = -1;
1072
+ obj->kconfig_map_idx = -1;
1073
+ obj->rodata_map_idx = -1;
4281074
1075
+ obj->kern_version = get_kernel_version();
4291076 obj->loaded = false;
4301077
4311078 INIT_LIST_HEAD(&obj->list);
@@ -443,13 +1090,22 @@
4431090 obj->efile.elf = NULL;
4441091 }
4451092 obj->efile.symbols = NULL;
1093
+ obj->efile.data = NULL;
1094
+ obj->efile.rodata = NULL;
1095
+ obj->efile.bss = NULL;
1096
+ obj->efile.st_ops_data = NULL;
4461097
447
- zfree(&obj->efile.reloc);
448
- obj->efile.nr_reloc = 0;
1098
+ zfree(&obj->efile.reloc_sects);
1099
+ obj->efile.nr_reloc_sects = 0;
4491100 zclose(obj->efile.fd);
4501101 obj->efile.obj_buf = NULL;
4511102 obj->efile.obj_buf_sz = 0;
4521103 }
1104
+
1105
+/* if libelf is old and doesn't support mmap(), fall back to read() */
1106
+#ifndef ELF_C_READ_MMAP
1107
+#define ELF_C_READ_MMAP ELF_C_READ
1108
+#endif
4531109
4541110 static int bpf_object__elf_init(struct bpf_object *obj)
4551111 {
@@ -457,7 +1113,7 @@
4571113 GElf_Ehdr *ep;
4581114
4591115 if (obj_elf_valid(obj)) {
460
- pr_warning("elf init: internal error\n");
1116
+ pr_warn("elf: init internal error\n");
4611117 return -LIBBPF_ERRNO__LIBELF;
4621118 }
4631119
@@ -466,42 +1122,54 @@
4661122 * obj_buf should have been validated by
4671123 * bpf_object__open_buffer().
4681124 */
469
- obj->efile.elf = elf_memory(obj->efile.obj_buf,
1125
+ obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
4701126 obj->efile.obj_buf_sz);
4711127 } else {
4721128 obj->efile.fd = open(obj->path, O_RDONLY);
4731129 if (obj->efile.fd < 0) {
474
- char errmsg[STRERR_BUFSIZE];
475
- char *cp = str_error(errno, errmsg, sizeof(errmsg));
1130
+ char errmsg[STRERR_BUFSIZE], *cp;
4761131
477
- pr_warning("failed to open %s: %s\n", obj->path, cp);
478
- return -errno;
1132
+ err = -errno;
1133
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
1134
+ pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
1135
+ return err;
4791136 }
4801137
481
- obj->efile.elf = elf_begin(obj->efile.fd,
482
- LIBBPF_ELF_C_READ_MMAP,
483
- NULL);
1138
+ obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
4841139 }
4851140
4861141 if (!obj->efile.elf) {
487
- pr_warning("failed to open %s as ELF file\n",
488
- obj->path);
1142
+ pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
4891143 err = -LIBBPF_ERRNO__LIBELF;
4901144 goto errout;
4911145 }
4921146
4931147 if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
494
- pr_warning("failed to get EHDR from %s\n",
495
- obj->path);
1148
+ pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
4961149 err = -LIBBPF_ERRNO__FORMAT;
4971150 goto errout;
4981151 }
4991152 ep = &obj->efile.ehdr;
5001153
1154
+ if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
1155
+ pr_warn("elf: failed to get section names section index for %s: %s\n",
1156
+ obj->path, elf_errmsg(-1));
1157
+ err = -LIBBPF_ERRNO__FORMAT;
1158
+ goto errout;
1159
+ }
1160
+
1161
+ /* Elf is corrupted/truncated, avoid calling elf_strptr. */
1162
+ if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
1163
+ pr_warn("elf: failed to get section names strings from %s: %s\n",
1164
+ obj->path, elf_errmsg(-1));
1165
+ err = -LIBBPF_ERRNO__FORMAT;
1166
+ goto errout;
1167
+ }
1168
+
5011169 /* Old LLVM set e_machine to EM_NONE */
502
- if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) {
503
- pr_warning("%s is not an eBPF object file\n",
504
- obj->path);
1170
+ if (ep->e_type != ET_REL ||
1171
+ (ep->e_machine && ep->e_machine != EM_BPF)) {
1172
+ pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
5051173 err = -LIBBPF_ERRNO__FORMAT;
5061174 goto errout;
5071175 }
@@ -512,88 +1180,589 @@
5121180 return err;
5131181 }
5141182
515
-static int
516
-bpf_object__check_endianness(struct bpf_object *obj)
1183
+static int bpf_object__check_endianness(struct bpf_object *obj)
5171184 {
518
- static unsigned int const endian = 1;
519
-
520
- switch (obj->efile.ehdr.e_ident[EI_DATA]) {
521
- case ELFDATA2LSB:
522
- /* We are big endian, BPF obj is little endian. */
523
- if (*(unsigned char const *)&endian != 1)
524
- goto mismatch;
525
- break;
526
-
527
- case ELFDATA2MSB:
528
- /* We are little endian, BPF obj is big endian. */
529
- if (*(unsigned char const *)&endian != 0)
530
- goto mismatch;
531
- break;
532
- default:
533
- return -LIBBPF_ERRNO__ENDIAN;
534
- }
535
-
536
- return 0;
537
-
538
-mismatch:
539
- pr_warning("Error: endianness mismatch.\n");
1185
+#if __BYTE_ORDER == __LITTLE_ENDIAN
1186
+ if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
1187
+ return 0;
1188
+#elif __BYTE_ORDER == __BIG_ENDIAN
1189
+ if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
1190
+ return 0;
1191
+#else
1192
+# error "Unrecognized __BYTE_ORDER__"
1193
+#endif
1194
+ pr_warn("elf: endianness mismatch in %s.\n", obj->path);
5401195 return -LIBBPF_ERRNO__ENDIAN;
5411196 }
5421197
5431198 static int
544
-bpf_object__init_license(struct bpf_object *obj,
545
- void *data, size_t size)
1199
+bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
5461200 {
547
- memcpy(obj->license, data,
548
- min(size, sizeof(obj->license) - 1));
1201
+ memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
5491202 pr_debug("license of %s is %s\n", obj->path, obj->license);
5501203 return 0;
5511204 }
5521205
5531206 static int
554
-bpf_object__init_kversion(struct bpf_object *obj,
555
- void *data, size_t size)
1207
+bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
5561208 {
557
- u32 kver;
1209
+ __u32 kver;
5581210
5591211 if (size != sizeof(kver)) {
560
- pr_warning("invalid kver section in %s\n", obj->path);
1212
+ pr_warn("invalid kver section in %s\n", obj->path);
5611213 return -LIBBPF_ERRNO__FORMAT;
5621214 }
5631215 memcpy(&kver, data, sizeof(kver));
5641216 obj->kern_version = kver;
565
- pr_debug("kernel version of %s is %x\n", obj->path,
566
- obj->kern_version);
1217
+ pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
5671218 return 0;
5681219 }
5691220
570
-static int compare_bpf_map(const void *_a, const void *_b)
1221
+static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
5711222 {
572
- const struct bpf_map *a = _a;
573
- const struct bpf_map *b = _b;
1223
+ if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1224
+ type == BPF_MAP_TYPE_HASH_OF_MAPS)
1225
+ return true;
1226
+ return false;
1227
+}
5741228
575
- return a->offset - b->offset;
1229
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
1230
+ __u32 *size)
1231
+{
1232
+ int ret = -ENOENT;
1233
+
1234
+ *size = 0;
1235
+ if (!name) {
1236
+ return -EINVAL;
1237
+ } else if (!strcmp(name, DATA_SEC)) {
1238
+ if (obj->efile.data)
1239
+ *size = obj->efile.data->d_size;
1240
+ } else if (!strcmp(name, BSS_SEC)) {
1241
+ if (obj->efile.bss)
1242
+ *size = obj->efile.bss->d_size;
1243
+ } else if (!strcmp(name, RODATA_SEC)) {
1244
+ if (obj->efile.rodata)
1245
+ *size = obj->efile.rodata->d_size;
1246
+ } else if (!strcmp(name, STRUCT_OPS_SEC)) {
1247
+ if (obj->efile.st_ops_data)
1248
+ *size = obj->efile.st_ops_data->d_size;
1249
+ } else {
1250
+ Elf_Scn *scn = elf_sec_by_name(obj, name);
1251
+ Elf_Data *data = elf_sec_data(obj, scn);
1252
+
1253
+ if (data) {
1254
+ ret = 0; /* found it */
1255
+ *size = data->d_size;
1256
+ }
1257
+ }
1258
+
1259
+ return *size ? 0 : ret;
1260
+}
1261
+
1262
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
1263
+ __u32 *off)
1264
+{
1265
+ Elf_Data *symbols = obj->efile.symbols;
1266
+ const char *sname;
1267
+ size_t si;
1268
+
1269
+ if (!name || !off)
1270
+ return -EINVAL;
1271
+
1272
+ for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
1273
+ GElf_Sym sym;
1274
+
1275
+ if (!gelf_getsym(symbols, si, &sym))
1276
+ continue;
1277
+ if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1278
+ GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
1279
+ continue;
1280
+
1281
+ sname = elf_sym_str(obj, sym.st_name);
1282
+ if (!sname) {
1283
+ pr_warn("failed to get sym name string for var %s\n",
1284
+ name);
1285
+ return -EIO;
1286
+ }
1287
+ if (strcmp(name, sname) == 0) {
1288
+ *off = sym.st_value;
1289
+ return 0;
1290
+ }
1291
+ }
1292
+
1293
+ return -ENOENT;
1294
+}
1295
+
1296
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1297
+{
1298
+ struct bpf_map *new_maps;
1299
+ size_t new_cap;
1300
+ int i;
1301
+
1302
+ if (obj->nr_maps < obj->maps_cap)
1303
+ return &obj->maps[obj->nr_maps++];
1304
+
1305
+ new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
1306
+ new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
1307
+ if (!new_maps) {
1308
+ pr_warn("alloc maps for object failed\n");
1309
+ return ERR_PTR(-ENOMEM);
1310
+ }
1311
+
1312
+ obj->maps_cap = new_cap;
1313
+ obj->maps = new_maps;
1314
+
1315
+ /* zero out new maps */
1316
+ memset(obj->maps + obj->nr_maps, 0,
1317
+ (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
1318
+ /*
1319
+ * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
1320
+ * when failure (zclose won't close negative fd)).
1321
+ */
1322
+ for (i = obj->nr_maps; i < obj->maps_cap; i++) {
1323
+ obj->maps[i].fd = -1;
1324
+ obj->maps[i].inner_map_fd = -1;
1325
+ }
1326
+
1327
+ return &obj->maps[obj->nr_maps++];
1328
+}
1329
+
1330
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
1331
+{
1332
+ long page_sz = sysconf(_SC_PAGE_SIZE);
1333
+ size_t map_sz;
1334
+
1335
+ map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
1336
+ map_sz = roundup(map_sz, page_sz);
1337
+ return map_sz;
1338
+}
1339
+
1340
+static char *internal_map_name(struct bpf_object *obj,
1341
+ enum libbpf_map_type type)
1342
+{
1343
+ char map_name[BPF_OBJ_NAME_LEN], *p;
1344
+ const char *sfx = libbpf_type_to_btf_name[type];
1345
+ int sfx_len = max((size_t)7, strlen(sfx));
1346
+ int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
1347
+ strlen(obj->name));
1348
+
1349
+ snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1350
+ sfx_len, libbpf_type_to_btf_name[type]);
1351
+
1352
+ /* sanitise map name to characters allowed by kernel */
1353
+ for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1354
+ if (!isalnum(*p) && *p != '_' && *p != '.')
1355
+ *p = '_';
1356
+
1357
+ return strdup(map_name);
5761358 }
5771359
5781360 static int
579
-bpf_object__init_maps(struct bpf_object *obj)
1361
+bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1362
+ int sec_idx, void *data, size_t data_sz)
5801363 {
581
- int i, map_idx, map_def_sz, nr_maps = 0;
582
- Elf_Scn *scn;
583
- Elf_Data *data;
1364
+ struct bpf_map_def *def;
1365
+ struct bpf_map *map;
1366
+ int err;
1367
+
1368
+ map = bpf_object__add_map(obj);
1369
+ if (IS_ERR(map))
1370
+ return PTR_ERR(map);
1371
+
1372
+ map->libbpf_type = type;
1373
+ map->sec_idx = sec_idx;
1374
+ map->sec_offset = 0;
1375
+ map->name = internal_map_name(obj, type);
1376
+ if (!map->name) {
1377
+ pr_warn("failed to alloc map name\n");
1378
+ return -ENOMEM;
1379
+ }
1380
+
1381
+ def = &map->def;
1382
+ def->type = BPF_MAP_TYPE_ARRAY;
1383
+ def->key_size = sizeof(int);
1384
+ def->value_size = data_sz;
1385
+ def->max_entries = 1;
1386
+ def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1387
+ ? BPF_F_RDONLY_PROG : 0;
1388
+ def->map_flags |= BPF_F_MMAPABLE;
1389
+
1390
+ pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1391
+ map->name, map->sec_idx, map->sec_offset, def->map_flags);
1392
+
1393
+ map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
1394
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1395
+ if (map->mmaped == MAP_FAILED) {
1396
+ err = -errno;
1397
+ map->mmaped = NULL;
1398
+ pr_warn("failed to alloc map '%s' content buffer: %d\n",
1399
+ map->name, err);
1400
+ zfree(&map->name);
1401
+ return err;
1402
+ }
1403
+
1404
+ if (data)
1405
+ memcpy(map->mmaped, data, data_sz);
1406
+
1407
+ pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1408
+ return 0;
1409
+}
1410
+
1411
+static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1412
+{
1413
+ int err;
1414
+
1415
+ /*
1416
+ * Populate obj->maps with libbpf internal maps.
1417
+ */
1418
+ if (obj->efile.data_shndx >= 0) {
1419
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1420
+ obj->efile.data_shndx,
1421
+ obj->efile.data->d_buf,
1422
+ obj->efile.data->d_size);
1423
+ if (err)
1424
+ return err;
1425
+ }
1426
+ if (obj->efile.rodata_shndx >= 0) {
1427
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1428
+ obj->efile.rodata_shndx,
1429
+ obj->efile.rodata->d_buf,
1430
+ obj->efile.rodata->d_size);
1431
+ if (err)
1432
+ return err;
1433
+
1434
+ obj->rodata_map_idx = obj->nr_maps - 1;
1435
+ }
1436
+ if (obj->efile.bss_shndx >= 0) {
1437
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1438
+ obj->efile.bss_shndx,
1439
+ NULL,
1440
+ obj->efile.bss->d_size);
1441
+ if (err)
1442
+ return err;
1443
+ }
1444
+ return 0;
1445
+}
1446
+
1447
+
1448
+static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1449
+ const void *name)
1450
+{
1451
+ int i;
1452
+
1453
+ for (i = 0; i < obj->nr_extern; i++) {
1454
+ if (strcmp(obj->externs[i].name, name) == 0)
1455
+ return &obj->externs[i];
1456
+ }
1457
+ return NULL;
1458
+}
1459
+
1460
+static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1461
+ char value)
1462
+{
1463
+ switch (ext->kcfg.type) {
1464
+ case KCFG_BOOL:
1465
+ if (value == 'm') {
1466
+ pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
1467
+ ext->name, value);
1468
+ return -EINVAL;
1469
+ }
1470
+ *(bool *)ext_val = value == 'y' ? true : false;
1471
+ break;
1472
+ case KCFG_TRISTATE:
1473
+ if (value == 'y')
1474
+ *(enum libbpf_tristate *)ext_val = TRI_YES;
1475
+ else if (value == 'm')
1476
+ *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1477
+ else /* value == 'n' */
1478
+ *(enum libbpf_tristate *)ext_val = TRI_NO;
1479
+ break;
1480
+ case KCFG_CHAR:
1481
+ *(char *)ext_val = value;
1482
+ break;
1483
+ case KCFG_UNKNOWN:
1484
+ case KCFG_INT:
1485
+ case KCFG_CHAR_ARR:
1486
+ default:
1487
+ pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
1488
+ ext->name, value);
1489
+ return -EINVAL;
1490
+ }
1491
+ ext->is_set = true;
1492
+ return 0;
1493
+}
1494
+
1495
+static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1496
+ const char *value)
1497
+{
1498
+ size_t len;
1499
+
1500
+ if (ext->kcfg.type != KCFG_CHAR_ARR) {
1501
+ pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
1502
+ return -EINVAL;
1503
+ }
1504
+
1505
+ len = strlen(value);
1506
+ if (value[len - 1] != '"') {
1507
+ pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1508
+ ext->name, value);
1509
+ return -EINVAL;
1510
+ }
1511
+
1512
+ /* strip quotes */
1513
+ len -= 2;
1514
+ if (len >= ext->kcfg.sz) {
1515
+ pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
1516
+ ext->name, value, len, ext->kcfg.sz - 1);
1517
+ len = ext->kcfg.sz - 1;
1518
+ }
1519
+ memcpy(ext_val, value + 1, len);
1520
+ ext_val[len] = '\0';
1521
+ ext->is_set = true;
1522
+ return 0;
1523
+}
1524
+
1525
+static int parse_u64(const char *value, __u64 *res)
1526
+{
1527
+ char *value_end;
1528
+ int err;
1529
+
1530
+ errno = 0;
1531
+ *res = strtoull(value, &value_end, 0);
1532
+ if (errno) {
1533
+ err = -errno;
1534
+ pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1535
+ return err;
1536
+ }
1537
+ if (*value_end) {
1538
+ pr_warn("failed to parse '%s' as integer completely\n", value);
1539
+ return -EINVAL;
1540
+ }
1541
+ return 0;
1542
+}
1543
+
1544
+static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1545
+{
1546
+ int bit_sz = ext->kcfg.sz * 8;
1547
+
1548
+ if (ext->kcfg.sz == 8)
1549
+ return true;
1550
+
1551
+ /* Validate that value stored in u64 fits in integer of `ext->sz`
1552
+ * bytes size without any loss of information. If the target integer
1553
+ * is signed, we rely on the following limits of integer type of
1554
+ * Y bits and subsequent transformation:
1555
+ *
1556
+ * -2^(Y-1) <= X <= 2^(Y-1) - 1
1557
+ * 0 <= X + 2^(Y-1) <= 2^Y - 1
1558
+ * 0 <= X + 2^(Y-1) < 2^Y
1559
+ *
1560
+ * For unsigned target integer, check that all the (64 - Y) bits are
1561
+ * zero.
1562
+ */
1563
+ if (ext->kcfg.is_signed)
1564
+ return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1565
+ else
1566
+ return (v >> bit_sz) == 0;
1567
+}
1568
+
1569
+static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1570
+ __u64 value)
1571
+{
1572
+ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1573
+ pr_warn("extern (kcfg) %s=%llu should be integer\n",
1574
+ ext->name, (unsigned long long)value);
1575
+ return -EINVAL;
1576
+ }
1577
+ if (!is_kcfg_value_in_range(ext, value)) {
1578
+ pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
1579
+ ext->name, (unsigned long long)value, ext->kcfg.sz);
1580
+ return -ERANGE;
1581
+ }
1582
+ switch (ext->kcfg.sz) {
1583
+ case 1: *(__u8 *)ext_val = value; break;
1584
+ case 2: *(__u16 *)ext_val = value; break;
1585
+ case 4: *(__u32 *)ext_val = value; break;
1586
+ case 8: *(__u64 *)ext_val = value; break;
1587
+ default:
1588
+ return -EINVAL;
1589
+ }
1590
+ ext->is_set = true;
1591
+ return 0;
1592
+}
1593
+
1594
+static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1595
+ char *buf, void *data)
1596
+{
1597
+ struct extern_desc *ext;
1598
+ char *sep, *value;
1599
+ int len, err = 0;
1600
+ void *ext_val;
1601
+ __u64 num;
1602
+
1603
+ if (strncmp(buf, "CONFIG_", 7))
1604
+ return 0;
1605
+
1606
+ sep = strchr(buf, '=');
1607
+ if (!sep) {
1608
+ pr_warn("failed to parse '%s': no separator\n", buf);
1609
+ return -EINVAL;
1610
+ }
1611
+
1612
+ /* Trim ending '\n' */
1613
+ len = strlen(buf);
1614
+ if (buf[len - 1] == '\n')
1615
+ buf[len - 1] = '\0';
1616
+ /* Split on '=' and ensure that a value is present. */
1617
+ *sep = '\0';
1618
+ if (!sep[1]) {
1619
+ *sep = '=';
1620
+ pr_warn("failed to parse '%s': no value\n", buf);
1621
+ return -EINVAL;
1622
+ }
1623
+
1624
+ ext = find_extern_by_name(obj, buf);
1625
+ if (!ext || ext->is_set)
1626
+ return 0;
1627
+
1628
+ ext_val = data + ext->kcfg.data_off;
1629
+ value = sep + 1;
1630
+
1631
+ switch (*value) {
1632
+ case 'y': case 'n': case 'm':
1633
+ err = set_kcfg_value_tri(ext, ext_val, *value);
1634
+ break;
1635
+ case '"':
1636
+ err = set_kcfg_value_str(ext, ext_val, value);
1637
+ break;
1638
+ default:
1639
+ /* assume integer */
1640
+ err = parse_u64(value, &num);
1641
+ if (err) {
1642
+ pr_warn("extern (kcfg) %s=%s should be integer\n",
1643
+ ext->name, value);
1644
+ return err;
1645
+ }
1646
+ err = set_kcfg_value_num(ext, ext_val, num);
1647
+ break;
1648
+ }
1649
+ if (err)
1650
+ return err;
1651
+ pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
1652
+ return 0;
1653
+}
1654
+
1655
+static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1656
+{
1657
+ char buf[PATH_MAX];
1658
+ struct utsname uts;
1659
+ int len, err = 0;
1660
+ gzFile file;
1661
+
1662
+ uname(&uts);
1663
+ len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1664
+ if (len < 0)
1665
+ return -EINVAL;
1666
+ else if (len >= PATH_MAX)
1667
+ return -ENAMETOOLONG;
1668
+
1669
+ /* gzopen also accepts uncompressed files. */
1670
+ file = gzopen(buf, "r");
1671
+ if (!file)
1672
+ file = gzopen("/proc/config.gz", "r");
1673
+
1674
+ if (!file) {
1675
+ pr_warn("failed to open system Kconfig\n");
1676
+ return -ENOENT;
1677
+ }
1678
+
1679
+ while (gzgets(file, buf, sizeof(buf))) {
1680
+ err = bpf_object__process_kconfig_line(obj, buf, data);
1681
+ if (err) {
1682
+ pr_warn("error parsing system Kconfig line '%s': %d\n",
1683
+ buf, err);
1684
+ goto out;
1685
+ }
1686
+ }
1687
+
1688
+out:
1689
+ gzclose(file);
1690
+ return err;
1691
+}
1692
+
1693
+static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1694
+ const char *config, void *data)
1695
+{
1696
+ char buf[PATH_MAX];
1697
+ int err = 0;
1698
+ FILE *file;
1699
+
1700
+ file = fmemopen((void *)config, strlen(config), "r");
1701
+ if (!file) {
1702
+ err = -errno;
1703
+ pr_warn("failed to open in-memory Kconfig: %d\n", err);
1704
+ return err;
1705
+ }
1706
+
1707
+ while (fgets(buf, sizeof(buf), file)) {
1708
+ err = bpf_object__process_kconfig_line(obj, buf, data);
1709
+ if (err) {
1710
+ pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1711
+ buf, err);
1712
+ break;
1713
+ }
1714
+ }
1715
+
1716
+ fclose(file);
1717
+ return err;
1718
+}
1719
+
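/* Usage sketch (assumed example, not part of the patch): the in-memory variant
 * above is reached when the caller supplies its own Kconfig blob through
 * bpf_object_open_opts instead of relying on the system config files; the
 * object file name below is hypothetical.
 */

#include <bpf/libbpf.h>

static struct bpf_object *open_with_custom_kconfig(void)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
		.kconfig = "CONFIG_HZ=1000\nCONFIG_BPF_JIT=y\n");

	return bpf_object__open_file("prog.bpf.o", &opts);
}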
1720
+static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1721
+{
1722
+ struct extern_desc *last_ext = NULL, *ext;
1723
+ size_t map_sz;
1724
+ int i, err;
1725
+
1726
+ for (i = 0; i < obj->nr_extern; i++) {
1727
+ ext = &obj->externs[i];
1728
+ if (ext->type == EXT_KCFG)
1729
+ last_ext = ext;
1730
+ }
1731
+
1732
+ if (!last_ext)
1733
+ return 0;
1734
+
1735
+ map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1736
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1737
+ obj->efile.symbols_shndx,
1738
+ NULL, map_sz);
1739
+ if (err)
1740
+ return err;
1741
+
1742
+ obj->kconfig_map_idx = obj->nr_maps - 1;
1743
+
1744
+ return 0;
1745
+}
1746
+
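/* Sizing example for the .kconfig map created above (illustrative numbers,
 * not from the patch): cmp_externs() orders kcfg externs by descending
 * alignment, so e.g. a __u64 extern gets data_off 0, an int gets 8 and a
 * bool gets 12; last_ext is then the bool and map_sz = 12 + 1 = 13 bytes,
 * the size of the single array value backing all kcfg externs.
 */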
1747
+static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
1748
+{
5841749 Elf_Data *symbols = obj->efile.symbols;
1750
+ int i, map_def_sz = 0, nr_maps = 0, nr_syms;
1751
+ Elf_Data *data = NULL;
1752
+ Elf_Scn *scn;
5851753
5861754 if (obj->efile.maps_shndx < 0)
587
- return -EINVAL;
1755
+ return 0;
1756
+
5881757 if (!symbols)
5891758 return -EINVAL;
5901759
591
- scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
592
- if (scn)
593
- data = elf_getdata(scn, NULL);
1760
+
1761
+ scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
1762
+ data = elf_sec_data(obj, scn);
5941763 if (!scn || !data) {
595
- pr_warning("failed to get Elf_Data from map section %d\n",
596
- obj->efile.maps_shndx);
1764
+ pr_warn("elf: failed to get legacy map definitions for %s\n",
1765
+ obj->path);
5971766 return -EINVAL;
5981767 }
5991768
....@@ -604,7 +1773,8 @@
6041773 *
6051774 * TODO: Detect array of map and report error.
6061775 */
607
- for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
1776
+ nr_syms = symbols->d_size / sizeof(GElf_Sym);
1777
+ for (i = 0; i < nr_syms; i++) {
6081778 GElf_Sym sym;
6091779
6101780 if (!gelf_getsym(symbols, i, &sym))
....@@ -613,68 +1783,57 @@
6131783 continue;
6141784 nr_maps++;
6151785 }
616
-
617
- /* Alloc obj->maps and fill nr_maps. */
618
- pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
619
- nr_maps, data->d_size);
620
-
621
- if (!nr_maps)
622
- return 0;
623
-
6241786 /* Assume equally sized map definitions */
625
- map_def_sz = data->d_size / nr_maps;
626
- if (!data->d_size || (data->d_size % nr_maps) != 0) {
627
- pr_warning("unable to determine map definition size "
628
- "section %s, %d maps in %zd bytes\n",
629
- obj->path, nr_maps, data->d_size);
1787
+ pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
1788
+ nr_maps, data->d_size, obj->path);
1789
+
1790
+ if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
1791
+ pr_warn("elf: unable to determine legacy map definition size in %s\n",
1792
+ obj->path);
6301793 return -EINVAL;
6311794 }
1795
+ map_def_sz = data->d_size / nr_maps;
6321796
633
- obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
634
- if (!obj->maps) {
635
- pr_warning("alloc maps for object failed\n");
636
- return -ENOMEM;
637
- }
638
- obj->nr_maps = nr_maps;
639
-
640
- /*
641
- * fill all fd with -1 so won't close incorrect
642
- * fd (fd=0 is stdin) when failure (zclose won't close
643
- * negative fd)).
644
- */
645
- for (i = 0; i < nr_maps; i++)
646
- obj->maps[i].fd = -1;
647
-
648
- /*
649
- * Fill obj->maps using data in "maps" section.
650
- */
651
- for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
1797
+ /* Fill obj->maps using data in "maps" section. */
1798
+ for (i = 0; i < nr_syms; i++) {
6521799 GElf_Sym sym;
6531800 const char *map_name;
6541801 struct bpf_map_def *def;
1802
+ struct bpf_map *map;
6551803
6561804 if (!gelf_getsym(symbols, i, &sym))
6571805 continue;
6581806 if (sym.st_shndx != obj->efile.maps_shndx)
6591807 continue;
6601808
661
- map_name = elf_strptr(obj->efile.elf,
662
- obj->efile.strtabidx,
663
- sym.st_name);
664
- obj->maps[map_idx].offset = sym.st_value;
1809
+ map = bpf_object__add_map(obj);
1810
+ if (IS_ERR(map))
1811
+ return PTR_ERR(map);
1812
+
1813
+ map_name = elf_sym_str(obj, sym.st_name);
1814
+ if (!map_name) {
1815
+ pr_warn("failed to get map #%d name sym string for obj %s\n",
1816
+ i, obj->path);
1817
+ return -LIBBPF_ERRNO__FORMAT;
1818
+ }
1819
+
1820
+ map->libbpf_type = LIBBPF_MAP_UNSPEC;
1821
+ map->sec_idx = sym.st_shndx;
1822
+ map->sec_offset = sym.st_value;
1823
+ pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
1824
+ map_name, map->sec_idx, map->sec_offset);
6651825 if (sym.st_value + map_def_sz > data->d_size) {
666
- pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
667
- obj->path, map_name);
1826
+ pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
1827
+ obj->path, map_name);
6681828 return -EINVAL;
6691829 }
6701830
671
- obj->maps[map_idx].name = strdup(map_name);
672
- if (!obj->maps[map_idx].name) {
673
- pr_warning("failed to alloc map name\n");
1831
+ map->name = strdup(map_name);
1832
+ if (!map->name) {
1833
+ pr_warn("failed to alloc map name\n");
6741834 return -ENOMEM;
6751835 }
676
- pr_debug("map %d is \"%s\"\n", map_idx,
677
- obj->maps[map_idx].name);
1836
+ pr_debug("map %d is \"%s\"\n", i, map->name);
6781837 def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
6791838 /*
6801839 * If the definition of the map in the object file fits in
....@@ -683,7 +1842,7 @@
6831842 * calloc above.
6841843 */
6851844 if (map_def_sz <= sizeof(struct bpf_map_def)) {
686
- memcpy(&obj->maps[map_idx].def, def, map_def_sz);
1845
+ memcpy(&map->def, def, map_def_sz);
6871846 } else {
6881847 /*
6891848 * Here the map structure being read is bigger than what
....@@ -692,367 +1851,1803 @@
6921851 * incompatible.
6931852 */
6941853 char *b;
1854
+
6951855 for (b = ((char *)def) + sizeof(struct bpf_map_def);
6961856 b < ((char *)def) + map_def_sz; b++) {
6971857 if (*b != 0) {
698
- pr_warning("maps section in %s: \"%s\" "
699
- "has unrecognized, non-zero "
700
- "options\n",
701
- obj->path, map_name);
702
- return -EINVAL;
1858
+ pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
1859
+ obj->path, map_name);
1860
+ if (strict)
1861
+ return -EINVAL;
7031862 }
7041863 }
705
- memcpy(&obj->maps[map_idx].def, def,
706
- sizeof(struct bpf_map_def));
1864
+ memcpy(&map->def, def, sizeof(struct bpf_map_def));
7071865 }
708
- map_idx++;
1866
+ }
1867
+ return 0;
1868
+}
1869
+
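/* Illustrative legacy definition (assumed example, not part of the patch) of
 * the SEC("maps") style parsed above: each symbol in the "maps" section points
 * at a struct bpf_map_def, and all definitions within one object are expected
 * to have the same size. Assumes <bpf/bpf_helpers.h> for SEC().
 */

struct bpf_map_def SEC("maps") my_legacy_map = {
	.type        = BPF_MAP_TYPE_ARRAY,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(__u64),
	.max_entries = 64,
};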
1870
+static const struct btf_type *
1871
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
1872
+{
1873
+ const struct btf_type *t = btf__type_by_id(btf, id);
1874
+
1875
+ if (res_id)
1876
+ *res_id = id;
1877
+
1878
+ while (btf_is_mod(t) || btf_is_typedef(t)) {
1879
+ if (res_id)
1880
+ *res_id = t->type;
1881
+ t = btf__type_by_id(btf, t->type);
7091882 }
7101883
711
- qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map);
1884
+ return t;
1885
+}
1886
+
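/* Worked example (illustrative, not from the patch): with
 * "typedef const volatile int cvi_t;", resolving a member or variable of type
 * cvi_t walks TYPEDEF -> CONST -> VOLATILE and returns the underlying INT,
 * leaving *res_id (when provided) at the INT's type id.
 */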
1887
+static const struct btf_type *
1888
+resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
1889
+{
1890
+ const struct btf_type *t;
1891
+
1892
+ t = skip_mods_and_typedefs(btf, id, NULL);
1893
+ if (!btf_is_ptr(t))
1894
+ return NULL;
1895
+
1896
+ t = skip_mods_and_typedefs(btf, t->type, res_id);
1897
+
1898
+ return btf_is_func_proto(t) ? t : NULL;
1899
+}
1900
+
1901
+static const char *btf_kind_str(const struct btf_type *t)
1902
+{
1903
+ switch (btf_kind(t)) {
1904
+ case BTF_KIND_UNKN: return "void";
1905
+ case BTF_KIND_INT: return "int";
1906
+ case BTF_KIND_PTR: return "ptr";
1907
+ case BTF_KIND_ARRAY: return "array";
1908
+ case BTF_KIND_STRUCT: return "struct";
1909
+ case BTF_KIND_UNION: return "union";
1910
+ case BTF_KIND_ENUM: return "enum";
1911
+ case BTF_KIND_FWD: return "fwd";
1912
+ case BTF_KIND_TYPEDEF: return "typedef";
1913
+ case BTF_KIND_VOLATILE: return "volatile";
1914
+ case BTF_KIND_CONST: return "const";
1915
+ case BTF_KIND_RESTRICT: return "restrict";
1916
+ case BTF_KIND_FUNC: return "func";
1917
+ case BTF_KIND_FUNC_PROTO: return "func_proto";
1918
+ case BTF_KIND_VAR: return "var";
1919
+ case BTF_KIND_DATASEC: return "datasec";
1920
+ default: return "unknown";
1921
+ }
1922
+}
1923
+
1924
+/*
1925
+ * Fetch integer attribute of BTF map definition. Such attributes are
1926
+ * represented using a pointer to an array, in which the dimensionality of
1927
+ * the array encodes the specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
1928
+ * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
1929
+ * type definition, while using only sizeof(void *) space in ELF data section.
1930
+ */
1931
+static bool get_map_field_int(const char *map_name, const struct btf *btf,
1932
+ const struct btf_member *m, __u32 *res)
1933
+{
1934
+ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
1935
+ const char *name = btf__name_by_offset(btf, m->name_off);
1936
+ const struct btf_array *arr_info;
1937
+ const struct btf_type *arr_t;
1938
+
1939
+ if (!btf_is_ptr(t)) {
1940
+ pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
1941
+ map_name, name, btf_kind_str(t));
1942
+ return false;
1943
+ }
1944
+
1945
+ arr_t = btf__type_by_id(btf, t->type);
1946
+ if (!arr_t) {
1947
+ pr_warn("map '%s': attr '%s': type [%u] not found.\n",
1948
+ map_name, name, t->type);
1949
+ return false;
1950
+ }
1951
+ if (!btf_is_array(arr_t)) {
1952
+ pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
1953
+ map_name, name, btf_kind_str(arr_t));
1954
+ return false;
1955
+ }
1956
+ arr_info = btf_array(arr_t);
1957
+ *res = arr_info->nelems;
1958
+ return true;
1959
+}
1960
+
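/* Concrete illustration (assumed example, not from the patch) of the
 * pointer-to-array encoding that get_map_field_int() reads back: the __uint()
 * helper macro from bpf_helpers.h expands
 *
 *   __uint(max_entries, 1024);   ->   int (*max_entries)[1024];
 *
 * so the value 1024 is carried entirely by the array dimension of the member's
 * BTF type and is recovered above as btf_array(arr_t)->nelems.
 */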
1961
+static int build_map_pin_path(struct bpf_map *map, const char *path)
1962
+{
1963
+ char buf[PATH_MAX];
1964
+ int len;
1965
+
1966
+ if (!path)
1967
+ path = "/sys/fs/bpf";
1968
+
1969
+ len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
1970
+ if (len < 0)
1971
+ return -EINVAL;
1972
+ else if (len >= PATH_MAX)
1973
+ return -ENAMETOOLONG;
1974
+
1975
+ return bpf_map__set_pin_path(map, buf);
1976
+}
1977
+
1978
+
1979
+static int parse_btf_map_def(struct bpf_object *obj,
1980
+ struct bpf_map *map,
1981
+ const struct btf_type *def,
1982
+ bool strict, bool is_inner,
1983
+ const char *pin_root_path)
1984
+{
1985
+ const struct btf_type *t;
1986
+ const struct btf_member *m;
1987
+ int vlen, i;
1988
+
1989
+ vlen = btf_vlen(def);
1990
+ m = btf_members(def);
1991
+ for (i = 0; i < vlen; i++, m++) {
1992
+ const char *name = btf__name_by_offset(obj->btf, m->name_off);
1993
+
1994
+ if (!name) {
1995
+ pr_warn("map '%s': invalid field #%d.\n", map->name, i);
1996
+ return -EINVAL;
1997
+ }
1998
+ if (strcmp(name, "type") == 0) {
1999
+ if (!get_map_field_int(map->name, obj->btf, m,
2000
+ &map->def.type))
2001
+ return -EINVAL;
2002
+ pr_debug("map '%s': found type = %u.\n",
2003
+ map->name, map->def.type);
2004
+ } else if (strcmp(name, "max_entries") == 0) {
2005
+ if (!get_map_field_int(map->name, obj->btf, m,
2006
+ &map->def.max_entries))
2007
+ return -EINVAL;
2008
+ pr_debug("map '%s': found max_entries = %u.\n",
2009
+ map->name, map->def.max_entries);
2010
+ } else if (strcmp(name, "map_flags") == 0) {
2011
+ if (!get_map_field_int(map->name, obj->btf, m,
2012
+ &map->def.map_flags))
2013
+ return -EINVAL;
2014
+ pr_debug("map '%s': found map_flags = %u.\n",
2015
+ map->name, map->def.map_flags);
2016
+ } else if (strcmp(name, "numa_node") == 0) {
2017
+ if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
2018
+ return -EINVAL;
2019
+ pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
2020
+ } else if (strcmp(name, "key_size") == 0) {
2021
+ __u32 sz;
2022
+
2023
+ if (!get_map_field_int(map->name, obj->btf, m, &sz))
2024
+ return -EINVAL;
2025
+ pr_debug("map '%s': found key_size = %u.\n",
2026
+ map->name, sz);
2027
+ if (map->def.key_size && map->def.key_size != sz) {
2028
+ pr_warn("map '%s': conflicting key size %u != %u.\n",
2029
+ map->name, map->def.key_size, sz);
2030
+ return -EINVAL;
2031
+ }
2032
+ map->def.key_size = sz;
2033
+ } else if (strcmp(name, "key") == 0) {
2034
+ __s64 sz;
2035
+
2036
+ t = btf__type_by_id(obj->btf, m->type);
2037
+ if (!t) {
2038
+ pr_warn("map '%s': key type [%d] not found.\n",
2039
+ map->name, m->type);
2040
+ return -EINVAL;
2041
+ }
2042
+ if (!btf_is_ptr(t)) {
2043
+ pr_warn("map '%s': key spec is not PTR: %s.\n",
2044
+ map->name, btf_kind_str(t));
2045
+ return -EINVAL;
2046
+ }
2047
+ sz = btf__resolve_size(obj->btf, t->type);
2048
+ if (sz < 0) {
2049
+ pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2050
+ map->name, t->type, (ssize_t)sz);
2051
+ return sz;
2052
+ }
2053
+ pr_debug("map '%s': found key [%u], sz = %zd.\n",
2054
+ map->name, t->type, (ssize_t)sz);
2055
+ if (map->def.key_size && map->def.key_size != sz) {
2056
+ pr_warn("map '%s': conflicting key size %u != %zd.\n",
2057
+ map->name, map->def.key_size, (ssize_t)sz);
2058
+ return -EINVAL;
2059
+ }
2060
+ map->def.key_size = sz;
2061
+ map->btf_key_type_id = t->type;
2062
+ } else if (strcmp(name, "value_size") == 0) {
2063
+ __u32 sz;
2064
+
2065
+ if (!get_map_field_int(map->name, obj->btf, m, &sz))
2066
+ return -EINVAL;
2067
+ pr_debug("map '%s': found value_size = %u.\n",
2068
+ map->name, sz);
2069
+ if (map->def.value_size && map->def.value_size != sz) {
2070
+ pr_warn("map '%s': conflicting value size %u != %u.\n",
2071
+ map->name, map->def.value_size, sz);
2072
+ return -EINVAL;
2073
+ }
2074
+ map->def.value_size = sz;
2075
+ } else if (strcmp(name, "value") == 0) {
2076
+ __s64 sz;
2077
+
2078
+ t = btf__type_by_id(obj->btf, m->type);
2079
+ if (!t) {
2080
+ pr_warn("map '%s': value type [%d] not found.\n",
2081
+ map->name, m->type);
2082
+ return -EINVAL;
2083
+ }
2084
+ if (!btf_is_ptr(t)) {
2085
+ pr_warn("map '%s': value spec is not PTR: %s.\n",
2086
+ map->name, btf_kind_str(t));
2087
+ return -EINVAL;
2088
+ }
2089
+ sz = btf__resolve_size(obj->btf, t->type);
2090
+ if (sz < 0) {
2091
+ pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2092
+ map->name, t->type, (ssize_t)sz);
2093
+ return sz;
2094
+ }
2095
+ pr_debug("map '%s': found value [%u], sz = %zd.\n",
2096
+ map->name, t->type, (ssize_t)sz);
2097
+ if (map->def.value_size && map->def.value_size != sz) {
2098
+ pr_warn("map '%s': conflicting value size %u != %zd.\n",
2099
+ map->name, map->def.value_size, (ssize_t)sz);
2100
+ return -EINVAL;
2101
+ }
2102
+ map->def.value_size = sz;
2103
+ map->btf_value_type_id = t->type;
2104
+ }
2105
+ else if (strcmp(name, "values") == 0) {
2106
+ int err;
2107
+
2108
+ if (is_inner) {
2109
+ pr_warn("map '%s': multi-level inner maps not supported.\n",
2110
+ map->name);
2111
+ return -ENOTSUP;
2112
+ }
2113
+ if (i != vlen - 1) {
2114
+ pr_warn("map '%s': '%s' member should be last.\n",
2115
+ map->name, name);
2116
+ return -EINVAL;
2117
+ }
2118
+ if (!bpf_map_type__is_map_in_map(map->def.type)) {
2119
+ pr_warn("map '%s': should be map-in-map.\n",
2120
+ map->name);
2121
+ return -ENOTSUP;
2122
+ }
2123
+ if (map->def.value_size && map->def.value_size != 4) {
2124
+ pr_warn("map '%s': conflicting value size %u != 4.\n",
2125
+ map->name, map->def.value_size);
2126
+ return -EINVAL;
2127
+ }
2128
+ map->def.value_size = 4;
2129
+ t = btf__type_by_id(obj->btf, m->type);
2130
+ if (!t) {
2131
+ pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
2132
+ map->name, m->type);
2133
+ return -EINVAL;
2134
+ }
2135
+ if (!btf_is_array(t) || btf_array(t)->nelems) {
2136
+ pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
2137
+ map->name);
2138
+ return -EINVAL;
2139
+ }
2140
+ t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
2141
+ NULL);
2142
+ if (!btf_is_ptr(t)) {
2143
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2144
+ map->name, btf_kind_str(t));
2145
+ return -EINVAL;
2146
+ }
2147
+ t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
2148
+ if (!btf_is_struct(t)) {
2149
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2150
+ map->name, btf_kind_str(t));
2151
+ return -EINVAL;
2152
+ }
2153
+
2154
+ map->inner_map = calloc(1, sizeof(*map->inner_map));
2155
+ if (!map->inner_map)
2156
+ return -ENOMEM;
2157
+ map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
2158
+ map->inner_map->name = malloc(strlen(map->name) +
2159
+ sizeof(".inner") + 1);
2160
+ if (!map->inner_map->name)
2161
+ return -ENOMEM;
2162
+ sprintf(map->inner_map->name, "%s.inner", map->name);
2163
+
2164
+ err = parse_btf_map_def(obj, map->inner_map, t, strict,
2165
+ true /* is_inner */, NULL);
2166
+ if (err)
2167
+ return err;
2168
+ } else if (strcmp(name, "pinning") == 0) {
2169
+ __u32 val;
2170
+ int err;
2171
+
2172
+ if (is_inner) {
2173
+ pr_debug("map '%s': inner def can't be pinned.\n",
2174
+ map->name);
2175
+ return -EINVAL;
2176
+ }
2177
+ if (!get_map_field_int(map->name, obj->btf, m, &val))
2178
+ return -EINVAL;
2179
+ pr_debug("map '%s': found pinning = %u.\n",
2180
+ map->name, val);
2181
+
2182
+ if (val != LIBBPF_PIN_NONE &&
2183
+ val != LIBBPF_PIN_BY_NAME) {
2184
+ pr_warn("map '%s': invalid pinning value %u.\n",
2185
+ map->name, val);
2186
+ return -EINVAL;
2187
+ }
2188
+ if (val == LIBBPF_PIN_BY_NAME) {
2189
+ err = build_map_pin_path(map, pin_root_path);
2190
+ if (err) {
2191
+ pr_warn("map '%s': couldn't build pin path.\n",
2192
+ map->name);
2193
+ return err;
2194
+ }
2195
+ }
2196
+ } else {
2197
+ if (strict) {
2198
+ pr_warn("map '%s': unknown field '%s'.\n",
2199
+ map->name, name);
2200
+ return -ENOTSUP;
2201
+ }
2202
+ pr_debug("map '%s': ignoring unknown field '%s'.\n",
2203
+ map->name, name);
2204
+ }
2205
+ }
2206
+
2207
+ if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
2208
+ pr_warn("map '%s': map type isn't specified.\n", map->name);
2209
+ return -EINVAL;
2210
+ }
2211
+
2212
+ return 0;
2213
+}
2214
+
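/* Illustrative BTF-defined map (assumed example, not part of the patch) that
 * parse_btf_map_def() accepts; the map name and key/value types are
 * hypothetical. Assumes <linux/bpf.h> and <bpf/bpf_helpers.h> for the
 * __uint()/__type()/SEC() helpers and enum libbpf_pin_type.
 */

struct event {
	__u64 ts;
	__u32 cpu;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, struct event);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
} events SEC(".maps");

/* With LIBBPF_PIN_BY_NAME, build_map_pin_path() above derives
 * "/sys/fs/bpf/events" unless pin_root_path overrides the default root.
 */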
2215
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2216
+ const struct btf_type *sec,
2217
+ int var_idx, int sec_idx,
2218
+ const Elf_Data *data, bool strict,
2219
+ const char *pin_root_path)
2220
+{
2221
+ const struct btf_type *var, *def;
2222
+ const struct btf_var_secinfo *vi;
2223
+ const struct btf_var *var_extra;
2224
+ const char *map_name;
2225
+ struct bpf_map *map;
2226
+
2227
+ vi = btf_var_secinfos(sec) + var_idx;
2228
+ var = btf__type_by_id(obj->btf, vi->type);
2229
+ var_extra = btf_var(var);
2230
+ map_name = btf__name_by_offset(obj->btf, var->name_off);
2231
+
2232
+ if (map_name == NULL || map_name[0] == '\0') {
2233
+ pr_warn("map #%d: empty name.\n", var_idx);
2234
+ return -EINVAL;
2235
+ }
2236
+ if ((__u64)vi->offset + vi->size > data->d_size) {
2237
+ pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2238
+ return -EINVAL;
2239
+ }
2240
+ if (!btf_is_var(var)) {
2241
+ pr_warn("map '%s': unexpected var kind %s.\n",
2242
+ map_name, btf_kind_str(var));
2243
+ return -EINVAL;
2244
+ }
2245
+ if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
2246
+ var_extra->linkage != BTF_VAR_STATIC) {
2247
+ pr_warn("map '%s': unsupported var linkage %u.\n",
2248
+ map_name, var_extra->linkage);
2249
+ return -EOPNOTSUPP;
2250
+ }
2251
+
2252
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2253
+ if (!btf_is_struct(def)) {
2254
+ pr_warn("map '%s': unexpected def kind %s.\n",
2255
+ map_name, btf_kind_str(var));
2256
+ return -EINVAL;
2257
+ }
2258
+ if (def->size > vi->size) {
2259
+ pr_warn("map '%s': invalid def size.\n", map_name);
2260
+ return -EINVAL;
2261
+ }
2262
+
2263
+ map = bpf_object__add_map(obj);
2264
+ if (IS_ERR(map))
2265
+ return PTR_ERR(map);
2266
+ map->name = strdup(map_name);
2267
+ if (!map->name) {
2268
+ pr_warn("map '%s': failed to alloc map name.\n", map_name);
2269
+ return -ENOMEM;
2270
+ }
2271
+ map->libbpf_type = LIBBPF_MAP_UNSPEC;
2272
+ map->def.type = BPF_MAP_TYPE_UNSPEC;
2273
+ map->sec_idx = sec_idx;
2274
+ map->sec_offset = vi->offset;
2275
+ map->btf_var_idx = var_idx;
2276
+ pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2277
+ map_name, map->sec_idx, map->sec_offset);
2278
+
2279
+ return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
2280
+}
2281
+
2282
+static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2283
+ const char *pin_root_path)
2284
+{
2285
+ const struct btf_type *sec = NULL;
2286
+ int nr_types, i, vlen, err;
2287
+ const struct btf_type *t;
2288
+ const char *name;
2289
+ Elf_Data *data;
2290
+ Elf_Scn *scn;
2291
+
2292
+ if (obj->efile.btf_maps_shndx < 0)
2293
+ return 0;
2294
+
2295
+ scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2296
+ data = elf_sec_data(obj, scn);
2297
+ if (!scn || !data) {
2298
+ pr_warn("elf: failed to get %s map definitions for %s\n",
2299
+ MAPS_ELF_SEC, obj->path);
2300
+ return -EINVAL;
2301
+ }
2302
+
2303
+ nr_types = btf__get_nr_types(obj->btf);
2304
+ for (i = 1; i <= nr_types; i++) {
2305
+ t = btf__type_by_id(obj->btf, i);
2306
+ if (!btf_is_datasec(t))
2307
+ continue;
2308
+ name = btf__name_by_offset(obj->btf, t->name_off);
2309
+ if (strcmp(name, MAPS_ELF_SEC) == 0) {
2310
+ sec = t;
2311
+ obj->efile.btf_maps_sec_btf_id = i;
2312
+ break;
2313
+ }
2314
+ }
2315
+
2316
+ if (!sec) {
2317
+ pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2318
+ return -ENOENT;
2319
+ }
2320
+
2321
+ vlen = btf_vlen(sec);
2322
+ for (i = 0; i < vlen; i++) {
2323
+ err = bpf_object__init_user_btf_map(obj, sec, i,
2324
+ obj->efile.btf_maps_shndx,
2325
+ data, strict,
2326
+ pin_root_path);
2327
+ if (err)
2328
+ return err;
2329
+ }
2330
+
2331
+ return 0;
2332
+}
2333
+
2334
+static int bpf_object__init_maps(struct bpf_object *obj,
2335
+ const struct bpf_object_open_opts *opts)
2336
+{
2337
+ const char *pin_root_path;
2338
+ bool strict;
2339
+ int err;
2340
+
2341
+ strict = !OPTS_GET(opts, relaxed_maps, false);
2342
+ pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2343
+
2344
+ err = bpf_object__init_user_maps(obj, strict);
2345
+ err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2346
+ err = err ?: bpf_object__init_global_data_maps(obj);
2347
+ err = err ?: bpf_object__init_kconfig_map(obj);
2348
+ err = err ?: bpf_object__init_struct_ops_maps(obj);
2349
+ if (err)
2350
+ return err;
2351
+
7122352 return 0;
7132353 }
7142354
7152355 static bool section_have_execinstr(struct bpf_object *obj, int idx)
7162356 {
717
- Elf_Scn *scn;
7182357 GElf_Shdr sh;
7192358
720
- scn = elf_getscn(obj->efile.elf, idx);
721
- if (!scn)
2359
+ if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
7222360 return false;
7232361
724
- if (gelf_getshdr(scn, &sh) != &sh)
725
- return false;
2362
+ return sh.sh_flags & SHF_EXECINSTR;
2363
+}
7262364
727
- if (sh.sh_flags & SHF_EXECINSTR)
2365
+static bool btf_needs_sanitization(struct bpf_object *obj)
2366
+{
2367
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2368
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2369
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
2370
+
2371
+ return !has_func || !has_datasec || !has_func_global;
2372
+}
2373
+
2374
+static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2375
+{
2376
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
2377
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
2378
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
2379
+ struct btf_type *t;
2380
+ int i, j, vlen;
2381
+
2382
+ for (i = 1; i <= btf__get_nr_types(btf); i++) {
2383
+ t = (struct btf_type *)btf__type_by_id(btf, i);
2384
+
2385
+ if (!has_datasec && btf_is_var(t)) {
2386
+ /* replace VAR with INT */
2387
+ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2388
+ /*
2389
+ * using size = 1 is the safest choice, 4 will be too
2390
+ * big and cause kernel BTF validation failure if
2391
+ * original variable took less than 4 bytes
2392
+ */
2393
+ t->size = 1;
2394
+ *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2395
+ } else if (!has_datasec && btf_is_datasec(t)) {
2396
+ /* replace DATASEC with STRUCT */
2397
+ const struct btf_var_secinfo *v = btf_var_secinfos(t);
2398
+ struct btf_member *m = btf_members(t);
2399
+ struct btf_type *vt;
2400
+ char *name;
2401
+
2402
+ name = (char *)btf__name_by_offset(btf, t->name_off);
2403
+ while (*name) {
2404
+ if (*name == '.')
2405
+ *name = '_';
2406
+ name++;
2407
+ }
2408
+
2409
+ vlen = btf_vlen(t);
2410
+ t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2411
+ for (j = 0; j < vlen; j++, v++, m++) {
2412
+ /* order of field assignments is important */
2413
+ m->offset = v->offset * 8;
2414
+ m->type = v->type;
2415
+ /* preserve variable name as member name */
2416
+ vt = (void *)btf__type_by_id(btf, v->type);
2417
+ m->name_off = vt->name_off;
2418
+ }
2419
+ } else if (!has_func && btf_is_func_proto(t)) {
2420
+ /* replace FUNC_PROTO with ENUM */
2421
+ vlen = btf_vlen(t);
2422
+ t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2423
+ t->size = sizeof(__u32); /* kernel enforced */
2424
+ } else if (!has_func && btf_is_func(t)) {
2425
+ /* replace FUNC with TYPEDEF */
2426
+ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2427
+ } else if (!has_func_global && btf_is_func(t)) {
2428
+ /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2429
+ t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2430
+ }
2431
+ }
2432
+}
2433
+
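/* Worked example (illustrative, not from the patch) of the sanitization above
 * on a kernel without BTF_KIND_DATASEC support: a DATASEC named ".rodata" is
 * rewritten in place into a STRUCT named "_rodata" whose members reuse the
 * contained variables' names, with byte offsets converted to bit offsets, and
 * every VAR it referenced becomes a 1-byte INT, so the BTF blob still passes
 * the older kernel's validation.
 */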
2434
+static bool libbpf_needs_btf(const struct bpf_object *obj)
2435
+{
2436
+ return obj->efile.btf_maps_shndx >= 0 ||
2437
+ obj->efile.st_ops_shndx >= 0 ||
2438
+ obj->nr_extern > 0;
2439
+}
2440
+
2441
+static bool kernel_needs_btf(const struct bpf_object *obj)
2442
+{
2443
+ return obj->efile.st_ops_shndx >= 0;
2444
+}
2445
+
2446
+static int bpf_object__init_btf(struct bpf_object *obj,
2447
+ Elf_Data *btf_data,
2448
+ Elf_Data *btf_ext_data)
2449
+{
2450
+ int err = -ENOENT;
2451
+
2452
+ if (btf_data) {
2453
+ obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2454
+ if (IS_ERR(obj->btf)) {
2455
+ err = PTR_ERR(obj->btf);
2456
+ obj->btf = NULL;
2457
+ pr_warn("Error loading ELF section %s: %d.\n",
2458
+ BTF_ELF_SEC, err);
2459
+ goto out;
2460
+ }
2461
+ /* enforce 8-byte pointers for BPF-targeted BTFs */
2462
+ btf__set_pointer_size(obj->btf, 8);
2463
+ err = 0;
2464
+ }
2465
+ if (btf_ext_data) {
2466
+ if (!obj->btf) {
2467
+ pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
2468
+ BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2469
+ goto out;
2470
+ }
2471
+ obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
2472
+ btf_ext_data->d_size);
2473
+ if (IS_ERR(obj->btf_ext)) {
2474
+ pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
2475
+ BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
2476
+ obj->btf_ext = NULL;
2477
+ goto out;
2478
+ }
2479
+ }
2480
+out:
2481
+ if (err && libbpf_needs_btf(obj)) {
2482
+ pr_warn("BTF is required, but is missing or corrupted.\n");
2483
+ return err;
2484
+ }
2485
+ return 0;
2486
+}
2487
+
2488
+static int bpf_object__finalize_btf(struct bpf_object *obj)
2489
+{
2490
+ int err;
2491
+
2492
+ if (!obj->btf)
2493
+ return 0;
2494
+
2495
+ err = btf__finalize_data(obj, obj->btf);
2496
+ if (err) {
2497
+ pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2498
+ return err;
2499
+ }
2500
+
2501
+ return 0;
2502
+}
2503
+
2504
+static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
2505
+{
2506
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2507
+ prog->type == BPF_PROG_TYPE_LSM)
2508
+ return true;
2509
+
2510
+ /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2511
+ * also need vmlinux BTF
2512
+ */
2513
+ if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
7282514 return true;
7292515
7302516 return false;
7312517 }
7322518
733
-static int bpf_object__elf_collect(struct bpf_object *obj)
2519
+static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
7342520 {
735
- Elf *elf = obj->efile.elf;
736
- GElf_Ehdr *ep = &obj->efile.ehdr;
737
- Elf_Scn *scn = NULL;
738
- int idx = 0, err = 0;
2521
+ bool need_vmlinux_btf = false;
2522
+ struct bpf_program *prog;
2523
+ int i, err;
7392524
740
- /* Elf is corrupted/truncated, avoid calling elf_strptr. */
741
- if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
742
- pr_warning("failed to get e_shstrndx from %s\n",
743
- obj->path);
744
- return -LIBBPF_ERRNO__FORMAT;
2525
+ /* CO-RE relocations need kernel BTF */
2526
+ if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
2527
+ need_vmlinux_btf = true;
2528
+
2529
+ /* Support for typed ksyms needs kernel BTF */
2530
+ for (i = 0; i < obj->nr_extern; i++) {
2531
+ const struct extern_desc *ext;
2532
+
2533
+ ext = &obj->externs[i];
2534
+ if (ext->type == EXT_KSYM && ext->ksym.type_id) {
2535
+ need_vmlinux_btf = true;
2536
+ break;
2537
+ }
7452538 }
7462539
747
- while ((scn = elf_nextscn(elf, scn)) != NULL) {
748
- char *name;
749
- GElf_Shdr sh;
750
- Elf_Data *data;
751
-
752
- idx++;
753
- if (gelf_getshdr(scn, &sh) != &sh) {
754
- pr_warning("failed to get section(%d) header from %s\n",
755
- idx, obj->path);
756
- err = -LIBBPF_ERRNO__FORMAT;
757
- goto out;
2540
+ bpf_object__for_each_program(prog, obj) {
2541
+ if (!prog->load)
2542
+ continue;
2543
+ if (libbpf_prog_needs_vmlinux_btf(prog)) {
2544
+ need_vmlinux_btf = true;
2545
+ break;
7582546 }
759
-
760
- name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
761
- if (!name) {
762
- pr_warning("failed to get section(%d) name from %s\n",
763
- idx, obj->path);
764
- err = -LIBBPF_ERRNO__FORMAT;
765
- goto out;
766
- }
767
-
768
- data = elf_getdata(scn, 0);
769
- if (!data) {
770
- pr_warning("failed to get section(%d) data from %s(%s)\n",
771
- idx, name, obj->path);
772
- err = -LIBBPF_ERRNO__FORMAT;
773
- goto out;
774
- }
775
- pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
776
- idx, name, (unsigned long)data->d_size,
777
- (int)sh.sh_link, (unsigned long)sh.sh_flags,
778
- (int)sh.sh_type);
779
-
780
- if (strcmp(name, "license") == 0)
781
- err = bpf_object__init_license(obj,
782
- data->d_buf,
783
- data->d_size);
784
- else if (strcmp(name, "version") == 0)
785
- err = bpf_object__init_kversion(obj,
786
- data->d_buf,
787
- data->d_size);
788
- else if (strcmp(name, "maps") == 0)
789
- obj->efile.maps_shndx = idx;
790
- else if (strcmp(name, BTF_ELF_SEC) == 0) {
791
- obj->btf = btf__new(data->d_buf, data->d_size,
792
- __pr_debug);
793
- if (IS_ERR(obj->btf)) {
794
- pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
795
- BTF_ELF_SEC, PTR_ERR(obj->btf));
796
- obj->btf = NULL;
797
- }
798
- } else if (sh.sh_type == SHT_SYMTAB) {
799
- if (obj->efile.symbols) {
800
- pr_warning("bpf: multiple SYMTAB in %s\n",
801
- obj->path);
802
- err = -LIBBPF_ERRNO__FORMAT;
803
- } else {
804
- obj->efile.symbols = data;
805
- obj->efile.strtabidx = sh.sh_link;
806
- }
807
- } else if ((sh.sh_type == SHT_PROGBITS) &&
808
- (sh.sh_flags & SHF_EXECINSTR) &&
809
- (data->d_size > 0)) {
810
- if (strcmp(name, ".text") == 0)
811
- obj->efile.text_shndx = idx;
812
- err = bpf_object__add_program(obj, data->d_buf,
813
- data->d_size, name, idx);
814
- if (err) {
815
- char errmsg[STRERR_BUFSIZE];
816
- char *cp = str_error(-err, errmsg, sizeof(errmsg));
817
-
818
- pr_warning("failed to alloc program %s (%s): %s",
819
- name, obj->path, cp);
820
- }
821
- } else if (sh.sh_type == SHT_REL) {
822
- void *reloc = obj->efile.reloc;
823
- int nr_reloc = obj->efile.nr_reloc + 1;
824
- int sec = sh.sh_info; /* points to other section */
825
-
826
- /* Only do relo for section with exec instructions */
827
- if (!section_have_execinstr(obj, sec)) {
828
- pr_debug("skip relo %s(%d) for section(%d)\n",
829
- name, idx, sec);
830
- continue;
831
- }
832
-
833
- reloc = reallocarray(reloc, nr_reloc,
834
- sizeof(*obj->efile.reloc));
835
- if (!reloc) {
836
- pr_warning("realloc failed\n");
837
- err = -ENOMEM;
838
- } else {
839
- int n = nr_reloc - 1;
840
-
841
- obj->efile.reloc = reloc;
842
- obj->efile.nr_reloc = nr_reloc;
843
-
844
- obj->efile.reloc[n].shdr = sh;
845
- obj->efile.reloc[n].data = data;
846
- }
847
- } else {
848
- pr_debug("skip section(%d) %s\n", idx, name);
849
- }
850
- if (err)
851
- goto out;
8522547 }
8532548
854
- if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
855
- pr_warning("Corrupted ELF file: index of strtab invalid\n");
856
- return LIBBPF_ERRNO__FORMAT;
2549
+ if (!need_vmlinux_btf)
2550
+ return 0;
2551
+
2552
+ obj->btf_vmlinux = libbpf_find_kernel_btf();
2553
+ if (IS_ERR(obj->btf_vmlinux)) {
2554
+ err = PTR_ERR(obj->btf_vmlinux);
2555
+ pr_warn("Error loading vmlinux BTF: %d\n", err);
2556
+ obj->btf_vmlinux = NULL;
2557
+ return err;
8572558 }
858
- if (obj->efile.maps_shndx >= 0) {
859
- err = bpf_object__init_maps(obj);
860
- if (err)
861
- goto out;
2559
+ return 0;
2560
+}
2561
+
2562
+static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
2563
+{
2564
+ struct btf *kern_btf = obj->btf;
2565
+ bool btf_mandatory, sanitize;
2566
+ int err = 0;
2567
+
2568
+ if (!obj->btf)
2569
+ return 0;
2570
+
2571
+ if (!kernel_supports(FEAT_BTF)) {
2572
+ if (kernel_needs_btf(obj)) {
2573
+ err = -EOPNOTSUPP;
2574
+ goto report;
2575
+ }
2576
+ pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
2577
+ return 0;
8622578 }
863
- err = bpf_object__init_prog_names(obj);
864
-out:
2579
+
2580
+ sanitize = btf_needs_sanitization(obj);
2581
+ if (sanitize) {
2582
+ const void *raw_data;
2583
+ __u32 sz;
2584
+
2585
+ /* clone BTF to sanitize a copy and leave the original intact */
2586
+ raw_data = btf__get_raw_data(obj->btf, &sz);
2587
+ kern_btf = btf__new(raw_data, sz);
2588
+ if (IS_ERR(kern_btf))
2589
+ return PTR_ERR(kern_btf);
2590
+
2591
+ /* enforce 8-byte pointers for BPF-targeted BTFs */
2592
+ btf__set_pointer_size(obj->btf, 8);
2593
+ bpf_object__sanitize_btf(obj, kern_btf);
2594
+ }
2595
+
2596
+ err = btf__load(kern_btf);
2597
+ if (sanitize) {
2598
+ if (!err) {
2599
+ /* move fd to libbpf's BTF */
2600
+ btf__set_fd(obj->btf, btf__fd(kern_btf));
2601
+ btf__set_fd(kern_btf, -1);
2602
+ }
2603
+ btf__free(kern_btf);
2604
+ }
2605
+report:
2606
+ if (err) {
2607
+ btf_mandatory = kernel_needs_btf(obj);
2608
+ pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
2609
+ btf_mandatory ? "BTF is mandatory, can't proceed."
2610
+ : "BTF is optional, ignoring.");
2611
+ if (!btf_mandatory)
2612
+ err = 0;
2613
+ }
8652614 return err;
8662615 }
8672616
868
-static struct bpf_program *
869
-bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
2617
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
8702618 {
871
- struct bpf_program *prog;
872
- size_t i;
2619
+ const char *name;
8732620
874
- for (i = 0; i < obj->nr_programs; i++) {
875
- prog = &obj->programs[i];
876
- if (prog->idx == idx)
877
- return prog;
2621
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
2622
+ if (!name) {
2623
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2624
+ off, obj->path, elf_errmsg(-1));
2625
+ return NULL;
2626
+ }
2627
+
2628
+ return name;
2629
+}
2630
+
2631
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
2632
+{
2633
+ const char *name;
2634
+
2635
+ name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
2636
+ if (!name) {
2637
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
2638
+ off, obj->path, elf_errmsg(-1));
2639
+ return NULL;
2640
+ }
2641
+
2642
+ return name;
2643
+}
2644
+
2645
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
2646
+{
2647
+ Elf_Scn *scn;
2648
+
2649
+ scn = elf_getscn(obj->efile.elf, idx);
2650
+ if (!scn) {
2651
+ pr_warn("elf: failed to get section(%zu) from %s: %s\n",
2652
+ idx, obj->path, elf_errmsg(-1));
2653
+ return NULL;
2654
+ }
2655
+ return scn;
2656
+}
2657
+
2658
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
2659
+{
2660
+ Elf_Scn *scn = NULL;
2661
+ Elf *elf = obj->efile.elf;
2662
+ const char *sec_name;
2663
+
2664
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
2665
+ sec_name = elf_sec_name(obj, scn);
2666
+ if (!sec_name)
2667
+ return NULL;
2668
+
2669
+ if (strcmp(sec_name, name) != 0)
2670
+ continue;
2671
+
2672
+ return scn;
8782673 }
8792674 return NULL;
8802675 }
8812676
2677
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
2678
+{
2679
+ if (!scn)
2680
+ return -EINVAL;
2681
+
2682
+ if (gelf_getshdr(scn, hdr) != hdr) {
2683
+ pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
2684
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2685
+ return -EINVAL;
2686
+ }
2687
+
2688
+ return 0;
2689
+}
2690
+
2691
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
2692
+{
2693
+ const char *name;
2694
+ GElf_Shdr sh;
2695
+
2696
+ if (!scn)
2697
+ return NULL;
2698
+
2699
+ if (elf_sec_hdr(obj, scn, &sh))
2700
+ return NULL;
2701
+
2702
+ name = elf_sec_str(obj, sh.sh_name);
2703
+ if (!name) {
2704
+ pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
2705
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
2706
+ return NULL;
2707
+ }
2708
+
2709
+ return name;
2710
+}
2711
+
2712
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
2713
+{
2714
+ Elf_Data *data;
2715
+
2716
+ if (!scn)
2717
+ return NULL;
2718
+
2719
+ data = elf_getdata(scn, 0);
2720
+ if (!data) {
2721
+ pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
2722
+ elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
2723
+ obj->path, elf_errmsg(-1));
2724
+ return NULL;
2725
+ }
2726
+
2727
+ return data;
2728
+}
2729
+
2730
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
2731
+ size_t off, __u32 sym_type, GElf_Sym *sym)
2732
+{
2733
+ Elf_Data *symbols = obj->efile.symbols;
2734
+ size_t n = symbols->d_size / sizeof(GElf_Sym);
2735
+ int i;
2736
+
2737
+ for (i = 0; i < n; i++) {
2738
+ if (!gelf_getsym(symbols, i, sym))
2739
+ continue;
2740
+ if (sym->st_shndx != sec_idx || sym->st_value != off)
2741
+ continue;
2742
+ if (GELF_ST_TYPE(sym->st_info) != sym_type)
2743
+ continue;
2744
+ return 0;
2745
+ }
2746
+
2747
+ return -ENOENT;
2748
+}
2749
+
2750
+static bool is_sec_name_dwarf(const char *name)
2751
+{
2752
+ /* approximation, but the actual list is too long */
2753
+ return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
2754
+}
2755
+
2756
+static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
2757
+{
2758
+ /* no special handling of .strtab */
2759
+ if (hdr->sh_type == SHT_STRTAB)
2760
+ return true;
2761
+
2762
+ /* ignore .llvm_addrsig section as well */
2763
+ if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
2764
+ return true;
2765
+
2766
+ /* no subprograms will lead to an empty .text section, ignore it */
2767
+ if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
2768
+ strcmp(name, ".text") == 0)
2769
+ return true;
2770
+
2771
+ /* DWARF sections */
2772
+ if (is_sec_name_dwarf(name))
2773
+ return true;
2774
+
2775
+ if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
2776
+ name += sizeof(".rel") - 1;
2777
+ /* DWARF section relocations */
2778
+ if (is_sec_name_dwarf(name))
2779
+ return true;
2780
+
2781
+ /* .BTF and .BTF.ext don't need relocations */
2782
+ if (strcmp(name, BTF_ELF_SEC) == 0 ||
2783
+ strcmp(name, BTF_EXT_ELF_SEC) == 0)
2784
+ return true;
2785
+ }
2786
+
2787
+ return false;
2788
+}
2789
+
2790
+static int cmp_progs(const void *_a, const void *_b)
2791
+{
2792
+ const struct bpf_program *a = _a;
2793
+ const struct bpf_program *b = _b;
2794
+
2795
+ if (a->sec_idx != b->sec_idx)
2796
+ return a->sec_idx < b->sec_idx ? -1 : 1;
2797
+
2798
+ /* sec_insn_off can't be the same within the section */
2799
+ return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
2800
+}
2801
+
2802
+static int bpf_object__elf_collect(struct bpf_object *obj)
2803
+{
2804
+ Elf *elf = obj->efile.elf;
2805
+ Elf_Data *btf_ext_data = NULL;
2806
+ Elf_Data *btf_data = NULL;
2807
+ int idx = 0, err = 0;
2808
+ const char *name;
2809
+ Elf_Data *data;
2810
+ Elf_Scn *scn;
2811
+ GElf_Shdr sh;
2812
+
2813
+ /* a bunch of ELF parsing functionality depends on processing symbols,
2814
+ * so do the first pass and find the symbol table
2815
+ */
2816
+ scn = NULL;
2817
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
2818
+ if (elf_sec_hdr(obj, scn, &sh))
2819
+ return -LIBBPF_ERRNO__FORMAT;
2820
+
2821
+ if (sh.sh_type == SHT_SYMTAB) {
2822
+ if (obj->efile.symbols) {
2823
+ pr_warn("elf: multiple symbol tables in %s\n", obj->path);
2824
+ return -LIBBPF_ERRNO__FORMAT;
2825
+ }
2826
+
2827
+ data = elf_sec_data(obj, scn);
2828
+ if (!data)
2829
+ return -LIBBPF_ERRNO__FORMAT;
2830
+
2831
+ obj->efile.symbols = data;
2832
+ obj->efile.symbols_shndx = elf_ndxscn(scn);
2833
+ obj->efile.strtabidx = sh.sh_link;
2834
+ }
2835
+ }
2836
+
2837
+ scn = NULL;
2838
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
2839
+ idx++;
2840
+
2841
+ if (elf_sec_hdr(obj, scn, &sh))
2842
+ return -LIBBPF_ERRNO__FORMAT;
2843
+
2844
+ name = elf_sec_str(obj, sh.sh_name);
2845
+ if (!name)
2846
+ return -LIBBPF_ERRNO__FORMAT;
2847
+
2848
+ if (ignore_elf_section(&sh, name))
2849
+ continue;
2850
+
2851
+ data = elf_sec_data(obj, scn);
2852
+ if (!data)
2853
+ return -LIBBPF_ERRNO__FORMAT;
2854
+
2855
+ pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
2856
+ idx, name, (unsigned long)data->d_size,
2857
+ (int)sh.sh_link, (unsigned long)sh.sh_flags,
2858
+ (int)sh.sh_type);
2859
+
2860
+ if (strcmp(name, "license") == 0) {
2861
+ err = bpf_object__init_license(obj, data->d_buf, data->d_size);
2862
+ if (err)
2863
+ return err;
2864
+ } else if (strcmp(name, "version") == 0) {
2865
+ err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
2866
+ if (err)
2867
+ return err;
2868
+ } else if (strcmp(name, "maps") == 0) {
2869
+ obj->efile.maps_shndx = idx;
2870
+ } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
2871
+ obj->efile.btf_maps_shndx = idx;
2872
+ } else if (strcmp(name, BTF_ELF_SEC) == 0) {
2873
+ btf_data = data;
2874
+ } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
2875
+ btf_ext_data = data;
2876
+ } else if (sh.sh_type == SHT_SYMTAB) {
2877
+ /* already processed during the first pass above */
2878
+ } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
2879
+ if (sh.sh_flags & SHF_EXECINSTR) {
2880
+ if (strcmp(name, ".text") == 0)
2881
+ obj->efile.text_shndx = idx;
2882
+ err = bpf_object__add_programs(obj, data, name, idx);
2883
+ if (err)
2884
+ return err;
2885
+ } else if (strcmp(name, DATA_SEC) == 0) {
2886
+ obj->efile.data = data;
2887
+ obj->efile.data_shndx = idx;
2888
+ } else if (strcmp(name, RODATA_SEC) == 0) {
2889
+ obj->efile.rodata = data;
2890
+ obj->efile.rodata_shndx = idx;
2891
+ } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
2892
+ obj->efile.st_ops_data = data;
2893
+ obj->efile.st_ops_shndx = idx;
2894
+ } else {
2895
+ pr_info("elf: skipping unrecognized data section(%d) %s\n",
2896
+ idx, name);
2897
+ }
2898
+ } else if (sh.sh_type == SHT_REL) {
2899
+ int nr_sects = obj->efile.nr_reloc_sects;
2900
+ void *sects = obj->efile.reloc_sects;
2901
+ int sec = sh.sh_info; /* points to other section */
2902
+
2903
+ /* Only do relo for section with exec instructions */
2904
+ if (!section_have_execinstr(obj, sec) &&
2905
+ strcmp(name, ".rel" STRUCT_OPS_SEC) &&
2906
+ strcmp(name, ".rel" MAPS_ELF_SEC)) {
2907
+ pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
2908
+ idx, name, sec,
2909
+ elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
2910
+ continue;
2911
+ }
2912
+
2913
+ sects = libbpf_reallocarray(sects, nr_sects + 1,
2914
+ sizeof(*obj->efile.reloc_sects));
2915
+ if (!sects)
2916
+ return -ENOMEM;
2917
+
2918
+ obj->efile.reloc_sects = sects;
2919
+ obj->efile.nr_reloc_sects++;
2920
+
2921
+ obj->efile.reloc_sects[nr_sects].shdr = sh;
2922
+ obj->efile.reloc_sects[nr_sects].data = data;
2923
+ } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
2924
+ obj->efile.bss = data;
2925
+ obj->efile.bss_shndx = idx;
2926
+ } else {
2927
+ pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
2928
+ (size_t)sh.sh_size);
2929
+ }
2930
+ }
2931
+
2932
+ if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
2933
+ pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
2934
+ return -LIBBPF_ERRNO__FORMAT;
2935
+ }
2936
+
2937
+ /* sort BPF programs by section index and in-section instruction offset
2938
+ * for faster search */
2939
+ qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
2940
+
2941
+ return bpf_object__init_btf(obj, btf_data, btf_ext_data);
2942
+}
2943
+
2944
+static bool sym_is_extern(const GElf_Sym *sym)
2945
+{
2946
+ int bind = GELF_ST_BIND(sym->st_info);
2947
+ /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
2948
+ return sym->st_shndx == SHN_UNDEF &&
2949
+ (bind == STB_GLOBAL || bind == STB_WEAK) &&
2950
+ GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
2951
+}
2952
+
2953
+static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
2954
+{
2955
+ const struct btf_type *t;
2956
+ const char *var_name;
2957
+ int i, n;
2958
+
2959
+ if (!btf)
2960
+ return -ESRCH;
2961
+
2962
+ n = btf__get_nr_types(btf);
2963
+ for (i = 1; i <= n; i++) {
2964
+ t = btf__type_by_id(btf, i);
2965
+
2966
+ if (!btf_is_var(t))
2967
+ continue;
2968
+
2969
+ var_name = btf__name_by_offset(btf, t->name_off);
2970
+ if (strcmp(var_name, ext_name))
2971
+ continue;
2972
+
2973
+ if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
2974
+ return -EINVAL;
2975
+
2976
+ return i;
2977
+ }
2978
+
2979
+ return -ENOENT;
2980
+}
2981
+
2982
+static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
+{
2983
+ const struct btf_var_secinfo *vs;
2984
+ const struct btf_type *t;
2985
+ int i, j, n;
2986
+
2987
+ if (!btf)
2988
+ return -ESRCH;
2989
+
2990
+ n = btf__get_nr_types(btf);
2991
+ for (i = 1; i <= n; i++) {
2992
+ t = btf__type_by_id(btf, i);
2993
+
2994
+ if (!btf_is_datasec(t))
2995
+ continue;
2996
+
2997
+ vs = btf_var_secinfos(t);
2998
+ for (j = 0; j < btf_vlen(t); j++, vs++) {
2999
+ if (vs->type == ext_btf_id)
3000
+ return i;
3001
+ }
3002
+ }
3003
+
3004
+ return -ENOENT;
3005
+}
3006
+
3007
+static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3008
+ bool *is_signed)
3009
+{
3010
+ const struct btf_type *t;
3011
+ const char *name;
3012
+
3013
+ t = skip_mods_and_typedefs(btf, id, NULL);
3014
+ name = btf__name_by_offset(btf, t->name_off);
3015
+
3016
+ if (is_signed)
3017
+ *is_signed = false;
3018
+ switch (btf_kind(t)) {
3019
+ case BTF_KIND_INT: {
3020
+ int enc = btf_int_encoding(t);
3021
+
3022
+ if (enc & BTF_INT_BOOL)
3023
+ return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3024
+ if (is_signed)
3025
+ *is_signed = enc & BTF_INT_SIGNED;
3026
+ if (t->size == 1)
3027
+ return KCFG_CHAR;
3028
+ if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3029
+ return KCFG_UNKNOWN;
3030
+ return KCFG_INT;
3031
+ }
3032
+ case BTF_KIND_ENUM:
3033
+ if (t->size != 4)
3034
+ return KCFG_UNKNOWN;
3035
+ if (strcmp(name, "libbpf_tristate"))
3036
+ return KCFG_UNKNOWN;
3037
+ return KCFG_TRISTATE;
3038
+ case BTF_KIND_ARRAY:
3039
+ if (btf_array(t)->nelems == 0)
3040
+ return KCFG_UNKNOWN;
3041
+ if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3042
+ return KCFG_UNKNOWN;
3043
+ return KCFG_CHAR_ARR;
3044
+ default:
3045
+ return KCFG_UNKNOWN;
3046
+ }
3047
+}
3048
+
3049
+static int cmp_externs(const void *_a, const void *_b)
3050
+{
3051
+ const struct extern_desc *a = _a;
3052
+ const struct extern_desc *b = _b;
3053
+
3054
+ if (a->type != b->type)
3055
+ return a->type < b->type ? -1 : 1;
3056
+
3057
+ if (a->type == EXT_KCFG) {
3058
+ /* descending order by alignment requirements */
3059
+ if (a->kcfg.align != b->kcfg.align)
3060
+ return a->kcfg.align > b->kcfg.align ? -1 : 1;
3061
+ /* ascending order by size, within same alignment class */
3062
+ if (a->kcfg.sz != b->kcfg.sz)
3063
+ return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3064
+ }
3065
+
3066
+ /* resolve ties by name */
3067
+ return strcmp(a->name, b->name);
3068
+}
3069
+
3070
+static int find_int_btf_id(const struct btf *btf)
3071
+{
3072
+ const struct btf_type *t;
3073
+ int i, n;
3074
+
3075
+ n = btf__get_nr_types(btf);
3076
+ for (i = 1; i <= n; i++) {
3077
+ t = btf__type_by_id(btf, i);
3078
+
3079
+ if (btf_is_int(t) && btf_int_bits(t) == 32)
3080
+ return i;
3081
+ }
3082
+
3083
+ return 0;
3084
+}
3085
+
3086
+static int bpf_object__collect_externs(struct bpf_object *obj)
3087
+{
3088
+ struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3089
+ const struct btf_type *t;
3090
+ struct extern_desc *ext;
3091
+ int i, n, off;
3092
+ const char *ext_name, *sec_name;
3093
+ Elf_Scn *scn;
3094
+ GElf_Shdr sh;
3095
+
3096
+ if (!obj->efile.symbols)
3097
+ return 0;
3098
+
3099
+ scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3100
+ if (elf_sec_hdr(obj, scn, &sh))
3101
+ return -LIBBPF_ERRNO__FORMAT;
3102
+
3103
+ n = sh.sh_size / sh.sh_entsize;
3104
+ pr_debug("looking for externs among %d symbols...\n", n);
3105
+
3106
+ for (i = 0; i < n; i++) {
3107
+ GElf_Sym sym;
3108
+
3109
+ if (!gelf_getsym(obj->efile.symbols, i, &sym))
3110
+ return -LIBBPF_ERRNO__FORMAT;
3111
+ if (!sym_is_extern(&sym))
3112
+ continue;
3113
+ ext_name = elf_sym_str(obj, sym.st_name);
3114
+ if (!ext_name || !ext_name[0])
3115
+ continue;
3116
+
3117
+ ext = obj->externs;
3118
+ ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3119
+ if (!ext)
3120
+ return -ENOMEM;
3121
+ obj->externs = ext;
3122
+ ext = &ext[obj->nr_extern];
3123
+ memset(ext, 0, sizeof(*ext));
3124
+ obj->nr_extern++;
3125
+
3126
+ ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3127
+ if (ext->btf_id <= 0) {
3128
+ pr_warn("failed to find BTF for extern '%s': %d\n",
3129
+ ext_name, ext->btf_id);
3130
+ return ext->btf_id;
3131
+ }
3132
+ t = btf__type_by_id(obj->btf, ext->btf_id);
3133
+ ext->name = btf__name_by_offset(obj->btf, t->name_off);
3134
+ ext->sym_idx = i;
3135
+ ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
3136
+
3137
+ ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3138
+ if (ext->sec_btf_id <= 0) {
3139
+ pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3140
+ ext_name, ext->btf_id, ext->sec_btf_id);
3141
+ return ext->sec_btf_id;
3142
+ }
3143
+ sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3144
+ sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3145
+
3146
+ if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3147
+ kcfg_sec = sec;
3148
+ ext->type = EXT_KCFG;
3149
+ ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3150
+ if (ext->kcfg.sz <= 0) {
3151
+ pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3152
+ ext_name, ext->kcfg.sz);
3153
+ return ext->kcfg.sz;
3154
+ }
3155
+ ext->kcfg.align = btf__align_of(obj->btf, t->type);
3156
+ if (ext->kcfg.align <= 0) {
3157
+ pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3158
+ ext_name, ext->kcfg.align);
3159
+ return -EINVAL;
3160
+ }
3161
+ ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3162
+ &ext->kcfg.is_signed);
3163
+ if (ext->kcfg.type == KCFG_UNKNOWN) {
3164
+ pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
3165
+ return -ENOTSUP;
3166
+ }
3167
+ } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3168
+ ksym_sec = sec;
3169
+ ext->type = EXT_KSYM;
3170
+ skip_mods_and_typedefs(obj->btf, t->type,
3171
+ &ext->ksym.type_id);
3172
+ } else {
3173
+ pr_warn("unrecognized extern section '%s'\n", sec_name);
3174
+ return -ENOTSUP;
3175
+ }
3176
+ }
3177
+ pr_debug("collected %d externs total\n", obj->nr_extern);
3178
+
3179
+ if (!obj->nr_extern)
3180
+ return 0;
3181
+
3182
+ /* sort externs by type, for kcfg ones also by (align, size, name) */
3183
+ qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3184
+
3185
+ /* for .ksyms section, we need to turn all externs into allocated
3186
+ * variables in BTF to pass kernel verification; we do this by
3187
+ * pretending that each extern is an int-sized variable
3188
+ */
3189
+ if (ksym_sec) {
3190
+ /* find existing 4-byte integer type in BTF to use for fake
3191
+ * extern variables in DATASEC
3192
+ */
3193
+ int int_btf_id = find_int_btf_id(obj->btf);
3194
+
3195
+ for (i = 0; i < obj->nr_extern; i++) {
3196
+ ext = &obj->externs[i];
3197
+ if (ext->type != EXT_KSYM)
3198
+ continue;
3199
+ pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3200
+ i, ext->sym_idx, ext->name);
3201
+ }
3202
+
3203
+ sec = ksym_sec;
3204
+ n = btf_vlen(sec);
3205
+ for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3206
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3207
+ struct btf_type *vt;
3208
+
3209
+ vt = (void *)btf__type_by_id(obj->btf, vs->type);
3210
+ ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3211
+ ext = find_extern_by_name(obj, ext_name);
3212
+ if (!ext) {
3213
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
3214
+ ext_name);
3215
+ return -ESRCH;
3216
+ }
3217
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3218
+ vt->type = int_btf_id;
3219
+ vs->offset = off;
3220
+ vs->size = sizeof(int);
3221
+ }
3222
+ sec->size = off;
3223
+ }
3224
+
3225
+ if (kcfg_sec) {
3226
+ sec = kcfg_sec;
3227
+ /* for kcfg externs calculate their offsets within a .kconfig map */
3228
+ off = 0;
3229
+ for (i = 0; i < obj->nr_extern; i++) {
3230
+ ext = &obj->externs[i];
3231
+ if (ext->type != EXT_KCFG)
3232
+ continue;
3233
+
3234
+ ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3235
+ off = ext->kcfg.data_off + ext->kcfg.sz;
3236
+ pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3237
+ i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3238
+ }
3239
+ sec->size = off;
3240
+ n = btf_vlen(sec);
3241
+ for (i = 0; i < n; i++) {
3242
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3243
+
3244
+ t = btf__type_by_id(obj->btf, vs->type);
3245
+ ext_name = btf__name_by_offset(obj->btf, t->name_off);
3246
+ ext = find_extern_by_name(obj, ext_name);
3247
+ if (!ext) {
3248
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
3249
+ ext_name);
3250
+ return -ESRCH;
3251
+ }
3252
+ btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3253
+ vs->offset = ext->kcfg.data_off;
3254
+ }
3255
+ }
3256
+ return 0;
3257
+}
3258
+
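/* Illustrative BPF-side declarations (assumed examples, not part of the patch)
 * for the two extern kinds collected above; __kconfig and __ksym are the
 * section attributes provided by bpf_helpers.h.
 */

extern int CONFIG_HZ __kconfig;            /* EXT_KCFG: backed by the .kconfig map   */
extern const void bpf_prog_active __ksym;  /* EXT_KSYM: resolved to a kernel address */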
8823259 struct bpf_program *
883
-bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
3260
+bpf_object__find_program_by_title(const struct bpf_object *obj,
3261
+ const char *title)
8843262 {
8853263 struct bpf_program *pos;
8863264
8873265 bpf_object__for_each_program(pos, obj) {
888
- if (pos->section_name && !strcmp(pos->section_name, title))
3266
+ if (pos->sec_name && !strcmp(pos->sec_name, title))
8893267 return pos;
8903268 }
8913269 return NULL;
8923270 }
8933271
3272
+static bool prog_is_subprog(const struct bpf_object *obj,
3273
+ const struct bpf_program *prog)
3274
+{
3275
+ /* For legacy reasons, libbpf supports entry-point BPF programs
3276
+ * without a SEC() attribute, i.e., those in the .text section. But if
3277
+ * there are 2 or more such programs in the .text section, they all
3278
+ * must be subprograms called from entry-point BPF programs in
3279
+ * designated SEC()'tions, otherwise there is no way to distinguish
3280
+ * which of those programs should be loaded vs which are a subprogram.
3281
+ * Similarly, if there is a function/program in .text and at least one
3282
+ * other BPF program with custom SEC() attribute, then we just assume
3283
+ * .text programs are subprograms (even if they are not called from
3284
+ * other programs), because libbpf never explicitly supported mixing
3285
+ * SEC()-designated BPF programs and .text entry-point BPF programs.
3286
+ */
3287
+ return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3288
+}
3289
+
3290
+struct bpf_program *
3291
+bpf_object__find_program_by_name(const struct bpf_object *obj,
3292
+ const char *name)
3293
+{
3294
+ struct bpf_program *prog;
3295
+
3296
+ bpf_object__for_each_program(prog, obj) {
3297
+ if (prog_is_subprog(obj, prog))
3298
+ continue;
3299
+ if (!strcmp(prog->name, name))
3300
+ return prog;
3301
+ }
3302
+ return NULL;
3303
+}
3304
+
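/* A minimal usage sketch of the two lookup flavors above; the program
 * name and SEC() string are hypothetical.
 */
static struct bpf_program *find_handler(const struct bpf_object *obj)
{
	/* matches on the C function name and skips .text subprograms */
	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "handle_exec");

	/* the older lookup matches on the SEC() string instead */
	if (!prog)
		prog = bpf_object__find_program_by_title(obj, "tp/sched/sched_process_exec");
	return prog;
}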
3305
+static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3306
+ int shndx)
3307
+{
3308
+ return shndx == obj->efile.data_shndx ||
3309
+ shndx == obj->efile.bss_shndx ||
3310
+ shndx == obj->efile.rodata_shndx;
3311
+}
3312
+
3313
+static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3314
+ int shndx)
3315
+{
3316
+ return shndx == obj->efile.maps_shndx ||
3317
+ shndx == obj->efile.btf_maps_shndx;
3318
+}
3319
+
3320
+static enum libbpf_map_type
3321
+bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3322
+{
3323
+ if (shndx == obj->efile.data_shndx)
3324
+ return LIBBPF_MAP_DATA;
3325
+ else if (shndx == obj->efile.bss_shndx)
3326
+ return LIBBPF_MAP_BSS;
3327
+ else if (shndx == obj->efile.rodata_shndx)
3328
+ return LIBBPF_MAP_RODATA;
3329
+ else if (shndx == obj->efile.symbols_shndx)
3330
+ return LIBBPF_MAP_KCONFIG;
3331
+ else
3332
+ return LIBBPF_MAP_UNSPEC;
3333
+}
3334
+
3335
+static int bpf_program__record_reloc(struct bpf_program *prog,
3336
+ struct reloc_desc *reloc_desc,
3337
+ __u32 insn_idx, const char *sym_name,
3338
+ const GElf_Sym *sym, const GElf_Rel *rel)
3339
+{
3340
+ struct bpf_insn *insn = &prog->insns[insn_idx];
3341
+ size_t map_idx, nr_maps = prog->obj->nr_maps;
3342
+ struct bpf_object *obj = prog->obj;
3343
+ __u32 shdr_idx = sym->st_shndx;
3344
+ enum libbpf_map_type type;
3345
+ const char *sym_sec_name;
3346
+ struct bpf_map *map;
3347
+
3348
+ reloc_desc->processed = false;
3349
+
3350
+ /* sub-program call relocation */
3351
+ if (insn->code == (BPF_JMP | BPF_CALL)) {
3352
+ if (insn->src_reg != BPF_PSEUDO_CALL) {
3353
+ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
3354
+ return -LIBBPF_ERRNO__RELOC;
3355
+ }
3356
+ /* text_shndx can be 0, if no default "main" program exists */
3357
+ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
3358
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3359
+ pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
3360
+ prog->name, sym_name, sym_sec_name);
3361
+ return -LIBBPF_ERRNO__RELOC;
3362
+ }
3363
+ if (sym->st_value % BPF_INSN_SZ) {
3364
+ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
3365
+ prog->name, sym_name, (size_t)sym->st_value);
3366
+ return -LIBBPF_ERRNO__RELOC;
3367
+ }
3368
+ reloc_desc->type = RELO_CALL;
3369
+ reloc_desc->insn_idx = insn_idx;
3370
+ reloc_desc->sym_off = sym->st_value;
3371
+ return 0;
3372
+ }
3373
+
3374
+ if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
3375
+ pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
3376
+ prog->name, sym_name, insn_idx, insn->code);
3377
+ return -LIBBPF_ERRNO__RELOC;
3378
+ }
3379
+
3380
+ if (sym_is_extern(sym)) {
3381
+ int sym_idx = GELF_R_SYM(rel->r_info);
3382
+ int i, n = obj->nr_extern;
3383
+ struct extern_desc *ext;
3384
+
3385
+ for (i = 0; i < n; i++) {
3386
+ ext = &obj->externs[i];
3387
+ if (ext->sym_idx == sym_idx)
3388
+ break;
3389
+ }
3390
+ if (i >= n) {
3391
+ pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
3392
+ prog->name, sym_name, sym_idx);
3393
+ return -LIBBPF_ERRNO__RELOC;
3394
+ }
3395
+ pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
3396
+ prog->name, i, ext->name, ext->sym_idx, insn_idx);
3397
+ reloc_desc->type = RELO_EXTERN;
3398
+ reloc_desc->insn_idx = insn_idx;
3399
+ reloc_desc->sym_off = i; /* sym_off stores extern index */
3400
+ return 0;
3401
+ }
3402
+
3403
+ if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
3404
+ pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
3405
+ prog->name, sym_name, shdr_idx);
3406
+ return -LIBBPF_ERRNO__RELOC;
3407
+ }
3408
+
3409
+ type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
3410
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
3411
+
3412
+ /* generic map reference relocation */
3413
+ if (type == LIBBPF_MAP_UNSPEC) {
3414
+ if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
3415
+ pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
3416
+ prog->name, sym_name, sym_sec_name);
3417
+ return -LIBBPF_ERRNO__RELOC;
3418
+ }
3419
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3420
+ map = &obj->maps[map_idx];
3421
+ if (map->libbpf_type != type ||
3422
+ map->sec_idx != sym->st_shndx ||
3423
+ map->sec_offset != sym->st_value)
3424
+ continue;
3425
+ pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
3426
+ prog->name, map_idx, map->name, map->sec_idx,
3427
+ map->sec_offset, insn_idx);
3428
+ break;
3429
+ }
3430
+ if (map_idx >= nr_maps) {
3431
+ pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
3432
+ prog->name, sym_sec_name, (size_t)sym->st_value);
3433
+ return -LIBBPF_ERRNO__RELOC;
3434
+ }
3435
+ reloc_desc->type = RELO_LD64;
3436
+ reloc_desc->insn_idx = insn_idx;
3437
+ reloc_desc->map_idx = map_idx;
3438
+ reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
3439
+ return 0;
3440
+ }
3441
+
3442
+ /* global data map relocation */
3443
+ if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
3444
+ pr_warn("prog '%s': bad data relo against section '%s'\n",
3445
+ prog->name, sym_sec_name);
3446
+ return -LIBBPF_ERRNO__RELOC;
3447
+ }
3448
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
3449
+ map = &obj->maps[map_idx];
3450
+ if (map->libbpf_type != type)
3451
+ continue;
3452
+ pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
3453
+ prog->name, map_idx, map->name, map->sec_idx,
3454
+ map->sec_offset, insn_idx);
3455
+ break;
3456
+ }
3457
+ if (map_idx >= nr_maps) {
3458
+ pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
3459
+ prog->name, sym_sec_name);
3460
+ return -LIBBPF_ERRNO__RELOC;
3461
+ }
3462
+
3463
+ reloc_desc->type = RELO_DATA;
3464
+ reloc_desc->insn_idx = insn_idx;
3465
+ reloc_desc->map_idx = map_idx;
3466
+ reloc_desc->sym_off = sym->st_value;
3467
+ return 0;
3468
+}
3469
+
3470
+static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
3471
+{
3472
+ return insn_idx >= prog->sec_insn_off &&
3473
+ insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
3474
+}
3475
+
3476
+static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
3477
+ size_t sec_idx, size_t insn_idx)
3478
+{
3479
+ int l = 0, r = obj->nr_programs - 1, m;
3480
+ struct bpf_program *prog;
3481
+
3482
+ if (!obj->nr_programs)
3483
+ return NULL;
3484
+
3485
+ while (l < r) {
3486
+ m = l + (r - l + 1) / 2;
3487
+ prog = &obj->programs[m];
3488
+
3489
+ if (prog->sec_idx < sec_idx ||
3490
+ (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
3491
+ l = m;
3492
+ else
3493
+ r = m - 1;
3494
+ }
3495
+ /* matching program could be at index l, but it still might be the
3496
+ * wrong one, so we need to double check conditions for the last time
3497
+ */
3498
+ prog = &obj->programs[l];
3499
+ if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
3500
+ return prog;
3501
+ return NULL;
3502
+}
3503
+
8943504 static int
895
-bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
896
- Elf_Data *data, struct bpf_object *obj)
3505
+bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
8973506 {
8983507 Elf_Data *symbols = obj->efile.symbols;
899
- int text_shndx = obj->efile.text_shndx;
900
- int maps_shndx = obj->efile.maps_shndx;
901
- struct bpf_map *maps = obj->maps;
902
- size_t nr_maps = obj->nr_maps;
903
- int i, nrels;
3508
+ const char *relo_sec_name, *sec_name;
3509
+ size_t sec_idx = shdr->sh_info;
3510
+ struct bpf_program *prog;
3511
+ struct reloc_desc *relos;
3512
+ int err, i, nrels;
3513
+ const char *sym_name;
3514
+ __u32 insn_idx;
3515
+ GElf_Sym sym;
3516
+ GElf_Rel rel;
9043517
905
- pr_debug("collecting relocating info for: '%s'\n",
906
- prog->section_name);
3518
+ relo_sec_name = elf_sec_str(obj, shdr->sh_name);
3519
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
3520
+ if (!relo_sec_name || !sec_name)
3521
+ return -EINVAL;
3522
+
3523
+ pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
3524
+ relo_sec_name, sec_idx, sec_name);
9073525 nrels = shdr->sh_size / shdr->sh_entsize;
9083526
909
- prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
910
- if (!prog->reloc_desc) {
911
- pr_warning("failed to alloc memory in relocation\n");
912
- return -ENOMEM;
913
- }
914
- prog->nr_reloc = nrels;
915
-
9163527 for (i = 0; i < nrels; i++) {
917
- GElf_Sym sym;
918
- GElf_Rel rel;
919
- unsigned int insn_idx;
920
- struct bpf_insn *insns = prog->insns;
921
- size_t map_idx;
922
-
9233528 if (!gelf_getrel(data, i, &rel)) {
924
- pr_warning("relocation: failed to get %d reloc\n", i);
3529
+ pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
3530
+ return -LIBBPF_ERRNO__FORMAT;
3531
+ }
3532
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
3533
+ pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
3534
+ relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
3535
+ return -LIBBPF_ERRNO__FORMAT;
3536
+ }
3537
+ if (rel.r_offset % BPF_INSN_SZ) {
3538
+ pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
3539
+ relo_sec_name, (size_t)rel.r_offset, i);
9253540 return -LIBBPF_ERRNO__FORMAT;
9263541 }
9273542
928
- if (!gelf_getsym(symbols,
929
- GELF_R_SYM(rel.r_info),
930
- &sym)) {
931
- pr_warning("relocation: symbol %"PRIx64" not found\n",
932
- GELF_R_SYM(rel.r_info));
933
- return -LIBBPF_ERRNO__FORMAT;
934
- }
935
- pr_debug("relo for %lld value %lld name %d\n",
936
- (long long) (rel.r_info >> 32),
937
- (long long) sym.st_value, sym.st_name);
3543
+ insn_idx = rel.r_offset / BPF_INSN_SZ;
3544
+ /* relocations against static functions are recorded as
3545
+ * relocations against the section that contains a function;
3546
+ * in such a case, the symbol will be STT_SECTION and sym.st_name
3547
+ * will point to empty string (0), so fetch section name
3548
+ * instead
3549
+ */
3550
+ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
3551
+ sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
3552
+ else
3553
+ sym_name = elf_sym_str(obj, sym.st_name);
3554
+ sym_name = sym_name ?: "<?";
9383555
939
- if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
940
- pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
941
- prog->section_name, sym.st_shndx);
3556
+ pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
3557
+ relo_sec_name, i, insn_idx, sym_name);
3558
+
3559
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
3560
+ if (!prog) {
3561
+ pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
3562
+ relo_sec_name, i, sec_name, insn_idx);
9423563 return -LIBBPF_ERRNO__RELOC;
9433564 }
9443565
945
- insn_idx = rel.r_offset / sizeof(struct bpf_insn);
946
- pr_debug("relocation: insn_idx=%u\n", insn_idx);
3566
+ relos = libbpf_reallocarray(prog->reloc_desc,
3567
+ prog->nr_reloc + 1, sizeof(*relos));
3568
+ if (!relos)
3569
+ return -ENOMEM;
3570
+ prog->reloc_desc = relos;
9473571
948
- if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
949
- if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
950
- pr_warning("incorrect bpf_call opcode\n");
951
- return -LIBBPF_ERRNO__RELOC;
952
- }
953
- prog->reloc_desc[i].type = RELO_CALL;
954
- prog->reloc_desc[i].insn_idx = insn_idx;
955
- prog->reloc_desc[i].text_off = sym.st_value;
956
- obj->has_pseudo_calls = true;
957
- continue;
958
- }
3572
+ /* adjust insn_idx to local BPF program frame of reference */
3573
+ insn_idx -= prog->sec_insn_off;
3574
+ err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
3575
+ insn_idx, sym_name, &sym, &rel);
3576
+ if (err)
3577
+ return err;
9593578
960
- if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
961
- pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
962
- insn_idx, insns[insn_idx].code);
963
- return -LIBBPF_ERRNO__RELOC;
964
- }
965
-
966
- /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */
967
- for (map_idx = 0; map_idx < nr_maps; map_idx++) {
968
- if (maps[map_idx].offset == sym.st_value) {
969
- pr_debug("relocation: find map %zd (%s) for insn %u\n",
970
- map_idx, maps[map_idx].name, insn_idx);
971
- break;
972
- }
973
- }
974
-
975
- if (map_idx >= nr_maps) {
976
- pr_warning("bpf relocation: map_idx %d large than %d\n",
977
- (int)map_idx, (int)nr_maps - 1);
978
- return -LIBBPF_ERRNO__RELOC;
979
- }
980
-
981
- prog->reloc_desc[i].type = RELO_LD64;
982
- prog->reloc_desc[i].insn_idx = insn_idx;
983
- prog->reloc_desc[i].map_idx = map_idx;
3579
+ prog->nr_reloc++;
9843580 }
9853581 return 0;
9863582 }
9873583
988
-static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
3584
+static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
9893585 {
990
- const struct btf_type *container_type;
991
- const struct btf_member *key, *value;
9923586 struct bpf_map_def *def = &map->def;
993
- const size_t max_name = 256;
994
- char container_name[max_name];
995
- __s64 key_size, value_size;
996
- __s32 container_id;
3587
+ __u32 key_type_id = 0, value_type_id = 0;
3588
+ int ret;
9973589
998
- if (snprintf(container_name, max_name, "____btf_map_%s", map->name) ==
999
- max_name) {
1000
- pr_warning("map:%s length of '____btf_map_%s' is too long\n",
1001
- map->name, map->name);
1002
- return -EINVAL;
3590
+ /* if it's a BTF-defined map, we don't need to search for type IDs.
3591
+ * For struct_ops map, it does not need btf_key_type_id and
3592
+ * btf_value_type_id.
3593
+ */
3594
+ if (map->sec_idx == obj->efile.btf_maps_shndx ||
3595
+ bpf_map__is_struct_ops(map))
3596
+ return 0;
3597
+
3598
+ if (!bpf_map__is_internal(map)) {
3599
+ ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
3600
+ def->value_size, &key_type_id,
3601
+ &value_type_id);
3602
+ } else {
3603
+ /*
3604
+ * LLVM annotates global data differently in BTF, that is,
3605
+ * only as '.data', '.bss' or '.rodata'.
3606
+ */
3607
+ ret = btf__find_by_name(obj->btf,
3608
+ libbpf_type_to_btf_name[map->libbpf_type]);
3609
+ }
3610
+ if (ret < 0)
3611
+ return ret;
3612
+
3613
+ map->btf_key_type_id = key_type_id;
3614
+ map->btf_value_type_id = bpf_map__is_internal(map) ?
3615
+ ret : value_type_id;
3616
+ return 0;
3617
+}
3618
+
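/* Sketch of a BTF-defined map on the BPF side, for which the legacy
 * type-ID lookup above is skipped entirely; __uint/__type are
 * bpf_helpers.h macros and the map/value names are examples.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");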
3619
+static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
3620
+{
3621
+ char file[PATH_MAX], buff[4096];
3622
+ FILE *fp;
3623
+ __u32 val;
3624
+ int err;
3625
+
3626
+ snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
3627
+ memset(info, 0, sizeof(*info));
3628
+
3629
+ fp = fopen(file, "r");
3630
+ if (!fp) {
3631
+ err = -errno;
3632
+ pr_warn("failed to open %s: %d. No procfs support?\n", file,
3633
+ err);
3634
+ return err;
10033635 }
10043636
1005
- container_id = btf__find_by_name(btf, container_name);
1006
- if (container_id < 0) {
1007
- pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
1008
- map->name, container_name);
1009
- return container_id;
3637
+ while (fgets(buff, sizeof(buff), fp)) {
3638
+ if (sscanf(buff, "map_type:\t%u", &val) == 1)
3639
+ info->type = val;
3640
+ else if (sscanf(buff, "key_size:\t%u", &val) == 1)
3641
+ info->key_size = val;
3642
+ else if (sscanf(buff, "value_size:\t%u", &val) == 1)
3643
+ info->value_size = val;
3644
+ else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
3645
+ info->max_entries = val;
3646
+ else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
3647
+ info->map_flags = val;
10103648 }
10113649
1012
- container_type = btf__type_by_id(btf, container_id);
1013
- if (!container_type) {
1014
- pr_warning("map:%s cannot find BTF type for container_id:%u\n",
1015
- map->name, container_id);
1016
- return -EINVAL;
1017
- }
1018
-
1019
- if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT ||
1020
- BTF_INFO_VLEN(container_type->info) < 2) {
1021
- pr_warning("map:%s container_name:%s is an invalid container struct\n",
1022
- map->name, container_name);
1023
- return -EINVAL;
1024
- }
1025
-
1026
- key = (struct btf_member *)(container_type + 1);
1027
- value = key + 1;
1028
-
1029
- key_size = btf__resolve_size(btf, key->type);
1030
- if (key_size < 0) {
1031
- pr_warning("map:%s invalid BTF key_type_size\n",
1032
- map->name);
1033
- return key_size;
1034
- }
1035
-
1036
- if (def->key_size != key_size) {
1037
- pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
1038
- map->name, (__u32)key_size, def->key_size);
1039
- return -EINVAL;
1040
- }
1041
-
1042
- value_size = btf__resolve_size(btf, value->type);
1043
- if (value_size < 0) {
1044
- pr_warning("map:%s invalid BTF value_type_size\n", map->name);
1045
- return value_size;
1046
- }
1047
-
1048
- if (def->value_size != value_size) {
1049
- pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
1050
- map->name, (__u32)value_size, def->value_size);
1051
- return -EINVAL;
1052
- }
1053
-
1054
- map->btf_key_type_id = key->type;
1055
- map->btf_value_type_id = value->type;
3650
+ fclose(fp);
10563651
10573652 return 0;
10583653 }
....@@ -1060,15 +3655,22 @@
10603655 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
10613656 {
10623657 struct bpf_map_info info = {};
1063
- __u32 len = sizeof(info);
3658
+ __u32 len = sizeof(info), name_len;
10643659 int new_fd, err;
10653660 char *new_name;
10663661
10673662 err = bpf_obj_get_info_by_fd(fd, &info, &len);
3663
+ if (err && errno == EINVAL)
3664
+ err = bpf_get_map_info_from_fdinfo(fd, &info);
10683665 if (err)
10693666 return err;
10703667
1071
- new_name = strdup(info.name);
3668
+ name_len = strlen(info.name);
3669
+ if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
3670
+ new_name = strdup(map->name);
3671
+ else
3672
+ new_name = strdup(info.name);
3673
+
10723674 if (!new_name)
10733675 return -errno;
10743676
....@@ -1100,6 +3702,7 @@
11003702 map->def.map_flags = info.map_flags;
11013703 map->btf_key_type_id = info.btf_key_type_id;
11023704 map->btf_value_type_id = info.btf_value_type_id;
3705
+ map->reused = true;
11033706
11043707 return 0;
11053708
....@@ -1110,272 +3713,3102 @@
11103713 return err;
11113714 }
11123715
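/* Sketch of handing an existing map fd to libbpf via bpf_map__reuse_fd();
 * the pin path is hypothetical.
 */
static int reuse_pinned_map(struct bpf_map *map)
{
	int fd = bpf_obj_get("/sys/fs/bpf/shared_map");

	if (fd < 0)
		return fd;
	/* on success the bpf_map now refers to the already existing kernel map */
	return bpf_map__reuse_fd(map, fd);
}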
1113
-static int
1114
-bpf_object__create_maps(struct bpf_object *obj)
3716
+__u32 bpf_map__max_entries(const struct bpf_map *map)
11153717 {
1116
- struct bpf_create_map_attr create_attr = {};
1117
- unsigned int i;
3718
+ return map->def.max_entries;
3719
+}
3720
+
3721
+int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
3722
+{
3723
+ if (map->fd >= 0)
3724
+ return -EBUSY;
3725
+ map->def.max_entries = max_entries;
3726
+ return 0;
3727
+}
3728
+
3729
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
3730
+{
3731
+ if (!map || !max_entries)
3732
+ return -EINVAL;
3733
+
3734
+ return bpf_map__set_max_entries(map, max_entries);
3735
+}
3736
+
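/* Sketch of resizing a map between open and load; the map name is
 * hypothetical and sizing by CPU count is just an example.
 */
static int size_events_map(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "events");
	int nr_cpus = libbpf_num_possible_cpus();

	if (!map || nr_cpus < 0)
		return -1;
	/* must happen before bpf_object__load(); returns -EBUSY afterwards */
	return bpf_map__set_max_entries(map, nr_cpus);
}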
3737
+static int
3738
+bpf_object__probe_loading(struct bpf_object *obj)
3739
+{
3740
+ struct bpf_load_program_attr attr;
3741
+ char *cp, errmsg[STRERR_BUFSIZE];
3742
+ struct bpf_insn insns[] = {
3743
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3744
+ BPF_EXIT_INSN(),
3745
+ };
3746
+ int ret;
3747
+
3748
+ /* make sure basic loading works */
3749
+
3750
+ memset(&attr, 0, sizeof(attr));
3751
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3752
+ attr.insns = insns;
3753
+ attr.insns_cnt = ARRAY_SIZE(insns);
3754
+ attr.license = "GPL";
3755
+
3756
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
3757
+ if (ret < 0) {
3758
+ ret = errno;
3759
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3760
+ pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
3761
+ "program. Make sure your kernel supports BPF "
3762
+ "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
3763
+ "set to big enough value.\n", __func__, cp, ret);
3764
+ return -ret;
3765
+ }
3766
+ close(ret);
3767
+
3768
+ return 0;
3769
+}
3770
+
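/* The probe above is what reports the common RLIMIT_MEMLOCK failure; a
 * typical application-side workaround is to lift the limit before using
 * libbpf (a sketch, error handling omitted):
 */
#include <sys/resource.h>

static void bump_memlock_rlimit(void)
{
	struct rlimit rlim = { RLIM_INFINITY, RLIM_INFINITY };

	setrlimit(RLIMIT_MEMLOCK, &rlim);
}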
3771
+static int probe_fd(int fd)
3772
+{
3773
+ if (fd >= 0)
3774
+ close(fd);
3775
+ return fd >= 0;
3776
+}
3777
+
3778
+static int probe_kern_prog_name(void)
3779
+{
3780
+ struct bpf_load_program_attr attr;
3781
+ struct bpf_insn insns[] = {
3782
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3783
+ BPF_EXIT_INSN(),
3784
+ };
3785
+ int ret;
3786
+
3787
+ /* make sure loading with name works */
3788
+
3789
+ memset(&attr, 0, sizeof(attr));
3790
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3791
+ attr.insns = insns;
3792
+ attr.insns_cnt = ARRAY_SIZE(insns);
3793
+ attr.license = "GPL";
3794
+ attr.name = "test";
3795
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
3796
+ return probe_fd(ret);
3797
+}
3798
+
3799
+static int probe_kern_global_data(void)
3800
+{
3801
+ struct bpf_load_program_attr prg_attr;
3802
+ struct bpf_create_map_attr map_attr;
3803
+ char *cp, errmsg[STRERR_BUFSIZE];
3804
+ struct bpf_insn insns[] = {
3805
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
3806
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
3807
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3808
+ BPF_EXIT_INSN(),
3809
+ };
3810
+ int ret, map;
3811
+
3812
+ memset(&map_attr, 0, sizeof(map_attr));
3813
+ map_attr.map_type = BPF_MAP_TYPE_ARRAY;
3814
+ map_attr.key_size = sizeof(int);
3815
+ map_attr.value_size = 32;
3816
+ map_attr.max_entries = 1;
3817
+
3818
+ map = bpf_create_map_xattr(&map_attr);
3819
+ if (map < 0) {
3820
+ ret = -errno;
3821
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3822
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
3823
+ __func__, cp, -ret);
3824
+ return ret;
3825
+ }
3826
+
3827
+ insns[0].imm = map;
3828
+
3829
+ memset(&prg_attr, 0, sizeof(prg_attr));
3830
+ prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3831
+ prg_attr.insns = insns;
3832
+ prg_attr.insns_cnt = ARRAY_SIZE(insns);
3833
+ prg_attr.license = "GPL";
3834
+
3835
+ ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
3836
+ close(map);
3837
+ return probe_fd(ret);
3838
+}
3839
+
3840
+static int probe_kern_btf(void)
3841
+{
3842
+ static const char strs[] = "\0int";
3843
+ __u32 types[] = {
3844
+ /* int */
3845
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
3846
+ };
3847
+
3848
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3849
+ strs, sizeof(strs)));
3850
+}
3851
+
3852
+static int probe_kern_btf_func(void)
3853
+{
3854
+ static const char strs[] = "\0int\0x\0a";
3855
+ /* void x(int a) {} */
3856
+ __u32 types[] = {
3857
+ /* int */
3858
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
3859
+ /* FUNC_PROTO */ /* [2] */
3860
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3861
+ BTF_PARAM_ENC(7, 1),
3862
+ /* FUNC x */ /* [3] */
3863
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
3864
+ };
3865
+
3866
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3867
+ strs, sizeof(strs)));
3868
+}
3869
+
3870
+static int probe_kern_btf_func_global(void)
3871
+{
3872
+ static const char strs[] = "\0int\0x\0a";
3873
+ /* static void x(int a) {} */
3874
+ __u32 types[] = {
3875
+ /* int */
3876
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
3877
+ /* FUNC_PROTO */ /* [2] */
3878
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
3879
+ BTF_PARAM_ENC(7, 1),
3880
+ /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
3881
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
3882
+ };
3883
+
3884
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3885
+ strs, sizeof(strs)));
3886
+}
3887
+
3888
+static int probe_kern_btf_datasec(void)
3889
+{
3890
+ static const char strs[] = "\0x\0.data";
3891
+ /* static int a; */
3892
+ __u32 types[] = {
3893
+ /* int */
3894
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
3895
+ /* VAR x */ /* [2] */
3896
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
3897
+ BTF_VAR_STATIC,
3898
+ /* DATASEC val */ /* [3] */
3899
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
3900
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
3901
+ };
3902
+
3903
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
3904
+ strs, sizeof(strs)));
3905
+}
3906
+
3907
+static int probe_kern_array_mmap(void)
3908
+{
3909
+ struct bpf_create_map_attr attr = {
3910
+ .map_type = BPF_MAP_TYPE_ARRAY,
3911
+ .map_flags = BPF_F_MMAPABLE,
3912
+ .key_size = sizeof(int),
3913
+ .value_size = sizeof(int),
3914
+ .max_entries = 1,
3915
+ };
3916
+
3917
+ return probe_fd(bpf_create_map_xattr(&attr));
3918
+}
3919
+
3920
+static int probe_kern_exp_attach_type(void)
3921
+{
3922
+ struct bpf_load_program_attr attr;
3923
+ struct bpf_insn insns[] = {
3924
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3925
+ BPF_EXIT_INSN(),
3926
+ };
3927
+
3928
+ memset(&attr, 0, sizeof(attr));
3929
+ /* use any valid combination of program type and (optional)
3930
+ * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
3931
+ * to see if kernel supports expected_attach_type field for
3932
+ * BPF_PROG_LOAD command
3933
+ */
3934
+ attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
3935
+ attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
3936
+ attr.insns = insns;
3937
+ attr.insns_cnt = ARRAY_SIZE(insns);
3938
+ attr.license = "GPL";
3939
+
3940
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3941
+}
3942
+
3943
+static int probe_kern_probe_read_kernel(void)
3944
+{
3945
+ struct bpf_load_program_attr attr;
3946
+ struct bpf_insn insns[] = {
3947
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
3948
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
3949
+ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
3950
+ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
3951
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
3952
+ BPF_EXIT_INSN(),
3953
+ };
3954
+
3955
+ memset(&attr, 0, sizeof(attr));
3956
+ attr.prog_type = BPF_PROG_TYPE_KPROBE;
3957
+ attr.insns = insns;
3958
+ attr.insns_cnt = ARRAY_SIZE(insns);
3959
+ attr.license = "GPL";
3960
+
3961
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
3962
+}
3963
+
3964
+static int probe_prog_bind_map(void)
3965
+{
3966
+ struct bpf_load_program_attr prg_attr;
3967
+ struct bpf_create_map_attr map_attr;
3968
+ char *cp, errmsg[STRERR_BUFSIZE];
3969
+ struct bpf_insn insns[] = {
3970
+ BPF_MOV64_IMM(BPF_REG_0, 0),
3971
+ BPF_EXIT_INSN(),
3972
+ };
3973
+ int ret, map, prog;
3974
+
3975
+ memset(&map_attr, 0, sizeof(map_attr));
3976
+ map_attr.map_type = BPF_MAP_TYPE_ARRAY;
3977
+ map_attr.key_size = sizeof(int);
3978
+ map_attr.value_size = 32;
3979
+ map_attr.max_entries = 1;
3980
+
3981
+ map = bpf_create_map_xattr(&map_attr);
3982
+ if (map < 0) {
3983
+ ret = -errno;
3984
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
3985
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
3986
+ __func__, cp, -ret);
3987
+ return ret;
3988
+ }
3989
+
3990
+ memset(&prg_attr, 0, sizeof(prg_attr));
3991
+ prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
3992
+ prg_attr.insns = insns;
3993
+ prg_attr.insns_cnt = ARRAY_SIZE(insns);
3994
+ prg_attr.license = "GPL";
3995
+
3996
+ prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
3997
+ if (prog < 0) {
3998
+ close(map);
3999
+ return 0;
4000
+ }
4001
+
4002
+ ret = bpf_prog_bind_map(prog, map, NULL);
4003
+
4004
+ close(map);
4005
+ close(prog);
4006
+
4007
+ return ret >= 0;
4008
+}
4009
+
4010
+enum kern_feature_result {
4011
+ FEAT_UNKNOWN = 0,
4012
+ FEAT_SUPPORTED = 1,
4013
+ FEAT_MISSING = 2,
4014
+};
4015
+
4016
+typedef int (*feature_probe_fn)(void);
4017
+
4018
+static struct kern_feature_desc {
4019
+ const char *desc;
4020
+ feature_probe_fn probe;
4021
+ enum kern_feature_result res;
4022
+} feature_probes[__FEAT_CNT] = {
4023
+ [FEAT_PROG_NAME] = {
4024
+ "BPF program name", probe_kern_prog_name,
4025
+ },
4026
+ [FEAT_GLOBAL_DATA] = {
4027
+ "global variables", probe_kern_global_data,
4028
+ },
4029
+ [FEAT_BTF] = {
4030
+ "minimal BTF", probe_kern_btf,
4031
+ },
4032
+ [FEAT_BTF_FUNC] = {
4033
+ "BTF functions", probe_kern_btf_func,
4034
+ },
4035
+ [FEAT_BTF_GLOBAL_FUNC] = {
4036
+ "BTF global function", probe_kern_btf_func_global,
4037
+ },
4038
+ [FEAT_BTF_DATASEC] = {
4039
+ "BTF data section and variable", probe_kern_btf_datasec,
4040
+ },
4041
+ [FEAT_ARRAY_MMAP] = {
4042
+ "ARRAY map mmap()", probe_kern_array_mmap,
4043
+ },
4044
+ [FEAT_EXP_ATTACH_TYPE] = {
4045
+ "BPF_PROG_LOAD expected_attach_type attribute",
4046
+ probe_kern_exp_attach_type,
4047
+ },
4048
+ [FEAT_PROBE_READ_KERN] = {
4049
+ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
4050
+ },
4051
+ [FEAT_PROG_BIND_MAP] = {
4052
+ "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
4053
+ }
4054
+};
4055
+
4056
+static bool kernel_supports(enum kern_feature_id feat_id)
4057
+{
4058
+ struct kern_feature_desc *feat = &feature_probes[feat_id];
4059
+ int ret;
4060
+
4061
+ if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4062
+ ret = feat->probe();
4063
+ if (ret > 0) {
4064
+ WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4065
+ } else if (ret == 0) {
4066
+ WRITE_ONCE(feat->res, FEAT_MISSING);
4067
+ } else {
4068
+ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4069
+ WRITE_ONCE(feat->res, FEAT_MISSING);
4070
+ }
4071
+ }
4072
+
4073
+ return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4074
+}
4075
+
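/* Sketch of how callers elsewhere in this file consume the cached probe
 * results; the helper name and surrounding attr setup are hypothetical.
 */
static void set_btf_fd_if_supported(struct bpf_load_program_attr *attr, const struct btf *btf)
{
	if (kernel_supports(FEAT_BTF))	/* probe runs at most once, result is cached */
		attr->prog_btf_fd = btf__fd(btf);
}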
4076
+static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4077
+{
4078
+ struct bpf_map_info map_info = {};
4079
+ char msg[STRERR_BUFSIZE];
4080
+ __u32 map_info_len;
11184081 int err;
11194082
1120
- for (i = 0; i < obj->nr_maps; i++) {
1121
- struct bpf_map *map = &obj->maps[i];
1122
- struct bpf_map_def *def = &map->def;
1123
- char *cp, errmsg[STRERR_BUFSIZE];
1124
- int *pfd = &map->fd;
4083
+ map_info_len = sizeof(map_info);
11254084
1126
- if (map->fd >= 0) {
1127
- pr_debug("skip map create (preset) %s: fd=%d\n",
1128
- map->name, map->fd);
1129
- continue;
4085
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
4086
+ if (err && errno == EINVAL)
4087
+ err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4088
+ if (err) {
4089
+ pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4090
+ libbpf_strerror_r(errno, msg, sizeof(msg)));
4091
+ return false;
4092
+ }
4093
+
4094
+ return (map_info.type == map->def.type &&
4095
+ map_info.key_size == map->def.key_size &&
4096
+ map_info.value_size == map->def.value_size &&
4097
+ map_info.max_entries == map->def.max_entries &&
4098
+ map_info.map_flags == map->def.map_flags);
4099
+}
4100
+
4101
+static int
4102
+bpf_object__reuse_map(struct bpf_map *map)
4103
+{
4104
+ char *cp, errmsg[STRERR_BUFSIZE];
4105
+ int err, pin_fd;
4106
+
4107
+ pin_fd = bpf_obj_get(map->pin_path);
4108
+ if (pin_fd < 0) {
4109
+ err = -errno;
4110
+ if (err == -ENOENT) {
4111
+ pr_debug("found no pinned map to reuse at '%s'\n",
4112
+ map->pin_path);
4113
+ return 0;
11304114 }
11314115
4116
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4117
+ pr_warn("couldn't retrieve pinned map '%s': %s\n",
4118
+ map->pin_path, cp);
4119
+ return err;
4120
+ }
4121
+
4122
+ if (!map_is_reuse_compat(map, pin_fd)) {
4123
+ pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4124
+ map->pin_path);
4125
+ close(pin_fd);
4126
+ return -EINVAL;
4127
+ }
4128
+
4129
+ err = bpf_map__reuse_fd(map, pin_fd);
4130
+ if (err) {
4131
+ close(pin_fd);
4132
+ return err;
4133
+ }
4134
+ map->pinned = true;
4135
+ pr_debug("reused pinned map at '%s'\n", map->pin_path);
4136
+
4137
+ return 0;
4138
+}
4139
+
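/* Sketch of how a map opts into the reuse path above: give it a pin path
 * before load, and a subsequent run will pick up the pinned map instead
 * of creating a new one (map name and path are hypothetical).
 */
static int enable_map_reuse(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "flow_table");

	if (!map)
		return -1;
	return bpf_map__set_pin_path(map, "/sys/fs/bpf/flow_table");
}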
4140
+static int
4141
+bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4142
+{
4143
+ enum libbpf_map_type map_type = map->libbpf_type;
4144
+ char *cp, errmsg[STRERR_BUFSIZE];
4145
+ int err, zero = 0;
4146
+
4147
+ err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4148
+ if (err) {
4149
+ err = -errno;
4150
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4151
+ pr_warn("Error setting initial map(%s) contents: %s\n",
4152
+ map->name, cp);
4153
+ return err;
4154
+ }
4155
+
4156
+ /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4157
+ if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4158
+ err = bpf_map_freeze(map->fd);
4159
+ if (err) {
4160
+ err = -errno;
4161
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4162
+ pr_warn("Error freezing map(%s) as read-only: %s\n",
4163
+ map->name, cp);
4164
+ return err;
4165
+ }
4166
+ }
4167
+ return 0;
4168
+}
4169
+
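/* Sketch of what typically lands in the .rodata map frozen above:
 * "const volatile" globals on the BPF side, filled in by the loader
 * before bpf_object__load() (the variable name is an example).
 */
const volatile int debug_level = 0;	/* read-only from the syscall side once frozen */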
4170
+static void bpf_map__destroy(struct bpf_map *map);
4171
+
4172
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
4173
+{
4174
+ struct bpf_create_map_attr create_attr;
4175
+ struct bpf_map_def *def = &map->def;
4176
+ int err = 0;
4177
+
4178
+ memset(&create_attr, 0, sizeof(create_attr));
4179
+
4180
+ if (kernel_supports(FEAT_PROG_NAME))
11324181 create_attr.name = map->name;
1133
- create_attr.map_ifindex = map->map_ifindex;
1134
- create_attr.map_type = def->type;
1135
- create_attr.map_flags = def->map_flags;
1136
- create_attr.key_size = def->key_size;
1137
- create_attr.value_size = def->value_size;
4182
+ create_attr.map_ifindex = map->map_ifindex;
4183
+ create_attr.map_type = def->type;
4184
+ create_attr.map_flags = def->map_flags;
4185
+ create_attr.key_size = def->key_size;
4186
+ create_attr.value_size = def->value_size;
4187
+ create_attr.numa_node = map->numa_node;
4188
+
4189
+ if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
4190
+ int nr_cpus;
4191
+
4192
+ nr_cpus = libbpf_num_possible_cpus();
4193
+ if (nr_cpus < 0) {
4194
+ pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
4195
+ map->name, nr_cpus);
4196
+ return nr_cpus;
4197
+ }
4198
+ pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
4199
+ create_attr.max_entries = nr_cpus;
4200
+ } else {
11384201 create_attr.max_entries = def->max_entries;
4202
+ }
4203
+
4204
+ if (bpf_map__is_struct_ops(map))
4205
+ create_attr.btf_vmlinux_value_type_id =
4206
+ map->btf_vmlinux_value_type_id;
4207
+
4208
+ create_attr.btf_fd = 0;
4209
+ create_attr.btf_key_type_id = 0;
4210
+ create_attr.btf_value_type_id = 0;
4211
+ if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
4212
+ create_attr.btf_fd = btf__fd(obj->btf);
4213
+ create_attr.btf_key_type_id = map->btf_key_type_id;
4214
+ create_attr.btf_value_type_id = map->btf_value_type_id;
4215
+ }
4216
+
4217
+ if (bpf_map_type__is_map_in_map(def->type)) {
4218
+ if (map->inner_map) {
4219
+ err = bpf_object__create_map(obj, map->inner_map);
4220
+ if (err) {
4221
+ pr_warn("map '%s': failed to create inner map: %d\n",
4222
+ map->name, err);
4223
+ return err;
4224
+ }
4225
+ map->inner_map_fd = bpf_map__fd(map->inner_map);
4226
+ }
4227
+ if (map->inner_map_fd >= 0)
4228
+ create_attr.inner_map_fd = map->inner_map_fd;
4229
+ }
4230
+
4231
+ map->fd = bpf_create_map_xattr(&create_attr);
4232
+ if (map->fd < 0 && (create_attr.btf_key_type_id ||
4233
+ create_attr.btf_value_type_id)) {
4234
+ char *cp, errmsg[STRERR_BUFSIZE];
4235
+
4236
+ err = -errno;
4237
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4238
+ pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
4239
+ map->name, cp, err);
11394240 create_attr.btf_fd = 0;
11404241 create_attr.btf_key_type_id = 0;
11414242 create_attr.btf_value_type_id = 0;
4243
+ map->btf_key_type_id = 0;
4244
+ map->btf_value_type_id = 0;
4245
+ map->fd = bpf_create_map_xattr(&create_attr);
4246
+ }
11424247
1143
- if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
1144
- create_attr.btf_fd = btf__fd(obj->btf);
1145
- create_attr.btf_key_type_id = map->btf_key_type_id;
1146
- create_attr.btf_value_type_id = map->btf_value_type_id;
1147
- }
4248
+ err = map->fd < 0 ? -errno : 0;
11484249
1149
- *pfd = bpf_create_map_xattr(&create_attr);
1150
- if (*pfd < 0 && create_attr.btf_key_type_id) {
1151
- cp = str_error(errno, errmsg, sizeof(errmsg));
1152
- pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
1153
- map->name, cp, errno);
1154
- create_attr.btf_fd = 0;
1155
- create_attr.btf_key_type_id = 0;
1156
- create_attr.btf_value_type_id = 0;
1157
- map->btf_key_type_id = 0;
1158
- map->btf_value_type_id = 0;
1159
- *pfd = bpf_create_map_xattr(&create_attr);
1160
- }
4250
+ if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
4251
+ bpf_map__destroy(map->inner_map);
4252
+ zfree(&map->inner_map);
4253
+ }
11614254
1162
- if (*pfd < 0) {
1163
- size_t j;
4255
+ return err;
4256
+}
11644257
1165
- err = *pfd;
1166
- cp = str_error(errno, errmsg, sizeof(errmsg));
1167
- pr_warning("failed to create map (name: '%s'): %s\n",
1168
- map->name, cp);
1169
- for (j = 0; j < i; j++)
1170
- zclose(obj->maps[j].fd);
4258
+static int init_map_slots(struct bpf_map *map)
4259
+{
4260
+ const struct bpf_map *targ_map;
4261
+ unsigned int i;
4262
+ int fd, err;
4263
+
4264
+ for (i = 0; i < map->init_slots_sz; i++) {
4265
+ if (!map->init_slots[i])
4266
+ continue;
4267
+
4268
+ targ_map = map->init_slots[i];
4269
+ fd = bpf_map__fd(targ_map);
4270
+ err = bpf_map_update_elem(map->fd, &i, &fd, 0);
4271
+ if (err) {
4272
+ err = -errno;
4273
+ pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
4274
+ map->name, i, targ_map->name,
4275
+ fd, err);
11714276 return err;
11724277 }
1173
- pr_debug("create map %s: fd=%d\n", map->name, *pfd);
4278
+ pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
4279
+ map->name, i, targ_map->name, fd);
11744280 }
4281
+
4282
+ zfree(&map->init_slots);
4283
+ map->init_slots_sz = 0;
11754284
11764285 return 0;
11774286 }
11784287
11794288 static int
1180
-bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
1181
- struct reloc_desc *relo)
4289
+bpf_object__create_maps(struct bpf_object *obj)
11824290 {
1183
- struct bpf_insn *insn, *new_insn;
1184
- struct bpf_program *text;
1185
- size_t new_cnt;
4291
+ struct bpf_map *map;
4292
+ char *cp, errmsg[STRERR_BUFSIZE];
4293
+ unsigned int i, j;
4294
+ int err;
4295
+ bool retried;
11864296
1187
- if (relo->type != RELO_CALL)
1188
- return -LIBBPF_ERRNO__RELOC;
4297
+ for (i = 0; i < obj->nr_maps; i++) {
4298
+ map = &obj->maps[i];
11894299
1190
- if (prog->idx == obj->efile.text_shndx) {
1191
- pr_warning("relo in .text insn %d into off %d\n",
1192
- relo->insn_idx, relo->text_off);
1193
- return -LIBBPF_ERRNO__RELOC;
4300
+ retried = false;
4301
+retry:
4302
+ if (map->pin_path) {
4303
+ err = bpf_object__reuse_map(map);
4304
+ if (err) {
4305
+ pr_warn("map '%s': error reusing pinned map\n",
4306
+ map->name);
4307
+ goto err_out;
4308
+ }
4309
+ if (retried && map->fd < 0) {
4310
+ pr_warn("map '%s': cannot find pinned map\n",
4311
+ map->name);
4312
+ err = -ENOENT;
4313
+ goto err_out;
4314
+ }
4315
+ }
4316
+
4317
+ if (map->fd >= 0) {
4318
+ pr_debug("map '%s': skipping creation (preset fd=%d)\n",
4319
+ map->name, map->fd);
4320
+ } else {
4321
+ err = bpf_object__create_map(obj, map);
4322
+ if (err)
4323
+ goto err_out;
4324
+
4325
+ pr_debug("map '%s': created successfully, fd=%d\n",
4326
+ map->name, map->fd);
4327
+
4328
+ if (bpf_map__is_internal(map)) {
4329
+ err = bpf_object__populate_internal_map(obj, map);
4330
+ if (err < 0) {
4331
+ zclose(map->fd);
4332
+ goto err_out;
4333
+ }
4334
+ }
4335
+
4336
+ if (map->init_slots_sz) {
4337
+ err = init_map_slots(map);
4338
+ if (err < 0) {
4339
+ zclose(map->fd);
4340
+ goto err_out;
4341
+ }
4342
+ }
4343
+ }
4344
+
4345
+ if (map->pin_path && !map->pinned) {
4346
+ err = bpf_map__pin(map, NULL);
4347
+ if (err) {
4348
+ zclose(map->fd);
4349
+ if (!retried && err == -EEXIST) {
4350
+ retried = true;
4351
+ goto retry;
4352
+ }
4353
+ pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
4354
+ map->name, map->pin_path, err);
4355
+ goto err_out;
4356
+ }
4357
+ }
11944358 }
11954359
1196
- if (prog->main_prog_cnt == 0) {
1197
- text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
1198
- if (!text) {
1199
- pr_warning("no .text section found yet relo into text exist\n");
1200
- return -LIBBPF_ERRNO__RELOC;
1201
- }
1202
- new_cnt = prog->insns_cnt + text->insns_cnt;
1203
- new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
1204
- if (!new_insn) {
1205
- pr_warning("oom in prog realloc\n");
1206
- return -ENOMEM;
1207
- }
1208
- memcpy(new_insn + prog->insns_cnt, text->insns,
1209
- text->insns_cnt * sizeof(*insn));
1210
- prog->insns = new_insn;
1211
- prog->main_prog_cnt = prog->insns_cnt;
1212
- prog->insns_cnt = new_cnt;
1213
- pr_debug("added %zd insn from %s to prog %s\n",
1214
- text->insns_cnt, text->section_name,
1215
- prog->section_name);
4360
+ return 0;
4361
+
4362
+err_out:
4363
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4364
+ pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
4365
+ pr_perm_msg(err);
4366
+ for (j = 0; j < i; j++)
4367
+ zclose(obj->maps[j].fd);
4368
+ return err;
4369
+}
4370
+
4371
+#define BPF_CORE_SPEC_MAX_LEN 64
4372
+
4373
+/* represents BPF CO-RE field or array element accessor */
4374
+struct bpf_core_accessor {
4375
+ __u32 type_id; /* struct/union type or array element type */
4376
+ __u32 idx; /* field index or array index */
4377
+ const char *name; /* field name or NULL for array accessor */
4378
+};
4379
+
4380
+struct bpf_core_spec {
4381
+ const struct btf *btf;
4382
+ /* high-level spec: named fields and array indices only */
4383
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
4384
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
4385
+ __u32 root_type_id;
4386
+ /* CO-RE relocation kind */
4387
+ enum bpf_core_relo_kind relo_kind;
4388
+ /* high-level spec length */
4389
+ int len;
4390
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
4391
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
4392
+ /* raw spec length */
4393
+ int raw_len;
4394
+ /* field bit offset represented by spec */
4395
+ __u32 bit_offset;
4396
+};
4397
+
4398
+static bool str_is_empty(const char *s)
4399
+{
4400
+ return !s || !s[0];
4401
+}
4402
+
4403
+static bool is_flex_arr(const struct btf *btf,
4404
+ const struct bpf_core_accessor *acc,
4405
+ const struct btf_array *arr)
4406
+{
4407
+ const struct btf_type *t;
4408
+
4409
+ /* not a flexible array, if not inside a struct or has non-zero size */
4410
+ if (!acc->name || arr->nelems > 0)
4411
+ return false;
4412
+
4413
+ /* has to be the last member of enclosing struct */
4414
+ t = btf__type_by_id(btf, acc->type_id);
4415
+ return acc->idx == btf_vlen(t) - 1;
4416
+}
4417
+
4418
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
4419
+{
4420
+ switch (kind) {
4421
+ case BPF_FIELD_BYTE_OFFSET: return "byte_off";
4422
+ case BPF_FIELD_BYTE_SIZE: return "byte_sz";
4423
+ case BPF_FIELD_EXISTS: return "field_exists";
4424
+ case BPF_FIELD_SIGNED: return "signed";
4425
+ case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
4426
+ case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
4427
+ case BPF_TYPE_ID_LOCAL: return "local_type_id";
4428
+ case BPF_TYPE_ID_TARGET: return "target_type_id";
4429
+ case BPF_TYPE_EXISTS: return "type_exists";
4430
+ case BPF_TYPE_SIZE: return "type_size";
4431
+ case BPF_ENUMVAL_EXISTS: return "enumval_exists";
4432
+ case BPF_ENUMVAL_VALUE: return "enumval_value";
4433
+ default: return "unknown";
12164434 }
1217
- insn = &prog->insns[relo->insn_idx];
1218
- insn->imm += prog->main_prog_cnt - relo->insn_idx;
4435
+}
4436
+
4437
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
4438
+{
4439
+ switch (kind) {
4440
+ case BPF_FIELD_BYTE_OFFSET:
4441
+ case BPF_FIELD_BYTE_SIZE:
4442
+ case BPF_FIELD_EXISTS:
4443
+ case BPF_FIELD_SIGNED:
4444
+ case BPF_FIELD_LSHIFT_U64:
4445
+ case BPF_FIELD_RSHIFT_U64:
4446
+ return true;
4447
+ default:
4448
+ return false;
4449
+ }
4450
+}
4451
+
4452
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
4453
+{
4454
+ switch (kind) {
4455
+ case BPF_TYPE_ID_LOCAL:
4456
+ case BPF_TYPE_ID_TARGET:
4457
+ case BPF_TYPE_EXISTS:
4458
+ case BPF_TYPE_SIZE:
4459
+ return true;
4460
+ default:
4461
+ return false;
4462
+ }
4463
+}
4464
+
4465
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
4466
+{
4467
+ switch (kind) {
4468
+ case BPF_ENUMVAL_EXISTS:
4469
+ case BPF_ENUMVAL_VALUE:
4470
+ return true;
4471
+ default:
4472
+ return false;
4473
+ }
4474
+}
4475
+
4476
+/*
4477
+ * Turn bpf_core_relo into a low- and high-level spec representation,
4478
+ * validating correctness along the way, as well as calculating resulting
4479
+ * field bit offset, specified by accessor string. Low-level spec captures
4480
+ * every single level of nestedness, including traversing anonymous
4481
+ * struct/union members. High-level one only captures semantically meaningful
4482
+ * "turning points": named fields and array indicies.
4483
+ * E.g., for this case:
4484
+ *
4485
+ * struct sample {
4486
+ * int __unimportant;
4487
+ * struct {
4488
+ * int __1;
4489
+ * int __2;
4490
+ * int a[7];
4491
+ * };
4492
+ * };
4493
+ *
4494
+ * struct sample *s = ...;
4495
+ *
4496
+ * int x = &s->a[3]; // access string = '0:1:2:3'
4497
+ *
4498
+ * Low-level spec has 1:1 mapping with each element of access string (it's
4499
+ * just a parsed access string representation): [0, 1, 2, 3].
4500
+ *
4501
+ * High-level spec will capture only 3 points:
4502
+ * - initial zero-index access by pointer (&s->... is the same as &s[0]...);
4503
+ * - field 'a' access (corresponds to '2' in low-level spec);
4504
+ * - array element #3 access (corresponds to '3' in low-level spec).
4505
+ *
4506
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
4507
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
4508
+ * spec and raw_spec are kept empty.
4509
+ *
4510
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
4511
+ * string to specify enumerator's value index that need to be relocated.
4512
+ */
4513
+static int bpf_core_parse_spec(const struct btf *btf,
4514
+ __u32 type_id,
4515
+ const char *spec_str,
4516
+ enum bpf_core_relo_kind relo_kind,
4517
+ struct bpf_core_spec *spec)
4518
+{
4519
+ int access_idx, parsed_len, i;
4520
+ struct bpf_core_accessor *acc;
4521
+ const struct btf_type *t;
4522
+ const char *name;
4523
+ __u32 id;
4524
+ __s64 sz;
4525
+
4526
+ if (str_is_empty(spec_str) || *spec_str == ':')
4527
+ return -EINVAL;
4528
+
4529
+ memset(spec, 0, sizeof(*spec));
4530
+ spec->btf = btf;
4531
+ spec->root_type_id = type_id;
4532
+ spec->relo_kind = relo_kind;
4533
+
4534
+ /* type-based relocations don't have a field access string */
4535
+ if (core_relo_is_type_based(relo_kind)) {
4536
+ if (strcmp(spec_str, "0"))
4537
+ return -EINVAL;
4538
+ return 0;
4539
+ }
4540
+
4541
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
4542
+ while (*spec_str) {
4543
+ if (*spec_str == ':')
4544
+ ++spec_str;
4545
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
4546
+ return -EINVAL;
4547
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4548
+ return -E2BIG;
4549
+ spec_str += parsed_len;
4550
+ spec->raw_spec[spec->raw_len++] = access_idx;
4551
+ }
4552
+
4553
+ if (spec->raw_len == 0)
4554
+ return -EINVAL;
4555
+
4556
+ t = skip_mods_and_typedefs(btf, type_id, &id);
4557
+ if (!t)
4558
+ return -EINVAL;
4559
+
4560
+ access_idx = spec->raw_spec[0];
4561
+ acc = &spec->spec[0];
4562
+ acc->type_id = id;
4563
+ acc->idx = access_idx;
4564
+ spec->len++;
4565
+
4566
+ if (core_relo_is_enumval_based(relo_kind)) {
4567
+ if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
4568
+ return -EINVAL;
4569
+
4570
+ /* record enumerator name in a first accessor */
4571
+ acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
4572
+ return 0;
4573
+ }
4574
+
4575
+ if (!core_relo_is_field_based(relo_kind))
4576
+ return -EINVAL;
4577
+
4578
+ sz = btf__resolve_size(btf, id);
4579
+ if (sz < 0)
4580
+ return sz;
4581
+ spec->bit_offset = access_idx * sz * 8;
4582
+
4583
+ for (i = 1; i < spec->raw_len; i++) {
4584
+ t = skip_mods_and_typedefs(btf, id, &id);
4585
+ if (!t)
4586
+ return -EINVAL;
4587
+
4588
+ access_idx = spec->raw_spec[i];
4589
+ acc = &spec->spec[spec->len];
4590
+
4591
+ if (btf_is_composite(t)) {
4592
+ const struct btf_member *m;
4593
+ __u32 bit_offset;
4594
+
4595
+ if (access_idx >= btf_vlen(t))
4596
+ return -EINVAL;
4597
+
4598
+ bit_offset = btf_member_bit_offset(t, access_idx);
4599
+ spec->bit_offset += bit_offset;
4600
+
4601
+ m = btf_members(t) + access_idx;
4602
+ if (m->name_off) {
4603
+ name = btf__name_by_offset(btf, m->name_off);
4604
+ if (str_is_empty(name))
4605
+ return -EINVAL;
4606
+
4607
+ acc->type_id = id;
4608
+ acc->idx = access_idx;
4609
+ acc->name = name;
4610
+ spec->len++;
4611
+ }
4612
+
4613
+ id = m->type;
4614
+ } else if (btf_is_array(t)) {
4615
+ const struct btf_array *a = btf_array(t);
4616
+ bool flex;
4617
+
4618
+ t = skip_mods_and_typedefs(btf, a->type, &id);
4619
+ if (!t)
4620
+ return -EINVAL;
4621
+
4622
+ flex = is_flex_arr(btf, acc - 1, a);
4623
+ if (!flex && access_idx >= a->nelems)
4624
+ return -EINVAL;
4625
+
4626
+ spec->spec[spec->len].type_id = id;
4627
+ spec->spec[spec->len].idx = access_idx;
4628
+ spec->len++;
4629
+
4630
+ sz = btf__resolve_size(btf, id);
4631
+ if (sz < 0)
4632
+ return sz;
4633
+ spec->bit_offset += access_idx * sz * 8;
4634
+ } else {
4635
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
4636
+ type_id, spec_str, i, id, btf_kind_str(t));
4637
+ return -EINVAL;
4638
+ }
4639
+ }
4640
+
12194641 return 0;
12204642 }
12214643
1222
-static int
1223
-bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
4644
+static bool bpf_core_is_flavor_sep(const char *s)
12244645 {
1225
- int i, err;
4646
+ /* check X___Y name pattern, where X and Y are not underscores */
4647
+ return s[0] != '_' && /* X */
4648
+ s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
4649
+ s[4] != '_'; /* Y */
4650
+}
12264651
1227
- if (!prog || !prog->reloc_desc)
4652
+/* Given 'some_struct_name___with_flavor' return the length of a name prefix
4653
+ * before last triple underscore. Struct name part after last triple
4654
+ * underscore is ignored by BPF CO-RE relocation during relocation matching.
4655
+ */
4656
+static size_t bpf_core_essential_name_len(const char *name)
4657
+{
4658
+ size_t n = strlen(name);
4659
+ int i;
4660
+
4661
+ for (i = n - 5; i >= 0; i--) {
4662
+ if (bpf_core_is_flavor_sep(name + i))
4663
+ return i + 1;
4664
+ }
4665
+ return n;
4666
+}
4667
+
4668
+/* dynamically sized list of type IDs */
4669
+struct ids_vec {
4670
+ __u32 *data;
4671
+ int len;
4672
+};
4673
+
4674
+static void bpf_core_free_cands(struct ids_vec *cand_ids)
4675
+{
4676
+ free(cand_ids->data);
4677
+ free(cand_ids);
4678
+}
4679
+
4680
+static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
4681
+ __u32 local_type_id,
4682
+ const struct btf *targ_btf)
4683
+{
4684
+ size_t local_essent_len, targ_essent_len;
4685
+ const char *local_name, *targ_name;
4686
+ const struct btf_type *t, *local_t;
4687
+ struct ids_vec *cand_ids;
4688
+ __u32 *new_ids;
4689
+ int i, err, n;
4690
+
4691
+ local_t = btf__type_by_id(local_btf, local_type_id);
4692
+ if (!local_t)
4693
+ return ERR_PTR(-EINVAL);
4694
+
4695
+ local_name = btf__name_by_offset(local_btf, local_t->name_off);
4696
+ if (str_is_empty(local_name))
4697
+ return ERR_PTR(-EINVAL);
4698
+ local_essent_len = bpf_core_essential_name_len(local_name);
4699
+
4700
+ cand_ids = calloc(1, sizeof(*cand_ids));
4701
+ if (!cand_ids)
4702
+ return ERR_PTR(-ENOMEM);
4703
+
4704
+ n = btf__get_nr_types(targ_btf);
4705
+ for (i = 1; i <= n; i++) {
4706
+ t = btf__type_by_id(targ_btf, i);
4707
+ if (btf_kind(t) != btf_kind(local_t))
4708
+ continue;
4709
+
4710
+ targ_name = btf__name_by_offset(targ_btf, t->name_off);
4711
+ if (str_is_empty(targ_name))
4712
+ continue;
4713
+
4714
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
4715
+ if (targ_essent_len != local_essent_len)
4716
+ continue;
4717
+
4718
+ if (strncmp(local_name, targ_name, local_essent_len) == 0) {
4719
+ pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
4720
+ local_type_id, btf_kind_str(local_t),
4721
+ local_name, i, btf_kind_str(t), targ_name);
4722
+ new_ids = libbpf_reallocarray(cand_ids->data,
4723
+ cand_ids->len + 1,
4724
+ sizeof(*cand_ids->data));
4725
+ if (!new_ids) {
4726
+ err = -ENOMEM;
4727
+ goto err_out;
4728
+ }
4729
+ cand_ids->data = new_ids;
4730
+ cand_ids->data[cand_ids->len++] = i;
4731
+ }
4732
+ }
4733
+ return cand_ids;
4734
+err_out:
4735
+ bpf_core_free_cands(cand_ids);
4736
+ return ERR_PTR(err);
4737
+}
4738
+
4739
+/* Check two types for compatibility for the purpose of field access
4740
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
4741
+ * are relocating semantically compatible entities:
4742
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
4743
+ * - any two FWDs are compatible, if their names match (modulo flavor suffix);
4744
+ * - any two PTRs are always compatible;
4745
+ * - for ENUMs, names should be the same (ignoring flavor suffix) or at
4746
+ * least one of enums should be anonymous;
4747
+ * - for ENUMs, check sizes, names are ignored;
4748
+ * - for INT, size and signedness are ignored;
4749
+ * - for ARRAY, dimensionality is ignored, element types are checked for
4750
+ * compatibility recursively;
4751
+ * - everything else shouldn't be ever a target of relocation.
4752
+ * These rules are not set in stone and probably will be adjusted as we get
4753
+ * more experience with using BPF CO-RE relocations.
4754
+ */
4755
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
4756
+ __u32 local_id,
4757
+ const struct btf *targ_btf,
4758
+ __u32 targ_id)
4759
+{
4760
+ const struct btf_type *local_type, *targ_type;
4761
+
4762
+recur:
4763
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4764
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4765
+ if (!local_type || !targ_type)
4766
+ return -EINVAL;
4767
+
4768
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
4769
+ return 1;
4770
+ if (btf_kind(local_type) != btf_kind(targ_type))
12284771 return 0;
12294772
1230
- for (i = 0; i < prog->nr_reloc; i++) {
1231
- if (prog->reloc_desc[i].type == RELO_LD64) {
1232
- struct bpf_insn *insns = prog->insns;
1233
- int insn_idx, map_idx;
4773
+ switch (btf_kind(local_type)) {
4774
+ case BTF_KIND_PTR:
4775
+ return 1;
4776
+ case BTF_KIND_FWD:
4777
+ case BTF_KIND_ENUM: {
4778
+ const char *local_name, *targ_name;
4779
+ size_t local_len, targ_len;
12344780
1235
- insn_idx = prog->reloc_desc[i].insn_idx;
1236
- map_idx = prog->reloc_desc[i].map_idx;
4781
+ local_name = btf__name_by_offset(local_btf,
4782
+ local_type->name_off);
4783
+ targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
4784
+ local_len = bpf_core_essential_name_len(local_name);
4785
+ targ_len = bpf_core_essential_name_len(targ_name);
4786
+ /* one of them is anonymous or both w/ same flavor-less names */
4787
+ return local_len == 0 || targ_len == 0 ||
4788
+ (local_len == targ_len &&
4789
+ strncmp(local_name, targ_name, local_len) == 0);
4790
+ }
4791
+ case BTF_KIND_INT:
4792
+ /* just reject deprecated bitfield-like integers; all other
4793
+ * integers are by default compatible between each other
4794
+ */
4795
+ return btf_int_offset(local_type) == 0 &&
4796
+ btf_int_offset(targ_type) == 0;
4797
+ case BTF_KIND_ARRAY:
4798
+ local_id = btf_array(local_type)->type;
4799
+ targ_id = btf_array(targ_type)->type;
4800
+ goto recur;
4801
+ default:
4802
+ pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
4803
+ btf_kind(local_type), local_id, targ_id);
4804
+ return 0;
4805
+ }
4806
+}
12374807
1238
- if (insn_idx >= (int)prog->insns_cnt) {
1239
- pr_warning("relocation out of range: '%s'\n",
1240
- prog->section_name);
1241
- return -LIBBPF_ERRNO__RELOC;
4808
+/*
4809
+ * Given single high-level named field accessor in local type, find
4810
+ * corresponding high-level accessor for a target type. Along the way,
4811
+ * maintain low-level spec for target as well. Also keep updating target
4812
+ * bit offset.
4813
+ *
4814
+ * Searching is performed through recursive exhaustive enumeration of all
4815
+ * fields of a struct/union. If there are any anonymous (embedded)
4816
+ * structs/unions, they are recursively searched as well. If a field with
4817
+ * the desired name is found, check compatibility between local and target types,
4818
+ * before returning the result.
4819
+ *
4820
+ * 1 is returned if the field is found.
4821
+ * 0 is returned if no compatible field is found.
4822
+ * <0 is returned on error.
4823
+ */
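For example (hypothetical types, not taken from the kernel), the local accessor ".pid" below still matches the target type, where "pid" only exists one level down inside an embedded anonymous struct; the recursive search extends the low-level spec accordingly:

struct task___local {
	int pid;
};

struct task___target {
	struct {
		int tgid;
		int pid;	/* found via recursion into the anonymous member */
	};
};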
4824
+static int bpf_core_match_member(const struct btf *local_btf,
4825
+ const struct bpf_core_accessor *local_acc,
4826
+ const struct btf *targ_btf,
4827
+ __u32 targ_id,
4828
+ struct bpf_core_spec *spec,
4829
+ __u32 *next_targ_id)
4830
+{
4831
+ const struct btf_type *local_type, *targ_type;
4832
+ const struct btf_member *local_member, *m;
4833
+ const char *local_name, *targ_name;
4834
+ __u32 local_id;
4835
+ int i, n, found;
4836
+
4837
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4838
+ if (!targ_type)
4839
+ return -EINVAL;
4840
+ if (!btf_is_composite(targ_type))
4841
+ return 0;
4842
+
4843
+ local_id = local_acc->type_id;
4844
+ local_type = btf__type_by_id(local_btf, local_id);
4845
+ local_member = btf_members(local_type) + local_acc->idx;
4846
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
4847
+
4848
+ n = btf_vlen(targ_type);
4849
+ m = btf_members(targ_type);
4850
+ for (i = 0; i < n; i++, m++) {
4851
+ __u32 bit_offset;
4852
+
4853
+ bit_offset = btf_member_bit_offset(targ_type, i);
4854
+
4855
+ /* too deep struct/union/array nesting */
4856
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
4857
+ return -E2BIG;
4858
+
4859
+ /* speculate this member will be the good one */
4860
+ spec->bit_offset += bit_offset;
4861
+ spec->raw_spec[spec->raw_len++] = i;
4862
+
4863
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
4864
+ if (str_is_empty(targ_name)) {
4865
+ /* embedded struct/union, we need to go deeper */
4866
+ found = bpf_core_match_member(local_btf, local_acc,
4867
+ targ_btf, m->type,
4868
+ spec, next_targ_id);
4869
+ if (found) /* either found or error */
4870
+ return found;
4871
+ } else if (strcmp(local_name, targ_name) == 0) {
4872
+ /* matching named field */
4873
+ struct bpf_core_accessor *targ_acc;
4874
+
4875
+ targ_acc = &spec->spec[spec->len++];
4876
+ targ_acc->type_id = targ_id;
4877
+ targ_acc->idx = i;
4878
+ targ_acc->name = targ_name;
4879
+
4880
+ *next_targ_id = m->type;
4881
+ found = bpf_core_fields_are_compat(local_btf,
4882
+ local_member->type,
4883
+ targ_btf, m->type);
4884
+ if (!found)
4885
+ spec->len--; /* pop accessor */
4886
+ return found;
4887
+ }
4888
+ /* member turned out not to be what we looked for */
4889
+ spec->bit_offset -= bit_offset;
4890
+ spec->raw_len--;
4891
+ }
4892
+
4893
+ return 0;
4894
+}
4895
+
4896
+/* Check local and target types for compatibility. This check is used for
4897
+ * type-based CO-RE relocations and follows slightly different rules than
4898
+ * field-based relocations. This function assumes that root types were already
4899
+ * checked for name match. Beyond that initial root-level name check, names
4900
+ * are completely ignored. Compatibility rules are as follows:
4901
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
4902
+ * kind should match for local and target types (i.e., STRUCT is not
4903
+ * compatible with UNION);
4904
+ * - for ENUMs, the size is ignored;
4905
+ * - for INT, size and signedness are ignored;
4906
+ * - for ARRAY, dimensionality is ignored, element types are checked for
4907
+ * compatibility recursively;
4908
+ * - CONST/VOLATILE/RESTRICT modifiers are ignored;
4909
+ * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
4910
+ * - FUNC_PROTOs are compatible if they have compatible signature: same
4911
+ * number of input args and compatible return and argument types.
4912
+ * These rules are not set in stone and probably will be adjusted as we get
4913
+ * more experience with using BPF CO-RE relocations.
4914
+ */
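As a sketch of these type-based rules (made-up typedef names), the two definitions below are compatible: both roots are TYPEDEFs, the PTRs they wrap are followed, and the underlying FUNC_PROTOs have the same arity with per-argument compatibility (CONST is skipped, INT size and signedness are ignored):

typedef int (*handler___local)(void *ctx, unsigned int flags);
typedef long (*handler___target)(const void *ctx, int flags);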
4915
+static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
4916
+ const struct btf *targ_btf, __u32 targ_id)
4917
+{
4918
+ const struct btf_type *local_type, *targ_type;
4919
+ int depth = 32; /* max recursion depth */
4920
+
4921
+ /* caller made sure that names match (ignoring flavor suffix) */
4922
+ local_type = btf__type_by_id(local_btf, local_id);
4923
+ targ_type = btf__type_by_id(targ_btf, targ_id);
4924
+ if (btf_kind(local_type) != btf_kind(targ_type))
4925
+ return 0;
4926
+
4927
+recur:
4928
+ depth--;
4929
+ if (depth < 0)
4930
+ return -EINVAL;
4931
+
4932
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
4933
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
4934
+ if (!local_type || !targ_type)
4935
+ return -EINVAL;
4936
+
4937
+ if (btf_kind(local_type) != btf_kind(targ_type))
4938
+ return 0;
4939
+
4940
+ switch (btf_kind(local_type)) {
4941
+ case BTF_KIND_UNKN:
4942
+ case BTF_KIND_STRUCT:
4943
+ case BTF_KIND_UNION:
4944
+ case BTF_KIND_ENUM:
4945
+ case BTF_KIND_FWD:
4946
+ return 1;
4947
+ case BTF_KIND_INT:
4948
+ /* just reject deprecated bitfield-like integers; all other
4949
+ * integers are by default compatible with each other
4950
+ */
4951
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
4952
+ case BTF_KIND_PTR:
4953
+ local_id = local_type->type;
4954
+ targ_id = targ_type->type;
4955
+ goto recur;
4956
+ case BTF_KIND_ARRAY:
4957
+ local_id = btf_array(local_type)->type;
4958
+ targ_id = btf_array(targ_type)->type;
4959
+ goto recur;
4960
+ case BTF_KIND_FUNC_PROTO: {
4961
+ struct btf_param *local_p = btf_params(local_type);
4962
+ struct btf_param *targ_p = btf_params(targ_type);
4963
+ __u16 local_vlen = btf_vlen(local_type);
4964
+ __u16 targ_vlen = btf_vlen(targ_type);
4965
+ int i, err;
4966
+
4967
+ if (local_vlen != targ_vlen)
4968
+ return 0;
4969
+
4970
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
4971
+ skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
4972
+ skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
4973
+ err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
4974
+ if (err <= 0)
4975
+ return err;
4976
+ }
4977
+
4978
+ /* tail recurse for return type check */
4979
+ skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
4980
+ skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
4981
+ goto recur;
4982
+ }
4983
+ default:
4984
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
4985
+ btf_kind_str(local_type), local_id, targ_id);
4986
+ return 0;
4987
+ }
4988
+}
4989
+
4990
+/*
4991
+ * Try to match local spec to a target type and, if successful, produce full
4992
+ * target spec (high-level, low-level + bit offset).
4993
+ */
4994
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
4995
+ const struct btf *targ_btf, __u32 targ_id,
4996
+ struct bpf_core_spec *targ_spec)
4997
+{
4998
+ const struct btf_type *targ_type;
4999
+ const struct bpf_core_accessor *local_acc;
5000
+ struct bpf_core_accessor *targ_acc;
5001
+ int i, sz, matched;
5002
+
5003
+ memset(targ_spec, 0, sizeof(*targ_spec));
5004
+ targ_spec->btf = targ_btf;
5005
+ targ_spec->root_type_id = targ_id;
5006
+ targ_spec->relo_kind = local_spec->relo_kind;
5007
+
5008
+ if (core_relo_is_type_based(local_spec->relo_kind)) {
5009
+ return bpf_core_types_are_compat(local_spec->btf,
5010
+ local_spec->root_type_id,
5011
+ targ_btf, targ_id);
5012
+ }
5013
+
5014
+ local_acc = &local_spec->spec[0];
5015
+ targ_acc = &targ_spec->spec[0];
5016
+
5017
+ if (core_relo_is_enumval_based(local_spec->relo_kind)) {
5018
+ size_t local_essent_len, targ_essent_len;
5019
+ const struct btf_enum *e;
5020
+ const char *targ_name;
5021
+
5022
+ /* has to resolve to an enum */
5023
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
5024
+ if (!btf_is_enum(targ_type))
5025
+ return 0;
5026
+
5027
+ local_essent_len = bpf_core_essential_name_len(local_acc->name);
5028
+
5029
+ for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
5030
+ targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
5031
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
5032
+ if (targ_essent_len != local_essent_len)
5033
+ continue;
5034
+ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
5035
+ targ_acc->type_id = targ_id;
5036
+ targ_acc->idx = i;
5037
+ targ_acc->name = targ_name;
5038
+ targ_spec->len++;
5039
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5040
+ targ_spec->raw_len++;
5041
+ return 1;
12425042 }
1243
- insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
1244
- insns[insn_idx].imm = obj->maps[map_idx].fd;
5043
+ }
5044
+ return 0;
5045
+ }
5046
+
5047
+ if (!core_relo_is_field_based(local_spec->relo_kind))
5048
+ return -EINVAL;
5049
+
5050
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
5051
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
5052
+ &targ_id);
5053
+ if (!targ_type)
5054
+ return -EINVAL;
5055
+
5056
+ if (local_acc->name) {
5057
+ matched = bpf_core_match_member(local_spec->btf,
5058
+ local_acc,
5059
+ targ_btf, targ_id,
5060
+ targ_spec, &targ_id);
5061
+ if (matched <= 0)
5062
+ return matched;
12455063 } else {
1246
- err = bpf_program__reloc_text(prog, obj,
1247
- &prog->reloc_desc[i]);
5064
+ /* for i=0, targ_id is already treated as array element
5065
+ * type (because it's the original struct), for others
5066
+ * we should find array element type first
5067
+ */
5068
+ if (i > 0) {
5069
+ const struct btf_array *a;
5070
+ bool flex;
5071
+
5072
+ if (!btf_is_array(targ_type))
5073
+ return 0;
5074
+
5075
+ a = btf_array(targ_type);
5076
+ flex = is_flex_arr(targ_btf, targ_acc - 1, a);
5077
+ if (!flex && local_acc->idx >= a->nelems)
5078
+ return 0;
5079
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
5080
+ &targ_id))
5081
+ return -EINVAL;
5082
+ }
5083
+
5084
+ /* too deep struct/union/array nesting */
5085
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
5086
+ return -E2BIG;
5087
+
5088
+ targ_acc->type_id = targ_id;
5089
+ targ_acc->idx = local_acc->idx;
5090
+ targ_acc->name = NULL;
5091
+ targ_spec->len++;
5092
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
5093
+ targ_spec->raw_len++;
5094
+
5095
+ sz = btf__resolve_size(targ_btf, targ_id);
5096
+ if (sz < 0)
5097
+ return sz;
5098
+ targ_spec->bit_offset += local_acc->idx * sz * 8;
5099
+ }
5100
+ }
5101
+
5102
+ return 1;
5103
+}
5104
+
5105
+static int bpf_core_calc_field_relo(const struct bpf_program *prog,
5106
+ const struct bpf_core_relo *relo,
5107
+ const struct bpf_core_spec *spec,
5108
+ __u32 *val, __u32 *field_sz, __u32 *type_id,
5109
+ bool *validate)
5110
+{
5111
+ const struct bpf_core_accessor *acc;
5112
+ const struct btf_type *t;
5113
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
5114
+ const struct btf_member *m;
5115
+ const struct btf_type *mt;
5116
+ bool bitfield;
5117
+ __s64 sz;
5118
+
5119
+ *field_sz = 0;
5120
+
5121
+ if (relo->kind == BPF_FIELD_EXISTS) {
5122
+ *val = spec ? 1 : 0;
5123
+ return 0;
5124
+ }
5125
+
5126
+ if (!spec)
5127
+ return -EUCLEAN; /* request instruction poisoning */
5128
+
5129
+ acc = &spec->spec[spec->len - 1];
5130
+ t = btf__type_by_id(spec->btf, acc->type_id);
5131
+
5132
+ /* a[n] accessor needs special handling */
5133
+ if (!acc->name) {
5134
+ if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
5135
+ *val = spec->bit_offset / 8;
5136
+ /* remember field size for load/store mem size */
5137
+ sz = btf__resolve_size(spec->btf, acc->type_id);
5138
+ if (sz < 0)
5139
+ return -EINVAL;
5140
+ *field_sz = sz;
5141
+ *type_id = acc->type_id;
5142
+ } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
5143
+ sz = btf__resolve_size(spec->btf, acc->type_id);
5144
+ if (sz < 0)
5145
+ return -EINVAL;
5146
+ *val = sz;
5147
+ } else {
5148
+ pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
5149
+ prog->name, relo->kind, relo->insn_off / 8);
5150
+ return -EINVAL;
5151
+ }
5152
+ if (validate)
5153
+ *validate = true;
5154
+ return 0;
5155
+ }
5156
+
5157
+ m = btf_members(t) + acc->idx;
5158
+ mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
5159
+ bit_off = spec->bit_offset;
5160
+ bit_sz = btf_member_bitfield_size(t, acc->idx);
5161
+
5162
+ bitfield = bit_sz > 0;
5163
+ if (bitfield) {
5164
+ byte_sz = mt->size;
5165
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
5166
+ /* figure out smallest int size necessary for bitfield load */
5167
+ while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
5168
+ if (byte_sz >= 8) {
5169
+ /* bitfield can't be read with 64-bit read */
5170
+ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
5171
+ prog->name, relo->kind, relo->insn_off / 8);
5172
+ return -E2BIG;
5173
+ }
5174
+ byte_sz *= 2;
5175
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
5176
+ }
5177
+ } else {
5178
+ sz = btf__resolve_size(spec->btf, field_type_id);
5179
+ if (sz < 0)
5180
+ return -EINVAL;
5181
+ byte_sz = sz;
5182
+ byte_off = spec->bit_offset / 8;
5183
+ bit_sz = byte_sz * 8;
5184
+ }
5185
+
5186
+ /* for bitfields, all the relocatable aspects are ambiguous and we
5187
+ * might disagree with compiler, so turn off validation of expected
5188
+ * value, except for signedness
5189
+ */
5190
+ if (validate)
5191
+ *validate = !bitfield;
5192
+
5193
+ switch (relo->kind) {
5194
+ case BPF_FIELD_BYTE_OFFSET:
5195
+ *val = byte_off;
5196
+ if (!bitfield) {
5197
+ *field_sz = byte_sz;
5198
+ *type_id = field_type_id;
5199
+ }
5200
+ break;
5201
+ case BPF_FIELD_BYTE_SIZE:
5202
+ *val = byte_sz;
5203
+ break;
5204
+ case BPF_FIELD_SIGNED:
5205
+ /* enums will be assumed unsigned */
5206
+ *val = btf_is_enum(mt) ||
5207
+ (btf_int_encoding(mt) & BTF_INT_SIGNED);
5208
+ if (validate)
5209
+ *validate = true; /* signedness is never ambiguous */
5210
+ break;
5211
+ case BPF_FIELD_LSHIFT_U64:
5212
+#if __BYTE_ORDER == __LITTLE_ENDIAN
5213
+ *val = 64 - (bit_off + bit_sz - byte_off * 8);
5214
+#else
5215
+ *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
5216
+#endif
5217
+ break;
5218
+ case BPF_FIELD_RSHIFT_U64:
5219
+ *val = 64 - bit_sz;
5220
+ if (validate)
5221
+ *validate = true; /* right shift is never ambiguous */
5222
+ break;
5223
+ case BPF_FIELD_EXISTS:
5224
+ default:
5225
+ return -EOPNOTSUPP;
5226
+ }
5227
+
5228
+ return 0;
5229
+}
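A worked example of the bitfield shift math above, with illustrative numbers only: for a bitfield at bit_off = 36 with bit_sz = 5, loaded via a 4-byte read at byte_off = 4 on a little-endian host, BPF_FIELD_LSHIFT_U64 = 64 - (36 + 5 - 4 * 8) = 55 and BPF_FIELD_RSHIFT_U64 = 64 - 5 = 59, so the field value is recovered as sketched below:

/* illustrative only; mirrors how the relocated shift amounts are meant to be used */
static inline unsigned long long extract_example_bitfield(unsigned long long loaded_word)
{
	unsigned int lshift = 64 - (36 + 5 - 4 * 8);	/* BPF_FIELD_LSHIFT_U64 = 55 */
	unsigned int rshift = 64 - 5;			/* BPF_FIELD_RSHIFT_U64 = 59 */

	return (loaded_word << lshift) >> rshift;	/* 5-bit unsigned field value */
}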
5230
+
5231
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
5232
+ const struct bpf_core_spec *spec,
5233
+ __u32 *val)
5234
+{
5235
+ __s64 sz;
5236
+
5237
+ /* type-based relos return zero when target type is not found */
5238
+ if (!spec) {
5239
+ *val = 0;
5240
+ return 0;
5241
+ }
5242
+
5243
+ switch (relo->kind) {
5244
+ case BPF_TYPE_ID_TARGET:
5245
+ *val = spec->root_type_id;
5246
+ break;
5247
+ case BPF_TYPE_EXISTS:
5248
+ *val = 1;
5249
+ break;
5250
+ case BPF_TYPE_SIZE:
5251
+ sz = btf__resolve_size(spec->btf, spec->root_type_id);
5252
+ if (sz < 0)
5253
+ return -EINVAL;
5254
+ *val = sz;
5255
+ break;
5256
+ case BPF_TYPE_ID_LOCAL:
5257
+ /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
5258
+ default:
5259
+ return -EOPNOTSUPP;
5260
+ }
5261
+
5262
+ return 0;
5263
+}
5264
+
5265
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
5266
+ const struct bpf_core_spec *spec,
5267
+ __u32 *val)
5268
+{
5269
+ const struct btf_type *t;
5270
+ const struct btf_enum *e;
5271
+
5272
+ switch (relo->kind) {
5273
+ case BPF_ENUMVAL_EXISTS:
5274
+ *val = spec ? 1 : 0;
5275
+ break;
5276
+ case BPF_ENUMVAL_VALUE:
5277
+ if (!spec)
5278
+ return -EUCLEAN; /* request instruction poisoning */
5279
+ t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
5280
+ e = btf_enum(t) + spec->spec[0].idx;
5281
+ *val = e->val;
5282
+ break;
5283
+ default:
5284
+ return -EOPNOTSUPP;
5285
+ }
5286
+
5287
+ return 0;
5288
+}
5289
+
5290
+struct bpf_core_relo_res
5291
+{
5292
+ /* expected value in the instruction, unless validate == false */
5293
+ __u32 orig_val;
5294
+ /* new value that needs to be patched up to */
5295
+ __u32 new_val;
5296
+ /* relocation unsuccessful, poison instruction, but don't fail load */
5297
+ bool poison;
5298
+ /* some relocations can't be validated against orig_val */
5299
+ bool validate;
5300
+ /* for field byte offset relocations or the forms:
5301
+ * *(T *)(rX + <off>) = rY
5302
+ * rX = *(T *)(rY + <off>),
5303
+ * we remember original and resolved field size to adjust direct
5304
+ * memory loads of pointers and integers; this is necessary for 32-bit
5305
+ * host kernel architectures, but also allows automatically
5306
+ * relocating fields that were resized from, e.g., u32 to u64, etc.
5307
+ */
5308
+ bool fail_memsz_adjust;
5309
+ __u32 orig_sz;
5310
+ __u32 orig_type_id;
5311
+ __u32 new_sz;
5312
+ __u32 new_type_id;
5313
+};
5314
+
5315
+/* Calculate original and target relocation values, given local and target
5316
+ * specs and relocation kind. These values are calculated for each candidate.
5317
+ * If there are multiple candidates, resulting values should all be consistent
5318
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
5319
+ * If instruction has to be poisoned, *poison will be set to true.
5320
+ */
5321
+static int bpf_core_calc_relo(const struct bpf_program *prog,
5322
+ const struct bpf_core_relo *relo,
5323
+ int relo_idx,
5324
+ const struct bpf_core_spec *local_spec,
5325
+ const struct bpf_core_spec *targ_spec,
5326
+ struct bpf_core_relo_res *res)
5327
+{
5328
+ int err = -EOPNOTSUPP;
5329
+
5330
+ res->orig_val = 0;
5331
+ res->new_val = 0;
5332
+ res->poison = false;
5333
+ res->validate = true;
5334
+ res->fail_memsz_adjust = false;
5335
+ res->orig_sz = res->new_sz = 0;
5336
+ res->orig_type_id = res->new_type_id = 0;
5337
+
5338
+ if (core_relo_is_field_based(relo->kind)) {
5339
+ err = bpf_core_calc_field_relo(prog, relo, local_spec,
5340
+ &res->orig_val, &res->orig_sz,
5341
+ &res->orig_type_id, &res->validate);
5342
+ err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
5343
+ &res->new_val, &res->new_sz,
5344
+ &res->new_type_id, NULL);
5345
+ if (err)
5346
+ goto done;
5347
+ /* Validate if it's safe to adjust load/store memory size.
5348
+ * Adjustments are performed only if original and new memory
5349
+ * sizes differ.
5350
+ */
5351
+ res->fail_memsz_adjust = false;
5352
+ if (res->orig_sz != res->new_sz) {
5353
+ const struct btf_type *orig_t, *new_t;
5354
+
5355
+ orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
5356
+ new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
5357
+
5358
+ /* There are two use cases in which it's safe to
5359
+ * adjust load/store's mem size:
5360
+ * - reading a 32-bit kernel pointer, while on BPF
5361
+ * size pointers are always 64-bit; in this case
5362
+ * it's safe to "downsize" instruction size due to
5363
+ * pointer being treated as unsigned integer with
5364
+ * zero-extended upper 32-bits;
5365
+ * - reading unsigned integers, again because
5366
+ * zero-extension preserves the value correctly.
5367
+ *
5368
+ * In all other cases it's incorrect to attempt to
5369
+ * load/store the field because the read value will be
5370
+ * incorrect, so we poison the relocated instruction.
5371
+ */
5372
+ if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
5373
+ goto done;
5374
+ if (btf_is_int(orig_t) && btf_is_int(new_t) &&
5375
+ btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
5376
+ btf_int_encoding(new_t) != BTF_INT_SIGNED)
5377
+ goto done;
5378
+
5379
+ /* mark as invalid mem size adjustment, but this will
5380
+ * only be checked for LDX/STX/ST insns
5381
+ */
5382
+ res->fail_memsz_adjust = true;
5383
+ }
5384
+ } else if (core_relo_is_type_based(relo->kind)) {
5385
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
5386
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
5387
+ } else if (core_relo_is_enumval_based(relo->kind)) {
5388
+ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
5389
+ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
5390
+ }
5391
+
5392
+done:
5393
+ if (err == -EUCLEAN) {
5394
+ /* EUCLEAN is used to signal instruction poisoning request */
5395
+ res->poison = true;
5396
+ err = 0;
5397
+ } else if (err == -EOPNOTSUPP) {
5398
+ /* EOPNOTSUPP means unknown/unsupported relocation */
5399
+ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
5400
+ prog->name, relo_idx, core_relo_kind_str(relo->kind),
5401
+ relo->kind, relo->insn_off / 8);
5402
+ }
5403
+
5404
+ return err;
5405
+}
5406
+
5407
+/*
5408
+ * Turn an instruction for which CO-RE relocation failed into an invalid one with
5409
+ * distinct signature.
5410
+ */
5411
+static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
5412
+ int insn_idx, struct bpf_insn *insn)
5413
+{
5414
+ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
5415
+ prog->name, relo_idx, insn_idx);
5416
+ insn->code = BPF_JMP | BPF_CALL;
5417
+ insn->dst_reg = 0;
5418
+ insn->src_reg = 0;
5419
+ insn->off = 0;
5420
+ /* if this instruction is reachable (not dead code),
5421
+ * the verifier will complain with the following message:
5422
+ * invalid func unknown#195896080
5423
+ */
5424
+ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
5425
+}
5426
+
5427
+static bool is_ldimm64(struct bpf_insn *insn)
5428
+{
5429
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
5430
+}
5431
+
5432
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
5433
+{
5434
+ switch (BPF_SIZE(insn->code)) {
5435
+ case BPF_DW: return 8;
5436
+ case BPF_W: return 4;
5437
+ case BPF_H: return 2;
5438
+ case BPF_B: return 1;
5439
+ default: return -1;
5440
+ }
5441
+}
5442
+
5443
+static int insn_bytes_to_bpf_size(__u32 sz)
5444
+{
5445
+ switch (sz) {
5446
+ case 8: return BPF_DW;
5447
+ case 4: return BPF_W;
5448
+ case 2: return BPF_H;
5449
+ case 1: return BPF_B;
5450
+ default: return -1;
5451
+ }
5452
+}
5453
+
5454
+/*
5455
+ * Patch relocatable BPF instruction.
5456
+ *
5457
+ * Patched value is determined by relocation kind and target specification.
5458
+ * For existence relocations target spec will be NULL if field/type is not found.
5459
+ * Expected insn->imm value is determined using relocation kind and local
5460
+ * spec, and is checked before patching instruction. If actual insn->imm value
5461
+ * is wrong, bail out with error.
5462
+ *
5463
+ * Currently supported classes of BPF instruction are:
5464
+ * 1. rX = <imm> (assignment with immediate operand);
5465
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
5466
+ * 3. rX = <imm64> (load with 64-bit immediate value);
5467
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
5468
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
5469
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
5470
+ */
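One detail worth illustrating (a sketch, assuming the uapi struct bpf_insn layout from linux/bpf.h): a 64-bit immediate load occupies two struct bpf_insn slots, with the immediate split across the two 32-bit imm fields, which is why the BPF_LD case below reads and patches both halves and why poisoning also invalidates the second slot:

#include <linux/bpf.h>

/* reassemble the 64-bit immediate of an ldimm64 instruction pair */
static inline unsigned long long ldimm64_imm(const struct bpf_insn *insn)
{
	return (unsigned long long)(unsigned int)insn[0].imm |
	       ((unsigned long long)(unsigned int)insn[1].imm << 32);
}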
5471
+static int bpf_core_patch_insn(struct bpf_program *prog,
5472
+ const struct bpf_core_relo *relo,
5473
+ int relo_idx,
5474
+ const struct bpf_core_relo_res *res)
5475
+{
5476
+ __u32 orig_val, new_val;
5477
+ struct bpf_insn *insn;
5478
+ int insn_idx;
5479
+ __u8 class;
5480
+
5481
+ if (relo->insn_off % BPF_INSN_SZ)
5482
+ return -EINVAL;
5483
+ insn_idx = relo->insn_off / BPF_INSN_SZ;
5484
+ /* adjust insn_idx from section frame of reference to the local
5485
+ * program's frame of reference; (sub-)program code is not yet
5486
+ * relocated, so it's enough to just subtract in-section offset
5487
+ */
5488
+ insn_idx = insn_idx - prog->sec_insn_off;
5489
+ insn = &prog->insns[insn_idx];
5490
+ class = BPF_CLASS(insn->code);
5491
+
5492
+ if (res->poison) {
5493
+poison:
5494
+ /* poison second part of ldimm64 to avoid confusing error from
5495
+ * verifier about "unknown opcode 00"
5496
+ */
5497
+ if (is_ldimm64(insn))
5498
+ bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
5499
+ bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
5500
+ return 0;
5501
+ }
5502
+
5503
+ orig_val = res->orig_val;
5504
+ new_val = res->new_val;
5505
+
5506
+ switch (class) {
5507
+ case BPF_ALU:
5508
+ case BPF_ALU64:
5509
+ if (BPF_SRC(insn->code) != BPF_K)
5510
+ return -EINVAL;
5511
+ if (res->validate && insn->imm != orig_val) {
5512
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
5513
+ prog->name, relo_idx,
5514
+ insn_idx, insn->imm, orig_val, new_val);
5515
+ return -EINVAL;
5516
+ }
5517
+ orig_val = insn->imm;
5518
+ insn->imm = new_val;
5519
+ pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
5520
+ prog->name, relo_idx, insn_idx,
5521
+ orig_val, new_val);
5522
+ break;
5523
+ case BPF_LDX:
5524
+ case BPF_ST:
5525
+ case BPF_STX:
5526
+ if (res->validate && insn->off != orig_val) {
5527
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
5528
+ prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
5529
+ return -EINVAL;
5530
+ }
5531
+ if (new_val > SHRT_MAX) {
5532
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
5533
+ prog->name, relo_idx, insn_idx, new_val);
5534
+ return -ERANGE;
5535
+ }
5536
+ if (res->fail_memsz_adjust) {
5537
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
5538
+ "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
5539
+ prog->name, relo_idx, insn_idx);
5540
+ goto poison;
5541
+ }
5542
+
5543
+ orig_val = insn->off;
5544
+ insn->off = new_val;
5545
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
5546
+ prog->name, relo_idx, insn_idx, orig_val, new_val);
5547
+
5548
+ if (res->new_sz != res->orig_sz) {
5549
+ int insn_bytes_sz, insn_bpf_sz;
5550
+
5551
+ insn_bytes_sz = insn_bpf_size_to_bytes(insn);
5552
+ if (insn_bytes_sz != res->orig_sz) {
5553
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
5554
+ prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
5555
+ return -EINVAL;
5556
+ }
5557
+
5558
+ insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
5559
+ if (insn_bpf_sz < 0) {
5560
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
5561
+ prog->name, relo_idx, insn_idx, res->new_sz);
5562
+ return -EINVAL;
5563
+ }
5564
+
5565
+ insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
5566
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
5567
+ prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
5568
+ }
5569
+ break;
5570
+ case BPF_LD: {
5571
+ __u64 imm;
5572
+
5573
+ if (!is_ldimm64(insn) ||
5574
+ insn[0].src_reg != 0 || insn[0].off != 0 ||
5575
+ insn_idx + 1 >= prog->insns_cnt ||
5576
+ insn[1].code != 0 || insn[1].dst_reg != 0 ||
5577
+ insn[1].src_reg != 0 || insn[1].off != 0) {
5578
+ pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
5579
+ prog->name, relo_idx, insn_idx);
5580
+ return -EINVAL;
5581
+ }
5582
+
5583
+ imm = insn[0].imm + ((__u64)insn[1].imm << 32);
5584
+ if (res->validate && imm != orig_val) {
5585
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
5586
+ prog->name, relo_idx,
5587
+ insn_idx, (unsigned long long)imm,
5588
+ orig_val, new_val);
5589
+ return -EINVAL;
5590
+ }
5591
+
5592
+ insn[0].imm = new_val;
5593
+ insn[1].imm = 0; /* currently only 32-bit values are supported */
5594
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
5595
+ prog->name, relo_idx, insn_idx,
5596
+ (unsigned long long)imm, new_val);
5597
+ break;
5598
+ }
5599
+ default:
5600
+ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
5601
+ prog->name, relo_idx, insn_idx, insn->code,
5602
+ insn->src_reg, insn->dst_reg, insn->off, insn->imm);
5603
+ return -EINVAL;
5604
+ }
5605
+
5606
+ return 0;
5607
+}
5608
+
5609
+/* Output spec definition in the format:
5610
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
5611
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
5612
+ */
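For instance (hypothetical type ID, member indices, and offset), a field-based spec printed by this function could look like:

	[125] struct task_struct.comm[3] (0:67:3 @ offset 1363)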
5613
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
5614
+{
5615
+ const struct btf_type *t;
5616
+ const struct btf_enum *e;
5617
+ const char *s;
5618
+ __u32 type_id;
5619
+ int i;
5620
+
5621
+ type_id = spec->root_type_id;
5622
+ t = btf__type_by_id(spec->btf, type_id);
5623
+ s = btf__name_by_offset(spec->btf, t->name_off);
5624
+
5625
+ libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
5626
+
5627
+ if (core_relo_is_type_based(spec->relo_kind))
5628
+ return;
5629
+
5630
+ if (core_relo_is_enumval_based(spec->relo_kind)) {
5631
+ t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
5632
+ e = btf_enum(t) + spec->raw_spec[0];
5633
+ s = btf__name_by_offset(spec->btf, e->name_off);
5634
+
5635
+ libbpf_print(level, "::%s = %u", s, e->val);
5636
+ return;
5637
+ }
5638
+
5639
+ if (core_relo_is_field_based(spec->relo_kind)) {
5640
+ for (i = 0; i < spec->len; i++) {
5641
+ if (spec->spec[i].name)
5642
+ libbpf_print(level, ".%s", spec->spec[i].name);
5643
+ else if (i > 0 || spec->spec[i].idx > 0)
5644
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
5645
+ }
5646
+
5647
+ libbpf_print(level, " (");
5648
+ for (i = 0; i < spec->raw_len; i++)
5649
+ libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
5650
+
5651
+ if (spec->bit_offset % 8)
5652
+ libbpf_print(level, " @ offset %u.%u)",
5653
+ spec->bit_offset / 8, spec->bit_offset % 8);
5654
+ else
5655
+ libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
5656
+ return;
5657
+ }
5658
+}
5659
+
5660
+static size_t bpf_core_hash_fn(const void *key, void *ctx)
5661
+{
5662
+ return (size_t)key;
5663
+}
5664
+
5665
+static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
5666
+{
5667
+ return k1 == k2;
5668
+}
5669
+
5670
+static void *u32_as_hash_key(__u32 x)
5671
+{
5672
+ return (void *)(uintptr_t)x;
5673
+}
5674
+
5675
+/*
5676
+ * CO-RE relocate single instruction.
5677
+ *
5678
+ * The outline and important points of the algorithm:
5679
+ * 1. For given local type, find corresponding candidate target types.
5680
+ * Candidate type is a type with the same "essential" name, ignoring
5681
+ * everything after last triple underscore (___). E.g., `sample`,
5682
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
5683
+ * for each other. Names with triple underscore are referred to as
5684
+ * "flavors" and are useful, among other things, to allow to
5685
+ * specify/support incompatible variations of the same kernel struct, which
5686
+ * might differ between different kernel versions and/or build
5687
+ * configurations.
5688
+ *
5689
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
5690
+ * converter, when deduplicated BTF of a kernel still contains more than
5691
+ * one distinct type with the same name. In that case, ___2, ___3, etc
5692
+ * are appended starting from the second name conflict. But such flavors are
5693
+ * also useful when defined "locally", in a BPF program, to extract the same
5694
+ * data despite incompatible changes between different kernel
5695
+ * versions/configurations. For instance, to handle field renames between
5696
+ * kernel versions, one can use two flavors of the struct name with the
5697
+ * same common name and use conditional relocations to extract that field,
5698
+ * depending on target kernel version.
5699
+ * 2. For each candidate type, try to match local specification to this
5700
+ * candidate target type. Matching involves finding corresponding
5701
+ * high-level spec accessors, meaning that all named fields should match,
5702
+ * as well as all array accesses should be within the actual bounds. Also,
5703
+ * types should be compatible (see bpf_core_fields_are_compat for details).
5704
+ * 3. It is supported and expected that there might be multiple flavors
5705
+ * matching the spec. As long as all the specs resolve to the same set of
5706
+ * offsets across all candidates, there is no error. If there is any
5707
+ * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
5708
+ * imperfection of BTF deduplication, which can cause slight duplication of
5709
+ * the same BTF type, if some directly or indirectly referenced (by
5710
+ * pointer) type gets resolved to different actual types in different
5711
+ * object files. If such a situation occurs, deduplicated BTF will end up
5712
+ * with two (or more) structurally identical types, which differ only in
5713
+ * types they refer to through pointer. This should be OK in most cases and
5714
+ * is not an error.
5715
+ * 4. Candidate types search is performed by linearly scanning through all
5716
+ * types in target BTF. It is anticipated that this is overall more
5717
+ * efficient memory-wise and not significantly worse (if not better)
5718
+ * CPU-wise compared to prebuilding a map from all local type names to
5719
+ * a list of candidate type names. It's also sped up by caching the resolved
5720
+ * list of matching candidates for each local "root" type ID that has at
5721
+ * least one bpf_core_relo associated with it. This list is shared
5722
+ * between multiple relocations for the same type ID and is updated as some
5723
+ * of the candidates are pruned due to structural incompatibility.
5724
+ */
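A minimal sketch of the "flavor" mechanism described above, as it might be used from BPF program code compiled with clang's BPF target (struct and field names are invented; bpf_core_field_exists() and BPF_CORE_READ() come from bpf_core_read.h):

#include <bpf/bpf_core_read.h>

struct kernel_event___old {
	unsigned long long ts;
} __attribute__((preserve_access_index));

struct kernel_event___new {
	unsigned long long timestamp;
} __attribute__((preserve_access_index));

/* both flavors resolve against the kernel's "kernel_event" type; the
 * field-existence relocation picks whichever layout the running kernel has
 */
static inline unsigned long long event_ts(void *ev)
{
	struct kernel_event___new *n = ev;
	struct kernel_event___old *o = ev;

	if (bpf_core_field_exists(n->timestamp))
		return BPF_CORE_READ(n, timestamp);
	return BPF_CORE_READ(o, ts);
}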
5725
+static int bpf_core_apply_relo(struct bpf_program *prog,
5726
+ const struct bpf_core_relo *relo,
5727
+ int relo_idx,
5728
+ const struct btf *local_btf,
5729
+ const struct btf *targ_btf,
5730
+ struct hashmap *cand_cache)
5731
+{
5732
+ struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
5733
+ const void *type_key = u32_as_hash_key(relo->type_id);
5734
+ struct bpf_core_relo_res cand_res, targ_res;
5735
+ const struct btf_type *local_type;
5736
+ const char *local_name;
5737
+ struct ids_vec *cand_ids;
5738
+ __u32 local_id, cand_id;
5739
+ const char *spec_str;
5740
+ int i, j, err;
5741
+
5742
+ local_id = relo->type_id;
5743
+ local_type = btf__type_by_id(local_btf, local_id);
5744
+ if (!local_type)
5745
+ return -EINVAL;
5746
+
5747
+ local_name = btf__name_by_offset(local_btf, local_type->name_off);
5748
+ if (!local_name)
5749
+ return -EINVAL;
5750
+
5751
+ spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
5752
+ if (str_is_empty(spec_str))
5753
+ return -EINVAL;
5754
+
5755
+ err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
5756
+ if (err) {
5757
+ pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
5758
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
5759
+ str_is_empty(local_name) ? "<anon>" : local_name,
5760
+ spec_str, err);
5761
+ return -EINVAL;
5762
+ }
5763
+
5764
+ pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
5765
+ relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5766
+ bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
5767
+ libbpf_print(LIBBPF_DEBUG, "\n");
5768
+
5769
+ /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
5770
+ if (relo->kind == BPF_TYPE_ID_LOCAL) {
5771
+ targ_res.validate = true;
5772
+ targ_res.poison = false;
5773
+ targ_res.orig_val = local_spec.root_type_id;
5774
+ targ_res.new_val = local_spec.root_type_id;
5775
+ goto patch_insn;
5776
+ }
5777
+
5778
+ /* libbpf doesn't support candidate search for anonymous types */
5779
+ if (str_is_empty(spec_str)) {
5780
+ pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
5781
+ prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
5782
+ return -EOPNOTSUPP;
5783
+ }
5784
+
5785
+ if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
5786
+ cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
5787
+ if (IS_ERR(cand_ids)) {
5788
+ pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
5789
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
5790
+ local_name, PTR_ERR(cand_ids));
5791
+ return PTR_ERR(cand_ids);
5792
+ }
5793
+ err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
5794
+ if (err) {
5795
+ bpf_core_free_cands(cand_ids);
5796
+ return err;
5797
+ }
5798
+ }
5799
+
5800
+ for (i = 0, j = 0; i < cand_ids->len; i++) {
5801
+ cand_id = cand_ids->data[i];
5802
+ err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
5803
+ if (err < 0) {
5804
+ pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
5805
+ prog->name, relo_idx, i);
5806
+ bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
5807
+ libbpf_print(LIBBPF_WARN, ": %d\n", err);
5808
+ return err;
5809
+ }
5810
+
5811
+ pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
5812
+ relo_idx, err == 0 ? "non-matching" : "matching", i);
5813
+ bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
5814
+ libbpf_print(LIBBPF_DEBUG, "\n");
5815
+
5816
+ if (err == 0)
5817
+ continue;
5818
+
5819
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
5820
+ if (err)
5821
+ return err;
5822
+
5823
+ if (j == 0) {
5824
+ targ_res = cand_res;
5825
+ targ_spec = cand_spec;
5826
+ } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
5827
+ /* if there are many field relo candidates, they
5828
+ * should all resolve to the same bit offset
5829
+ */
5830
+ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
5831
+ prog->name, relo_idx, cand_spec.bit_offset,
5832
+ targ_spec.bit_offset);
5833
+ return -EINVAL;
5834
+ } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
5835
+ /* all candidates should result in the same relocation
5836
+ * decision and value, otherwise it's dangerous to
5837
+ * proceed due to ambiguity
5838
+ */
5839
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
5840
+ prog->name, relo_idx,
5841
+ cand_res.poison ? "failure" : "success", cand_res.new_val,
5842
+ targ_res.poison ? "failure" : "success", targ_res.new_val);
5843
+ return -EINVAL;
5844
+ }
5845
+
5846
+ cand_ids->data[j++] = cand_spec.root_type_id;
5847
+ }
5848
+
5849
+ /*
5850
+ * For a BPF_FIELD_EXISTS relo, or when the BPF program has field
5851
+ * existence checks or kernel version/config checks, it's expected
5852
+ * that we might not find any candidates. In this case, if field
5853
+ * wasn't found in any candidate, the list of candidates shouldn't
5854
+ * change at all; we'll just handle the relocation appropriately,
5855
+ * depending on relo's kind.
5856
+ */
5857
+ if (j > 0)
5858
+ cand_ids->len = j;
5859
+
5860
+ /*
5861
+ * If no candidates were found, it might be either a programmer error
5862
+ * or an expected case, depending on whether the instruction with the
5863
+ * relocation is guarded in some way that makes it unreachable (dead
5864
+ * code) if relocation can't be resolved. This is handled in
5865
+ * bpf_core_patch_insn() uniformly by replacing that instruction with
5866
+ * BPF helper call insn (using invalid helper ID). If that instruction
5867
+ * is indeed unreachable, then it will be ignored and eliminated by
5868
+ * verifier. If it was an error, then verifier will complain and point
5869
+ * to a specific instruction number in its log.
5870
+ */
5871
+ if (j == 0) {
5872
+ pr_debug("prog '%s': relo #%d: no matching targets found\n",
5873
+ prog->name, relo_idx);
5874
+
5875
+ /* calculate single target relo result explicitly */
5876
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
5877
+ if (err)
5878
+ return err;
5879
+ }
5880
+
5881
+patch_insn:
5882
+ /* bpf_core_patch_insn() should know how to handle missing targ_spec */
5883
+ err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
5884
+ if (err) {
5885
+ pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
5886
+ prog->name, relo_idx, relo->insn_off, err);
5887
+ return -EINVAL;
5888
+ }
5889
+
5890
+ return 0;
5891
+}
5892
+
5893
+static int
5894
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5895
+{
5896
+ const struct btf_ext_info_sec *sec;
5897
+ const struct bpf_core_relo *rec;
5898
+ const struct btf_ext_info *seg;
5899
+ struct hashmap_entry *entry;
5900
+ struct hashmap *cand_cache = NULL;
5901
+ struct bpf_program *prog;
5902
+ struct btf *targ_btf;
5903
+ const char *sec_name;
5904
+ int i, err = 0, insn_idx, sec_idx;
5905
+
5906
+ if (obj->btf_ext->core_relo_info.len == 0)
5907
+ return 0;
5908
+
5909
+ if (targ_btf_path)
5910
+ targ_btf = btf__parse(targ_btf_path, NULL);
5911
+ else
5912
+ targ_btf = obj->btf_vmlinux;
5913
+ if (IS_ERR_OR_NULL(targ_btf)) {
5914
+ pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
5915
+ return PTR_ERR(targ_btf);
5916
+ }
5917
+
5918
+ cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5919
+ if (IS_ERR(cand_cache)) {
5920
+ err = PTR_ERR(cand_cache);
5921
+ goto out;
5922
+ }
5923
+
5924
+ seg = &obj->btf_ext->core_relo_info;
5925
+ for_each_btf_ext_sec(seg, sec) {
5926
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5927
+ if (str_is_empty(sec_name)) {
5928
+ err = -EINVAL;
5929
+ goto out;
5930
+ }
5931
+ /* bpf_object's ELF is gone by now so it's not easy to find
5932
+ * section index by section name, but we can find *any*
5933
+ * bpf_program within the desired section and use its
5934
+ * prog->sec_idx to do a proper search by section index and
5935
+ * instruction offset
5936
+ */
5937
+ prog = NULL;
5938
+ for (i = 0; i < obj->nr_programs; i++) {
5939
+ if (strcmp(obj->programs[i].sec_name, sec_name) == 0) {
5940
+ prog = &obj->programs[i];
5941
+ break;
5942
+ }
5943
+ }
5944
+ if (!prog) {
5945
+ pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
5946
+ return -ENOENT;
5947
+ }
5948
+ sec_idx = prog->sec_idx;
5949
+
5950
+ pr_debug("sec '%s': found %d CO-RE relocations\n",
5951
+ sec_name, sec->num_info);
5952
+
5953
+ for_each_btf_ext_rec(seg, sec, i, rec) {
5954
+ insn_idx = rec->insn_off / BPF_INSN_SZ;
5955
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5956
+ if (!prog) {
5957
+ /* When __weak subprog is "overridden" by another instance
5958
+ * of the subprog from a different object file, linker still
5959
+ * appends all the .BTF.ext info that used to belong to that
5960
+ * eliminated subprogram.
5961
+ * This is similar to what x86-64 linker does for relocations.
5962
+ * So ignore such relocations, just like we ignore
5963
+ * subprog instructions when discovering subprograms.
5964
+ */
5965
+ pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5966
+ sec_name, i, insn_idx);
5967
+ continue;
5968
+ }
5969
+ /* no need to apply CO-RE relocation if the program is
5970
+ * not going to be loaded
5971
+ */
5972
+ if (!prog->load)
5973
+ continue;
5974
+
5975
+ err = bpf_core_apply_relo(prog, rec, i, obj->btf,
5976
+ targ_btf, cand_cache);
5977
+ if (err) {
5978
+ pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5979
+ prog->name, i, err);
5980
+ goto out;
5981
+ }
5982
+ }
5983
+ }
5984
+
5985
+out:
5986
+ /* obj->btf_vmlinux is freed at the end of object load phase */
5987
+ if (targ_btf != obj->btf_vmlinux)
5988
+ btf__free(targ_btf);
5989
+ if (!IS_ERR_OR_NULL(cand_cache)) {
5990
+ hashmap__for_each_entry(cand_cache, entry, i) {
5991
+ bpf_core_free_cands(entry->value);
5992
+ }
5993
+ hashmap__free(cand_cache);
5994
+ }
5995
+ return err;
5996
+}
5997
+
5998
+/* Relocate data references within program code:
5999
+ * - map references;
6000
+ * - global variable references;
6001
+ * - extern references.
6002
+ */
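A hedged sketch of the BPF-side constructs that give rise to these relocation kinds (names are invented; __uint, __type, SEC, and __kconfig are the usual helpers from bpf_helpers.h): referencing the map produces a RELO_LD64, reading the global variable produces a RELO_DATA against the .data map, and reading the kconfig extern produces a RELO_EXTERN:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");		/* map reference -> RELO_LD64 */

int cfg_verbose = 0;			/* global in .data -> RELO_DATA */

extern int CONFIG_HZ __kconfig;		/* kconfig extern -> RELO_EXTERN */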
6003
+static int
6004
+bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
6005
+{
6006
+ int i;
6007
+
6008
+ for (i = 0; i < prog->nr_reloc; i++) {
6009
+ struct reloc_desc *relo = &prog->reloc_desc[i];
6010
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6011
+ struct extern_desc *ext;
6012
+
6013
+ switch (relo->type) {
6014
+ case RELO_LD64:
6015
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
6016
+ insn[0].imm = obj->maps[relo->map_idx].fd;
6017
+ relo->processed = true;
6018
+ break;
6019
+ case RELO_DATA:
6020
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6021
+ insn[1].imm = insn[0].imm + relo->sym_off;
6022
+ insn[0].imm = obj->maps[relo->map_idx].fd;
6023
+ relo->processed = true;
6024
+ break;
6025
+ case RELO_EXTERN:
6026
+ ext = &obj->externs[relo->sym_off];
6027
+ if (ext->type == EXT_KCFG) {
6028
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6029
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6030
+ insn[1].imm = ext->kcfg.data_off;
6031
+ } else /* EXT_KSYM */ {
6032
+ if (ext->ksym.type_id) { /* typed ksyms */
6033
+ insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6034
+ insn[0].imm = ext->ksym.vmlinux_btf_id;
6035
+ } else { /* typeless ksyms */
6036
+ insn[0].imm = (__u32)ext->ksym.addr;
6037
+ insn[1].imm = ext->ksym.addr >> 32;
6038
+ }
6039
+ }
6040
+ relo->processed = true;
6041
+ break;
6042
+ case RELO_CALL:
6043
+ /* will be handled as a follow up pass */
6044
+ break;
6045
+ default:
6046
+ pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6047
+ prog->name, i, relo->type);
6048
+ return -EINVAL;
6049
+ }
6050
+ }
6051
+
6052
+ return 0;
6053
+}
6054
+
6055
+static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6056
+ const struct bpf_program *prog,
6057
+ const struct btf_ext_info *ext_info,
6058
+ void **prog_info, __u32 *prog_rec_cnt,
6059
+ __u32 *prog_rec_sz)
6060
+{
6061
+ void *copy_start = NULL, *copy_end = NULL;
6062
+ void *rec, *rec_end, *new_prog_info;
6063
+ const struct btf_ext_info_sec *sec;
6064
+ size_t old_sz, new_sz;
6065
+ const char *sec_name;
6066
+ int i, off_adj;
6067
+
6068
+ for_each_btf_ext_sec(ext_info, sec) {
6069
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6070
+ if (!sec_name)
6071
+ return -EINVAL;
6072
+ if (strcmp(sec_name, prog->sec_name) != 0)
6073
+ continue;
6074
+
6075
+ for_each_btf_ext_rec(ext_info, sec, i, rec) {
6076
+ __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6077
+
6078
+ if (insn_off < prog->sec_insn_off)
6079
+ continue;
6080
+ if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6081
+ break;
6082
+
6083
+ if (!copy_start)
6084
+ copy_start = rec;
6085
+ copy_end = rec + ext_info->rec_size;
6086
+ }
6087
+
6088
+ if (!copy_start)
6089
+ return -ENOENT;
6090
+
6091
+ /* append func/line info of a given (sub-)program to the main
6092
+ * program func/line info
6093
+ */
6094
+ old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6095
+ new_sz = old_sz + (copy_end - copy_start);
6096
+ new_prog_info = realloc(*prog_info, new_sz);
6097
+ if (!new_prog_info)
6098
+ return -ENOMEM;
6099
+ *prog_info = new_prog_info;
6100
+ *prog_rec_cnt = new_sz / ext_info->rec_size;
6101
+ memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6102
+
6103
+ /* Kernel instruction offsets are in units of 8-byte
6104
+ * instructions, while .BTF.ext instruction offsets generated
6105
+ * by Clang are in units of bytes. So convert Clang offsets
6106
+ * into kernel offsets and adjust offset according to program
6107
+ * relocated position.
6108
+ */
6109
+ off_adj = prog->sub_insn_off - prog->sec_insn_off;
6110
+ rec = new_prog_info + old_sz;
6111
+ rec_end = new_prog_info + new_sz;
6112
+ for (; rec < rec_end; rec += ext_info->rec_size) {
6113
+ __u32 *insn_off = rec;
6114
+
6115
+ *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6116
+ }
6117
+ *prog_rec_sz = ext_info->rec_size;
6118
+ return 0;
6119
+ }
6120
+
6121
+ return -ENOENT;
6122
+}
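A worked example of the offset conversion above (illustrative numbers only): a .BTF.ext record with a Clang byte offset of 40, belonging to a subprogram that starts at section instruction 3 and was appended at main-program instruction 100, ends up with a kernel instruction offset of 40 / 8 + (100 - 3) = 102.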
6123
+
6124
+static int
6125
+reloc_prog_func_and_line_info(const struct bpf_object *obj,
6126
+ struct bpf_program *main_prog,
6127
+ const struct bpf_program *prog)
6128
+{
6129
+ int err;
6130
+
6131
+ /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6132
+ * support func/line info
6133
+ */
6134
+ if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
6135
+ return 0;
6136
+
6137
+ /* only attempt func info relocation if main program's func_info
6138
+ * relocation was successful
6139
+ */
6140
+ if (main_prog != prog && !main_prog->func_info)
6141
+ goto line_info;
6142
+
6143
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6144
+ &main_prog->func_info,
6145
+ &main_prog->func_info_cnt,
6146
+ &main_prog->func_info_rec_size);
6147
+ if (err) {
6148
+ if (err != -ENOENT) {
6149
+ pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
6150
+ prog->name, err);
6151
+ return err;
6152
+ }
6153
+ if (main_prog->func_info) {
6154
+ /*
6155
+ * Some info has already been found but there was a problem
6156
+ * with the last btf_ext reloc. We have to error out.
6157
+ */
6158
+ pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6159
+ return err;
6160
+ }
6161
+ /* There was a problem loading the very first info. Ignore the rest. */
6162
+ pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6163
+ prog->name);
6164
+ }
6165
+
6166
+line_info:
6167
+ /* don't relocate line info if main program's relocation failed */
6168
+ if (main_prog != prog && !main_prog->line_info)
6169
+ return 0;
6170
+
6171
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6172
+ &main_prog->line_info,
6173
+ &main_prog->line_info_cnt,
6174
+ &main_prog->line_info_rec_size);
6175
+ if (err) {
6176
+ if (err != -ENOENT) {
6177
+ pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6178
+ prog->name, err);
6179
+ return err;
6180
+ }
6181
+ if (main_prog->line_info) {
6182
+ /*
6183
+ * Some info has already been found but there was a problem
6184
+ * with the last btf_ext reloc. We have to error out.
6185
+ */
6186
+ pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6187
+ return err;
6188
+ }
6189
+ /* There was a problem loading the very first info. Ignore the rest. */
6190
+ pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6191
+ prog->name);
6192
+ }
6193
+ return 0;
6194
+}
6195
+
6196
+static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6197
+{
6198
+ size_t insn_idx = *(const size_t *)key;
6199
+ const struct reloc_desc *relo = elem;
6200
+
6201
+ if (insn_idx == relo->insn_idx)
6202
+ return 0;
6203
+ return insn_idx < relo->insn_idx ? -1 : 1;
6204
+}
6205
+
6206
+static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6207
+{
6208
+ return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6209
+ sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6210
+}
6211
+
6212
+static int
6213
+bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6214
+ struct bpf_program *prog)
6215
+{
6216
+ size_t sub_insn_idx, insn_idx, new_cnt;
6217
+ struct bpf_program *subprog;
6218
+ struct bpf_insn *insns, *insn;
6219
+ struct reloc_desc *relo;
6220
+ int err;
6221
+
6222
+ err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6223
+ if (err)
6224
+ return err;
6225
+
6226
+ for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6227
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6228
+ if (!insn_is_subprog_call(insn))
6229
+ continue;
6230
+
6231
+ relo = find_prog_insn_relo(prog, insn_idx);
6232
+ if (relo && relo->type != RELO_CALL) {
6233
+ pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6234
+ prog->name, insn_idx, relo->type);
6235
+ return -LIBBPF_ERRNO__RELOC;
6236
+ }
6237
+ if (relo) {
6238
+ /* sub-program instruction index is a combination of
6239
+ * an offset of a symbol pointed to by relocation and
6240
+ * call instruction's imm field; for global functions,
6241
+ * call always has imm = -1, but for static functions
6242
+ * relocation is against STT_SECTION and insn->imm
6243
+ * points to the start of a static function
6244
+ */
6245
+ sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6246
+ } else {
6247
+ /* if subprogram call is to a static function within
6248
+ * the same ELF section, there won't be any relocation
6249
+ * emitted, but it also means there is no additional
6250
+ * offset necessary, insns->imm is relative to
6251
+ * instruction's original position within the section
6252
+ */
6253
+ sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6254
+ }
6255
+
6256
+ /* we enforce that sub-programs should be in .text section */
6257
+ subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6258
+ if (!subprog) {
6259
+ pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6260
+ prog->name);
6261
+ return -LIBBPF_ERRNO__RELOC;
6262
+ }
6263
+
6264
+ /* if it's the first call instruction calling into this
6265
+ * subprogram (meaning this subprog hasn't been processed
6266
+ * yet) within the context of current main program:
6267
+ * - append it at the end of main program's instruction block;
6268
+ * - process it recursively, while the current program is put on hold;
6269
+ * - if that subprogram calls some other not yet processed
6270
+ * subprogram, same thing will happen recursively until
6271
+ * there are no more unprocessed subprograms left to append
6272
+ * and relocate.
6273
+ */
6274
+ if (subprog->sub_insn_off == 0) {
6275
+ subprog->sub_insn_off = main_prog->insns_cnt;
6276
+
6277
+ new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6278
+ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6279
+ if (!insns) {
6280
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6281
+ return -ENOMEM;
6282
+ }
6283
+ main_prog->insns = insns;
6284
+ main_prog->insns_cnt = new_cnt;
6285
+
6286
+ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6287
+ subprog->insns_cnt * sizeof(*insns));
6288
+
6289
+ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6290
+ main_prog->name, subprog->insns_cnt, subprog->name);
6291
+
6292
+ err = bpf_object__reloc_code(obj, main_prog, subprog);
12486293 if (err)
12496294 return err;
12506295 }
6296
+
6297
+ /* main_prog->insns memory could have been re-allocated, so
6298
+ * calculate pointer again
6299
+ */
6300
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6301
+ /* calculate correct instruction position within current main
6302
+ * prog; each main prog can have a different set of
6303
+ * subprograms appended (potentially in different order as
6304
+ * well), so position of any subprog can be different for
6305
+ * different main programs */
6306
+ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6307
+
6308
+ if (relo)
6309
+ relo->processed = true;
6310
+
6311
+ pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6312
+ prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
12516313 }
12526314
1253
- zfree(&prog->reloc_desc);
1254
- prog->nr_reloc = 0;
12556315 return 0;
12566316 }
12576317
6318
+/*
6319
+ * Relocate sub-program calls.
6320
+ *
6321
+ * The algorithm operates as follows. Each entry-point BPF program (referred to as
6322
+ * main prog) is processed separately. Each subprog (a non-entry function
6323
+ * that can be called from either entry progs or other subprogs) gets its
6324
+ * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6325
+ * hasn't yet been appended and relocated within the current main prog. Once it's
6326
+ * relocated, sub_insn_off will point at the position within current main prog
6327
+ * where given subprog was appended. This will further be used to relocate all
6328
+ * the call instructions jumping into this subprog.
6329
+ *
6330
+ * We start with main program and process all call instructions. If the call
6331
+ * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6332
+ * is zero), subprog instructions are appended at the end of main program's
6333
+ * instruction array. Then main program is "put on hold" while we recursively
6334
+ * process newly appended subprogram. If that subprogram calls into another
6335
+ * subprogram that hasn't been appended, new subprogram is appended again to
6336
+ * the *main* prog's instructions (subprog's instructions are always left
6337
+ * untouched, as they need to be in unmodified state for subsequent main progs
6338
+ * and subprog instructions are always sent only as part of a main prog) and
6339
+ * the process continues recursively. Once all the subprogs called from a main
6340
+ * prog or any of its subprogs are appended (and relocated), all their
6341
+ * positions within finalized instructions array are known, so it's easy to
6342
+ * rewrite call instructions with correct relative offsets, corresponding to
6343
+ * desired target subprog.
6344
+ *
6345
+ * It's important to realize that some subprogs might not be called from a given
6346
+ * main prog or any of its called/used subprogs. Those will keep their
6347
+ * subprog->sub_insn_off as zero at all times and won't be appended to current
6348
+ * main prog and won't be relocated within the context of current main prog.
6349
+ * They might still be used from other main progs later.
6350
+ *
6351
+ * Visually this process can be shown as below. Suppose we have two main
6352
+ * programs mainA and mainB and BPF object contains three subprogs: subA,
6353
+ * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6354
+ * subC both call subB:
6355
+ *
6356
+ * +--------+ +-------+
6357
+ * | v v |
6358
+ * +--+---+ +--+-+-+ +---+--+
6359
+ * | subA | | subB | | subC |
6360
+ * +--+---+ +------+ +---+--+
6361
+ * ^ ^
6362
+ * | |
6363
+ * +---+-------+ +------+----+
6364
+ * | mainA | | mainB |
6365
+ * +-----------+ +-----------+
6366
+ *
6367
+ * We'll start relocating mainA, will find subA, append it and start
6368
+ * processing subA recursively:
6369
+ *
6370
+ * +-----------+------+
6371
+ * | mainA | subA |
6372
+ * +-----------+------+
6373
+ *
6374
+ * At this point we notice that subB is used from subA, so we append it and
6375
+ * relocate (there are no further subcalls from subB):
6376
+ *
6377
+ * +-----------+------+------+
6378
+ * | mainA | subA | subB |
6379
+ * +-----------+------+------+
6380
+ *
6381
+ * At this point, we relocate subA calls, then go one level up and finish with
6382
+ * relocating mainA calls. mainA is done.
6383
+ *
6384
+ * For mainB the process is similar but results in a different order. We start with
6385
+ * mainB and skip subA and subB, as mainB never calls them (at least
6386
+ * directly), but we see subC is needed, so we append and start processing it:
6387
+ *
6388
+ * +-----------+------+
6389
+ * | mainB | subC |
6390
+ * +-----------+------+
6391
+ *
+ * Now we see subC needs subB, so we go back to it, append and relocate it:
6392
+ *
6393
+ * +-----------+------+------+
6394
+ * | mainB | subC | subB |
6395
+ * +-----------+------+------+
6396
+ *
6397
+ * At this point we unwind recursion, relocate calls in subC, then in mainB.
6398
+ */
6399
+static int
6400
+bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6401
+{
6402
+ struct bpf_program *subprog;
6403
+ int i, j, err;
6404
+
6405
+ /* mark all subprogs as not relocated (yet) within the context of
6406
+ * current main program
6407
+ */
6408
+ for (i = 0; i < obj->nr_programs; i++) {
6409
+ subprog = &obj->programs[i];
6410
+ if (!prog_is_subprog(obj, subprog))
6411
+ continue;
6412
+
6413
+ subprog->sub_insn_off = 0;
6414
+ for (j = 0; j < subprog->nr_reloc; j++)
6415
+ if (subprog->reloc_desc[j].type == RELO_CALL)
6416
+ subprog->reloc_desc[j].processed = false;
6417
+ }
6418
+
6419
+ err = bpf_object__reloc_code(obj, prog, prog);
6420
+ if (err)
6421
+ return err;
6422
+
6423
+
6424
+ return 0;
6425
+}
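
/* Illustrative sketch only (not part of libbpf): a BPF-side layout matching
 * the mainA/mainB/subA/subB/subC diagram above. Function and section names
 * are hypothetical; SEC() is assumed to come from <bpf/bpf_helpers.h>, and
 * noinline keeps each helper function a separate subprogram.
 */
static __attribute__((noinline)) int subB(void) { return 1; }
static __attribute__((noinline)) int subA(void) { return subB() + 1; }
static __attribute__((noinline)) int subC(void) { return subB() + 2; }

SEC("raw_tp/sys_enter")
int mainA(void *ctx) { return subA(); }	/* laid out as mainA|subA|subB */

SEC("raw_tp/sys_exit")
int mainB(void *ctx) { return subC(); }	/* laid out as mainB|subC|subB */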
12586426
12596427 static int
1260
-bpf_object__relocate(struct bpf_object *obj)
6428
+bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
12616429 {
12626430 struct bpf_program *prog;
12636431 size_t i;
12646432 int err;
12656433
1266
- for (i = 0; i < obj->nr_programs; i++) {
1267
- prog = &obj->programs[i];
1268
-
1269
- err = bpf_program__relocate(prog, obj);
6434
+ if (obj->btf_ext) {
6435
+ err = bpf_object__relocate_core(obj, targ_btf_path);
12706436 if (err) {
1271
- pr_warning("failed to relocate '%s'\n",
1272
- prog->section_name);
6437
+ pr_warn("failed to perform CO-RE relocations: %d\n",
6438
+ err);
12736439 return err;
12746440 }
6441
+ }
6442
+ /* relocate data references first for all programs and sub-programs,
6443
+ * as they don't change relative to code locations, so subsequent
6444
+ * subprogram processing won't need to re-calculate any of them
6445
+ */
6446
+ for (i = 0; i < obj->nr_programs; i++) {
6447
+ prog = &obj->programs[i];
6448
+ err = bpf_object__relocate_data(obj, prog);
6449
+ if (err) {
6450
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
6451
+ prog->name, err);
6452
+ return err;
6453
+ }
6454
+ }
6455
+ /* now relocate subprogram calls and append used subprograms to main
6456
+ * programs; each copy of subprogram code needs to be relocated
6457
+ * differently for each main program, because its code location might
6458
+ * have changed
6459
+ */
6460
+ for (i = 0; i < obj->nr_programs; i++) {
6461
+ prog = &obj->programs[i];
6462
+ /* sub-program's sub-calls are relocated within the context of
6463
+ * its main program only
6464
+ */
6465
+ if (prog_is_subprog(obj, prog))
6466
+ continue;
6467
+
6468
+ err = bpf_object__relocate_calls(obj, prog);
6469
+ if (err) {
6470
+ pr_warn("prog '%s': failed to relocate calls: %d\n",
6471
+ prog->name, err);
6472
+ return err;
6473
+ }
6474
+ }
6475
+ /* free up relocation descriptors */
6476
+ for (i = 0; i < obj->nr_programs; i++) {
6477
+ prog = &obj->programs[i];
6478
+ zfree(&prog->reloc_desc);
6479
+ prog->nr_reloc = 0;
12756480 }
12766481 return 0;
12776482 }
12786483
1279
-static int bpf_object__collect_reloc(struct bpf_object *obj)
6484
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6485
+ GElf_Shdr *shdr, Elf_Data *data);
6486
+
6487
+static int bpf_object__collect_map_relos(struct bpf_object *obj,
6488
+ GElf_Shdr *shdr, Elf_Data *data)
12806489 {
1281
- int i, err;
6490
+ const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6491
+ int i, j, nrels, new_sz;
6492
+ const struct btf_var_secinfo *vi = NULL;
6493
+ const struct btf_type *sec, *var, *def;
6494
+ struct bpf_map *map = NULL, *targ_map;
6495
+ const struct btf_member *member;
6496
+ const char *name, *mname;
6497
+ Elf_Data *symbols;
6498
+ unsigned int moff;
6499
+ GElf_Sym sym;
6500
+ GElf_Rel rel;
6501
+ void *tmp;
12826502
1283
- if (!obj_elf_valid(obj)) {
1284
- pr_warning("Internal error: elf object is closed\n");
1285
- return -LIBBPF_ERRNO__INTERNAL;
1286
- }
6503
+ if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6504
+ return -EINVAL;
6505
+ sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6506
+ if (!sec)
6507
+ return -EINVAL;
12876508
1288
- for (i = 0; i < obj->efile.nr_reloc; i++) {
1289
- GElf_Shdr *shdr = &obj->efile.reloc[i].shdr;
1290
- Elf_Data *data = obj->efile.reloc[i].data;
1291
- int idx = shdr->sh_info;
1292
- struct bpf_program *prog;
1293
-
1294
- if (shdr->sh_type != SHT_REL) {
1295
- pr_warning("internal error at %d\n", __LINE__);
1296
- return -LIBBPF_ERRNO__INTERNAL;
6509
+ symbols = obj->efile.symbols;
6510
+ nrels = shdr->sh_size / shdr->sh_entsize;
6511
+ for (i = 0; i < nrels; i++) {
6512
+ if (!gelf_getrel(data, i, &rel)) {
6513
+ pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6514
+ return -LIBBPF_ERRNO__FORMAT;
12976515 }
1298
-
1299
- prog = bpf_object__find_prog_by_idx(obj, idx);
1300
- if (!prog) {
1301
- pr_warning("relocation failed: no section(%d)\n", idx);
6516
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
6517
+ pr_warn(".maps relo #%d: symbol %zx not found\n",
6518
+ i, (size_t)GELF_R_SYM(rel.r_info));
6519
+ return -LIBBPF_ERRNO__FORMAT;
6520
+ }
6521
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
6522
+ if (sym.st_shndx != obj->efile.btf_maps_shndx) {
6523
+ pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6524
+ i, name);
13026525 return -LIBBPF_ERRNO__RELOC;
13036526 }
13046527
1305
- err = bpf_program__collect_reloc(prog,
1306
- shdr, data,
1307
- obj);
6528
+ pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
6529
+ i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
6530
+ (size_t)rel.r_offset, sym.st_name, name);
6531
+
6532
+ for (j = 0; j < obj->nr_maps; j++) {
6533
+ map = &obj->maps[j];
6534
+ if (map->sec_idx != obj->efile.btf_maps_shndx)
6535
+ continue;
6536
+
6537
+ vi = btf_var_secinfos(sec) + map->btf_var_idx;
6538
+ if (vi->offset <= rel.r_offset &&
6539
+ rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6540
+ break;
6541
+ }
6542
+ if (j == obj->nr_maps) {
6543
+ pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
6544
+ i, name, (size_t)rel.r_offset);
6545
+ return -EINVAL;
6546
+ }
6547
+
6548
+ if (!bpf_map_type__is_map_in_map(map->def.type))
6549
+ return -EINVAL;
6550
+ if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6551
+ map->def.key_size != sizeof(int)) {
6552
+ pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6553
+ i, map->name, sizeof(int));
6554
+ return -EINVAL;
6555
+ }
6556
+
6557
+ targ_map = bpf_object__find_map_by_name(obj, name);
6558
+ if (!targ_map)
6559
+ return -ESRCH;
6560
+
6561
+ var = btf__type_by_id(obj->btf, vi->type);
6562
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6563
+ if (btf_vlen(def) == 0)
6564
+ return -EINVAL;
6565
+ member = btf_members(def) + btf_vlen(def) - 1;
6566
+ mname = btf__name_by_offset(obj->btf, member->name_off);
6567
+ if (strcmp(mname, "values"))
6568
+ return -EINVAL;
6569
+
6570
+ moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6571
+ if (rel.r_offset - vi->offset < moff)
6572
+ return -EINVAL;
6573
+
6574
+ moff = rel.r_offset - vi->offset - moff;
6575
+ /* here we use BPF pointer size, which is always 64 bit, as we
6576
+ * are parsing ELF that was built for BPF target
6577
+ */
6578
+ if (moff % bpf_ptr_sz)
6579
+ return -EINVAL;
6580
+ moff /= bpf_ptr_sz;
6581
+ if (moff >= map->init_slots_sz) {
6582
+ new_sz = moff + 1;
6583
+ tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6584
+ if (!tmp)
6585
+ return -ENOMEM;
6586
+ map->init_slots = tmp;
6587
+ memset(map->init_slots + map->init_slots_sz, 0,
6588
+ (new_sz - map->init_slots_sz) * host_ptr_sz);
6589
+ map->init_slots_sz = new_sz;
6590
+ }
6591
+ map->init_slots[moff] = targ_map;
6592
+
6593
+ pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
6594
+ i, map->name, moff, name);
6595
+ }
6596
+
6597
+ return 0;
6598
+}
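
/* Illustrative sketch only (not part of this file): the kind of BTF-defined
 * map-in-map declaration whose ".maps" relocations the function above turns
 * into init_slots entries. Map names are hypothetical; __uint/__type/__array
 * and SEC() are assumed to come from <bpf/bpf_helpers.h>.
 */
struct inner_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, int);
} inner_a SEC(".maps"), inner_b SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 2);
	__type(key, int);
	__array(values, struct inner_map);
} outer SEC(".maps") = {
	/* each &inner_* reference is one relocation against the "values" member */
	.values = { &inner_a, &inner_b },
};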
6599
+
6600
+static int cmp_relocs(const void *_a, const void *_b)
6601
+{
6602
+ const struct reloc_desc *a = _a;
6603
+ const struct reloc_desc *b = _b;
6604
+
6605
+ if (a->insn_idx != b->insn_idx)
6606
+ return a->insn_idx < b->insn_idx ? -1 : 1;
6607
+
6608
+ /* no two relocations should have the same insn_idx, but ... */
6609
+ if (a->type != b->type)
6610
+ return a->type < b->type ? -1 : 1;
6611
+
6612
+ return 0;
6613
+}
6614
+
6615
+static int bpf_object__collect_relos(struct bpf_object *obj)
6616
+{
6617
+ int i, err;
6618
+
6619
+ for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
6620
+ GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
6621
+ Elf_Data *data = obj->efile.reloc_sects[i].data;
6622
+ int idx = shdr->sh_info;
6623
+
6624
+ if (shdr->sh_type != SHT_REL) {
6625
+ pr_warn("internal error at %d\n", __LINE__);
6626
+ return -LIBBPF_ERRNO__INTERNAL;
6627
+ }
6628
+
6629
+ if (idx == obj->efile.st_ops_shndx)
6630
+ err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6631
+ else if (idx == obj->efile.btf_maps_shndx)
6632
+ err = bpf_object__collect_map_relos(obj, shdr, data);
6633
+ else
6634
+ err = bpf_object__collect_prog_relos(obj, shdr, data);
13086635 if (err)
13096636 return err;
6637
+ }
6638
+
6639
+ for (i = 0; i < obj->nr_programs; i++) {
6640
+ struct bpf_program *p = &obj->programs[i];
6641
+
6642
+ if (!p->nr_reloc)
6643
+ continue;
6644
+
6645
+ qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6646
+ }
6647
+ return 0;
6648
+}
6649
+
6650
+static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6651
+{
6652
+ if (BPF_CLASS(insn->code) == BPF_JMP &&
6653
+ BPF_OP(insn->code) == BPF_CALL &&
6654
+ BPF_SRC(insn->code) == BPF_K &&
6655
+ insn->src_reg == 0 &&
6656
+ insn->dst_reg == 0) {
6657
+ *func_id = insn->imm;
6658
+ return true;
6659
+ }
6660
+ return false;
6661
+}
6662
+
6663
+static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
6664
+{
6665
+ struct bpf_insn *insn = prog->insns;
6666
+ enum bpf_func_id func_id;
6667
+ int i;
6668
+
6669
+ for (i = 0; i < prog->insns_cnt; i++, insn++) {
6670
+ if (!insn_is_helper_call(insn, &func_id))
6671
+ continue;
6672
+
6673
+ /* on kernels that don't yet support
6674
+ * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6675
+ * to bpf_probe_read() which works well for old kernels
6676
+ */
6677
+ switch (func_id) {
6678
+ case BPF_FUNC_probe_read_kernel:
6679
+ case BPF_FUNC_probe_read_user:
6680
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
6681
+ insn->imm = BPF_FUNC_probe_read;
6682
+ break;
6683
+ case BPF_FUNC_probe_read_kernel_str:
6684
+ case BPF_FUNC_probe_read_user_str:
6685
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
6686
+ insn->imm = BPF_FUNC_probe_read_str;
6687
+ break;
6688
+ default:
6689
+ break;
6690
+ }
13106691 }
13116692 return 0;
13126693 }
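
/* Illustrative sketch only: the instruction shape insn_is_helper_call()
 * matches and bpf_object__sanitize_prog() may rewrite. src_reg == 0
 * distinguishes a helper call from a BPF_PSEUDO_CALL into a subprogram.
 */
static const struct bpf_insn example_call = {
	.code    = BPF_JMP | BPF_CALL,
	.dst_reg = 0,
	.src_reg = 0,
	.off     = 0,
	.imm     = BPF_FUNC_probe_read_kernel,
};
/* on kernels without bpf_probe_read_kernel(), sanitization rewrites
 * .imm to BPF_FUNC_probe_read before the program is loaded
 */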
13136694
13146695 static int
1315
-load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
1316
- const char *name, struct bpf_insn *insns, int insns_cnt,
1317
- char *license, u32 kern_version, int *pfd, int prog_ifindex)
6696
+load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
6697
+ char *license, __u32 kern_version, int *pfd)
13186698 {
13196699 struct bpf_load_program_attr load_attr;
13206700 char *cp, errmsg[STRERR_BUFSIZE];
1321
- char *log_buf;
1322
- int ret;
6701
+ size_t log_buf_size = 0;
6702
+ char *log_buf = NULL;
6703
+ int btf_fd, ret;
6704
+
6705
+ if (!insns || !insns_cnt)
6706
+ return -EINVAL;
13236707
13246708 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
1325
- load_attr.prog_type = type;
1326
- load_attr.expected_attach_type = expected_attach_type;
1327
- load_attr.name = name;
6709
+ load_attr.prog_type = prog->type;
6710
+ /* old kernels might not support specifying expected_attach_type */
6711
+ if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
6712
+ prog->sec_def->is_exp_attach_type_optional)
6713
+ load_attr.expected_attach_type = 0;
6714
+ else
6715
+ load_attr.expected_attach_type = prog->expected_attach_type;
6716
+ if (kernel_supports(FEAT_PROG_NAME))
6717
+ load_attr.name = prog->name;
13286718 load_attr.insns = insns;
13296719 load_attr.insns_cnt = insns_cnt;
13306720 load_attr.license = license;
1331
- load_attr.kern_version = kern_version;
1332
- load_attr.prog_ifindex = prog_ifindex;
6721
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
6722
+ prog->type == BPF_PROG_TYPE_LSM) {
6723
+ load_attr.attach_btf_id = prog->attach_btf_id;
6724
+ } else if (prog->type == BPF_PROG_TYPE_TRACING ||
6725
+ prog->type == BPF_PROG_TYPE_EXT) {
6726
+ load_attr.attach_prog_fd = prog->attach_prog_fd;
6727
+ load_attr.attach_btf_id = prog->attach_btf_id;
6728
+ } else {
6729
+ load_attr.kern_version = kern_version;
6730
+ load_attr.prog_ifindex = prog->prog_ifindex;
6731
+ }
6732
+ /* specify func_info/line_info only if kernel supports them */
6733
+ btf_fd = bpf_object__btf_fd(prog->obj);
6734
+ if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
6735
+ load_attr.prog_btf_fd = btf_fd;
6736
+ load_attr.func_info = prog->func_info;
6737
+ load_attr.func_info_rec_size = prog->func_info_rec_size;
6738
+ load_attr.func_info_cnt = prog->func_info_cnt;
6739
+ load_attr.line_info = prog->line_info;
6740
+ load_attr.line_info_rec_size = prog->line_info_rec_size;
6741
+ load_attr.line_info_cnt = prog->line_info_cnt;
6742
+ }
6743
+ load_attr.log_level = prog->log_level;
6744
+ load_attr.prog_flags = prog->prog_flags;
13336745
1334
- if (!load_attr.insns || !load_attr.insns_cnt)
1335
- return -EINVAL;
6746
+retry_load:
6747
+ if (log_buf_size) {
6748
+ log_buf = malloc(log_buf_size);
6749
+ if (!log_buf)
6750
+ return -ENOMEM;
13366751
1337
- log_buf = malloc(BPF_LOG_BUF_SIZE);
1338
- if (!log_buf)
1339
- pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
6752
+ *log_buf = 0;
6753
+ }
13406754
1341
- ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
6755
+ ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
13426756
13436757 if (ret >= 0) {
6758
+ if (log_buf && load_attr.log_level)
6759
+ pr_debug("verifier log:\n%s", log_buf);
6760
+
6761
+ if (prog->obj->rodata_map_idx >= 0 &&
6762
+ kernel_supports(FEAT_PROG_BIND_MAP)) {
6763
+ struct bpf_map *rodata_map =
6764
+ &prog->obj->maps[prog->obj->rodata_map_idx];
6765
+
6766
+ if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
6767
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6768
+ pr_warn("prog '%s': failed to bind .rodata map: %s\n",
6769
+ prog->name, cp);
6770
+ /* Don't fail hard if can't bind rodata. */
6771
+ }
6772
+ }
6773
+
13446774 *pfd = ret;
13456775 ret = 0;
13466776 goto out;
13476777 }
13486778
1349
- ret = -LIBBPF_ERRNO__LOAD;
1350
- cp = str_error(errno, errmsg, sizeof(errmsg));
1351
- pr_warning("load bpf program failed: %s\n", cp);
6779
+ if (!log_buf || errno == ENOSPC) {
6780
+ log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
6781
+ log_buf_size << 1);
6782
+
6783
+ free(log_buf);
6784
+ goto retry_load;
6785
+ }
6786
+ ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
6787
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6788
+ pr_warn("load bpf program failed: %s\n", cp);
6789
+ pr_perm_msg(ret);
13526790
13536791 if (log_buf && log_buf[0] != '\0') {
13546792 ret = -LIBBPF_ERRNO__VERIFY;
1355
- pr_warning("-- BEGIN DUMP LOG ---\n");
1356
- pr_warning("\n%s\n", log_buf);
1357
- pr_warning("-- END LOG --\n");
6793
+ pr_warn("-- BEGIN DUMP LOG ---\n");
6794
+ pr_warn("\n%s\n", log_buf);
6795
+ pr_warn("-- END LOG --\n");
13586796 } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
1359
- pr_warning("Program too large (%zu insns), at most %d insns\n",
1360
- load_attr.insns_cnt, BPF_MAXINSNS);
6797
+ pr_warn("Program too large (%zu insns), at most %d insns\n",
6798
+ load_attr.insns_cnt, BPF_MAXINSNS);
13616799 ret = -LIBBPF_ERRNO__PROG2BIG;
1362
- } else {
6800
+ } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
13636801 /* Wrong program type? */
1364
- if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
1365
- int fd;
6802
+ int fd;
13666803
1367
- load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
1368
- load_attr.expected_attach_type = 0;
1369
- fd = bpf_load_program_xattr(&load_attr, NULL, 0);
1370
- if (fd >= 0) {
1371
- close(fd);
1372
- ret = -LIBBPF_ERRNO__PROGTYPE;
1373
- goto out;
1374
- }
6804
+ load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
6805
+ load_attr.expected_attach_type = 0;
6806
+ fd = bpf_load_program_xattr(&load_attr, NULL, 0);
6807
+ if (fd >= 0) {
6808
+ close(fd);
6809
+ ret = -LIBBPF_ERRNO__PROGTYPE;
6810
+ goto out;
13756811 }
1376
-
1377
- if (log_buf)
1378
- ret = -LIBBPF_ERRNO__KVER;
13796812 }
13806813
13816814 out:
....@@ -1383,22 +6816,36 @@
13836816 return ret;
13846817 }
13856818
1386
-static int
1387
-bpf_program__load(struct bpf_program *prog,
1388
- char *license, u32 kern_version)
6819
+static int libbpf_find_attach_btf_id(struct bpf_program *prog);
6820
+
6821
+int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
13896822 {
1390
- int err = 0, fd, i;
6823
+ int err = 0, fd, i, btf_id;
6824
+
6825
+ if (prog->obj->loaded) {
6826
+ pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
6827
+ return -EINVAL;
6828
+ }
6829
+
6830
+ if ((prog->type == BPF_PROG_TYPE_TRACING ||
6831
+ prog->type == BPF_PROG_TYPE_LSM ||
6832
+ prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
6833
+ btf_id = libbpf_find_attach_btf_id(prog);
6834
+ if (btf_id <= 0)
6835
+ return btf_id;
6836
+ prog->attach_btf_id = btf_id;
6837
+ }
13916838
13926839 if (prog->instances.nr < 0 || !prog->instances.fds) {
13936840 if (prog->preprocessor) {
1394
- pr_warning("Internal error: can't load program '%s'\n",
1395
- prog->section_name);
6841
+ pr_warn("Internal error: can't load program '%s'\n",
6842
+ prog->name);
13966843 return -LIBBPF_ERRNO__INTERNAL;
13976844 }
13986845
13996846 prog->instances.fds = malloc(sizeof(int));
14006847 if (!prog->instances.fds) {
1401
- pr_warning("Not enough memory for BPF fds\n");
6848
+ pr_warn("Not enough memory for BPF fds\n");
14026849 return -ENOMEM;
14036850 }
14046851 prog->instances.nr = 1;
....@@ -1407,13 +6854,11 @@
14076854
14086855 if (!prog->preprocessor) {
14096856 if (prog->instances.nr != 1) {
1410
- pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
1411
- prog->section_name, prog->instances.nr);
6857
+ pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
6858
+ prog->name, prog->instances.nr);
14126859 }
1413
- err = load_program(prog->type, prog->expected_attach_type,
1414
- prog->name, prog->insns, prog->insns_cnt,
1415
- license, kern_version, &fd,
1416
- prog->prog_ifindex);
6860
+ err = load_program(prog, prog->insns, prog->insns_cnt,
6861
+ license, kern_ver, &fd);
14176862 if (!err)
14186863 prog->instances.fds[0] = fd;
14196864 goto out;
....@@ -1423,33 +6868,29 @@
14236868 struct bpf_prog_prep_result result;
14246869 bpf_program_prep_t preprocessor = prog->preprocessor;
14256870
1426
- bzero(&result, sizeof(result));
6871
+ memset(&result, 0, sizeof(result));
14276872 err = preprocessor(prog, i, prog->insns,
14286873 prog->insns_cnt, &result);
14296874 if (err) {
1430
- pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
1431
- i, prog->section_name);
6875
+ pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
6876
+ i, prog->name);
14326877 goto out;
14336878 }
14346879
14356880 if (!result.new_insn_ptr || !result.new_insn_cnt) {
14366881 pr_debug("Skip loading the %dth instance of program '%s'\n",
1437
- i, prog->section_name);
6882
+ i, prog->name);
14386883 prog->instances.fds[i] = -1;
14396884 if (result.pfd)
14406885 *result.pfd = -1;
14416886 continue;
14426887 }
14436888
1444
- err = load_program(prog->type, prog->expected_attach_type,
1445
- prog->name, result.new_insn_ptr,
1446
- result.new_insn_cnt,
1447
- license, kern_version, &fd,
1448
- prog->prog_ifindex);
1449
-
6889
+ err = load_program(prog, result.new_insn_ptr,
6890
+ result.new_insn_cnt, license, kern_ver, &fd);
14506891 if (err) {
1451
- pr_warning("Loading the %dth instance of program '%s' failed\n",
1452
- i, prog->section_name);
6892
+ pr_warn("Loading the %dth instance of program '%s' failed\n",
6893
+ i, prog->name);
14536894 goto out;
14546895 }
14556896
....@@ -1459,117 +6900,140 @@
14596900 }
14606901 out:
14616902 if (err)
1462
- pr_warning("failed to load program '%s'\n",
1463
- prog->section_name);
6903
+ pr_warn("failed to load program '%s'\n", prog->name);
14646904 zfree(&prog->insns);
14656905 prog->insns_cnt = 0;
14666906 return err;
14676907 }
14686908
1469
-static bool bpf_program__is_function_storage(struct bpf_program *prog,
1470
- struct bpf_object *obj)
1471
-{
1472
- return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
1473
-}
1474
-
14756909 static int
1476
-bpf_object__load_progs(struct bpf_object *obj)
6910
+bpf_object__load_progs(struct bpf_object *obj, int log_level)
14776911 {
6912
+ struct bpf_program *prog;
14786913 size_t i;
14796914 int err;
14806915
14816916 for (i = 0; i < obj->nr_programs; i++) {
1482
- if (bpf_program__is_function_storage(&obj->programs[i], obj))
6917
+ prog = &obj->programs[i];
6918
+ err = bpf_object__sanitize_prog(obj, prog);
6919
+ if (err)
6920
+ return err;
6921
+ }
6922
+
6923
+ for (i = 0; i < obj->nr_programs; i++) {
6924
+ prog = &obj->programs[i];
6925
+ if (prog_is_subprog(obj, prog))
14836926 continue;
1484
- err = bpf_program__load(&obj->programs[i],
1485
- obj->license,
1486
- obj->kern_version);
6927
+ if (!prog->load) {
6928
+ pr_debug("prog '%s': skipped loading\n", prog->name);
6929
+ continue;
6930
+ }
6931
+ prog->log_level |= log_level;
6932
+ err = bpf_program__load(prog, obj->license, obj->kern_version);
14876933 if (err)
14886934 return err;
14896935 }
14906936 return 0;
14916937 }
14926938
1493
-static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
1494
-{
1495
- switch (type) {
1496
- case BPF_PROG_TYPE_SOCKET_FILTER:
1497
- case BPF_PROG_TYPE_SCHED_CLS:
1498
- case BPF_PROG_TYPE_SCHED_ACT:
1499
- case BPF_PROG_TYPE_XDP:
1500
- case BPF_PROG_TYPE_CGROUP_SKB:
1501
- case BPF_PROG_TYPE_CGROUP_SOCK:
1502
- case BPF_PROG_TYPE_LWT_IN:
1503
- case BPF_PROG_TYPE_LWT_OUT:
1504
- case BPF_PROG_TYPE_LWT_XMIT:
1505
- case BPF_PROG_TYPE_LWT_SEG6LOCAL:
1506
- case BPF_PROG_TYPE_SOCK_OPS:
1507
- case BPF_PROG_TYPE_SK_SKB:
1508
- case BPF_PROG_TYPE_CGROUP_DEVICE:
1509
- case BPF_PROG_TYPE_SK_MSG:
1510
- case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1511
- case BPF_PROG_TYPE_LIRC_MODE2:
1512
- case BPF_PROG_TYPE_SK_REUSEPORT:
1513
- return false;
1514
- case BPF_PROG_TYPE_UNSPEC:
1515
- case BPF_PROG_TYPE_KPROBE:
1516
- case BPF_PROG_TYPE_TRACEPOINT:
1517
- case BPF_PROG_TYPE_PERF_EVENT:
1518
- case BPF_PROG_TYPE_RAW_TRACEPOINT:
1519
- default:
1520
- return true;
1521
- }
1522
-}
1523
-
1524
-static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
1525
-{
1526
- if (needs_kver && obj->kern_version == 0) {
1527
- pr_warning("%s doesn't provide kernel version\n",
1528
- obj->path);
1529
- return -LIBBPF_ERRNO__KVERSION;
1530
- }
1531
- return 0;
1532
-}
6939
+static const struct bpf_sec_def *find_sec_def(const char *sec_name);
15336940
15346941 static struct bpf_object *
1535
-__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
1536
- bool needs_kver)
6942
+__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
6943
+ const struct bpf_object_open_opts *opts)
15376944 {
6945
+ const char *obj_name, *kconfig;
6946
+ struct bpf_program *prog;
15386947 struct bpf_object *obj;
6948
+ char tmp_name[64];
15396949 int err;
15406950
15416951 if (elf_version(EV_CURRENT) == EV_NONE) {
1542
- pr_warning("failed to init libelf for %s\n", path);
6952
+ pr_warn("failed to init libelf for %s\n",
6953
+ path ? : "(mem buf)");
15436954 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
15446955 }
15456956
1546
- obj = bpf_object__new(path, obj_buf, obj_buf_sz);
6957
+ if (!OPTS_VALID(opts, bpf_object_open_opts))
6958
+ return ERR_PTR(-EINVAL);
6959
+
6960
+ obj_name = OPTS_GET(opts, object_name, NULL);
6961
+ if (obj_buf) {
6962
+ if (!obj_name) {
6963
+ snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
6964
+ (unsigned long)obj_buf,
6965
+ (unsigned long)obj_buf_sz);
6966
+ obj_name = tmp_name;
6967
+ }
6968
+ path = obj_name;
6969
+ pr_debug("loading object '%s' from buffer\n", obj_name);
6970
+ }
6971
+
6972
+ obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
15476973 if (IS_ERR(obj))
15486974 return obj;
15496975
1550
- CHECK_ERR(bpf_object__elf_init(obj), err, out);
1551
- CHECK_ERR(bpf_object__check_endianness(obj), err, out);
1552
- CHECK_ERR(bpf_object__elf_collect(obj), err, out);
1553
- CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
1554
- CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
6976
+ kconfig = OPTS_GET(opts, kconfig, NULL);
6977
+ if (kconfig) {
6978
+ obj->kconfig = strdup(kconfig);
6979
+ if (!obj->kconfig) {
6980
+ err = -ENOMEM;
6981
+ goto out;
6982
+ }
6983
+ }
15556984
6985
+ err = bpf_object__elf_init(obj);
6986
+ err = err ? : bpf_object__check_endianness(obj);
6987
+ err = err ? : bpf_object__elf_collect(obj);
6988
+ err = err ? : bpf_object__collect_externs(obj);
6989
+ err = err ? : bpf_object__finalize_btf(obj);
6990
+ err = err ? : bpf_object__init_maps(obj, opts);
6991
+ err = err ? : bpf_object__collect_relos(obj);
6992
+ if (err)
6993
+ goto out;
15566994 bpf_object__elf_finish(obj);
6995
+
6996
+ bpf_object__for_each_program(prog, obj) {
6997
+ prog->sec_def = find_sec_def(prog->sec_name);
6998
+ if (!prog->sec_def)
6999
+ /* couldn't guess, but user might manually specify */
7000
+ continue;
7001
+
7002
+ if (prog->sec_def->is_sleepable)
7003
+ prog->prog_flags |= BPF_F_SLEEPABLE;
7004
+ bpf_program__set_type(prog, prog->sec_def->prog_type);
7005
+ bpf_program__set_expected_attach_type(prog,
7006
+ prog->sec_def->expected_attach_type);
7007
+
7008
+ if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
7009
+ prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
7010
+ prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
7011
+ }
7012
+
15577013 return obj;
15587014 out:
15597015 bpf_object__close(obj);
15607016 return ERR_PTR(err);
15617017 }
15627018
1563
-struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
7019
+static struct bpf_object *
7020
+__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
15647021 {
7022
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7023
+ .relaxed_maps = flags & MAPS_RELAX_COMPAT,
7024
+ );
7025
+
15657026 /* param validation */
15667027 if (!attr->file)
15677028 return NULL;
15687029
15697030 pr_debug("loading %s\n", attr->file);
7031
+ return __bpf_object__open(attr->file, NULL, 0, &opts);
7032
+}
15707033
1571
- return __bpf_object__open(attr->file, NULL, 0,
1572
- bpf_prog_type__needs_kver(attr->prog_type));
7034
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
7035
+{
7036
+ return __bpf_object__open_xattr(attr, 0);
15737037 }
15747038
15757039 struct bpf_object *bpf_object__open(const char *path)
....@@ -1582,27 +7046,42 @@
15827046 return bpf_object__open_xattr(&attr);
15837047 }
15847048
1585
-struct bpf_object *bpf_object__open_buffer(void *obj_buf,
1586
- size_t obj_buf_sz,
1587
- const char *name)
7049
+struct bpf_object *
7050
+bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
15887051 {
1589
- char tmp_name[64];
7052
+ if (!path)
7053
+ return ERR_PTR(-EINVAL);
15907054
1591
- /* param validation */
1592
- if (!obj_buf || obj_buf_sz <= 0)
7055
+ pr_debug("loading %s\n", path);
7056
+
7057
+ return __bpf_object__open(path, NULL, 0, opts);
7058
+}
7059
+
7060
+struct bpf_object *
7061
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7062
+ const struct bpf_object_open_opts *opts)
7063
+{
7064
+ if (!obj_buf || obj_buf_sz == 0)
7065
+ return ERR_PTR(-EINVAL);
7066
+
7067
+ return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
7068
+}
7069
+
7070
+struct bpf_object *
7071
+bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
7072
+ const char *name)
7073
+{
7074
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
7075
+ .object_name = name,
7076
+ /* wrong default, but backwards-compatible */
7077
+ .relaxed_maps = true,
7078
+ );
7079
+
7080
+ /* returning NULL is wrong, but backwards-compatible */
7081
+ if (!obj_buf || obj_buf_sz == 0)
15937082 return NULL;
15947083
1595
- if (!name) {
1596
- snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
1597
- (unsigned long)obj_buf,
1598
- (unsigned long)obj_buf_sz);
1599
- tmp_name[sizeof(tmp_name) - 1] = '\0';
1600
- name = tmp_name;
1601
- }
1602
- pr_debug("loading object '%s' from buffer\n",
1603
- name);
1604
-
1605
- return __bpf_object__open(name, obj_buf, obj_buf_sz, true);
7084
+ return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
16067085 }
16077086
16087087 int bpf_object__unload(struct bpf_object *obj)
....@@ -1612,8 +7091,11 @@
16127091 if (!obj)
16137092 return -EINVAL;
16147093
1615
- for (i = 0; i < obj->nr_maps; i++)
7094
+ for (i = 0; i < obj->nr_maps; i++) {
16167095 zclose(obj->maps[i].fd);
7096
+ if (obj->maps[i].st_ops)
7097
+ zfree(&obj->maps[i].st_ops->kern_vdata);
7098
+ }
16177099
16187100 for (i = 0; i < obj->nr_programs; i++)
16197101 bpf_program__unload(&obj->programs[i]);
....@@ -1621,28 +7103,294 @@
16217103 return 0;
16227104 }
16237105
1624
-int bpf_object__load(struct bpf_object *obj)
7106
+static int bpf_object__sanitize_maps(struct bpf_object *obj)
16257107 {
1626
- int err;
7108
+ struct bpf_map *m;
16277109
7110
+ bpf_object__for_each_map(m, obj) {
7111
+ if (!bpf_map__is_internal(m))
7112
+ continue;
7113
+ if (!kernel_supports(FEAT_GLOBAL_DATA)) {
7114
+ pr_warn("kernel doesn't support global data\n");
7115
+ return -ENOTSUP;
7116
+ }
7117
+ if (!kernel_supports(FEAT_ARRAY_MMAP))
7118
+ m->def.map_flags ^= BPF_F_MMAPABLE;
7119
+ }
7120
+
7121
+ return 0;
7122
+}
7123
+
7124
+static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7125
+{
7126
+ char sym_type, sym_name[500];
7127
+ unsigned long long sym_addr;
7128
+ struct extern_desc *ext;
7129
+ int ret, err = 0;
7130
+ FILE *f;
7131
+
7132
+ f = fopen("/proc/kallsyms", "r");
7133
+ if (!f) {
7134
+ err = -errno;
7135
+ pr_warn("failed to open /proc/kallsyms: %d\n", err);
7136
+ return err;
7137
+ }
7138
+
7139
+ while (true) {
7140
+ ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7141
+ &sym_addr, &sym_type, sym_name);
7142
+ if (ret == EOF && feof(f))
7143
+ break;
7144
+ if (ret != 3) {
7145
+ pr_warn("failed to read kallsyms entry: %d\n", ret);
7146
+ err = -EINVAL;
7147
+ goto out;
7148
+ }
7149
+
7150
+ ext = find_extern_by_name(obj, sym_name);
7151
+ if (!ext || ext->type != EXT_KSYM)
7152
+ continue;
7153
+
7154
+ if (ext->is_set && ext->ksym.addr != sym_addr) {
7155
+ pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
7156
+ sym_name, ext->ksym.addr, sym_addr);
7157
+ err = -EINVAL;
7158
+ goto out;
7159
+ }
7160
+ if (!ext->is_set) {
7161
+ ext->is_set = true;
7162
+ ext->ksym.addr = sym_addr;
7163
+ pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
7164
+ }
7165
+ }
7166
+
7167
+out:
7168
+ fclose(f);
7169
+ return err;
7170
+}
7171
+
7172
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7173
+{
7174
+ struct extern_desc *ext;
7175
+ int i, id;
7176
+
7177
+ for (i = 0; i < obj->nr_extern; i++) {
7178
+ const struct btf_type *targ_var, *targ_type;
7179
+ __u32 targ_type_id, local_type_id;
7180
+ const char *targ_var_name;
7181
+ int ret;
7182
+
7183
+ ext = &obj->externs[i];
7184
+ if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7185
+ continue;
7186
+
7187
+ id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
7188
+ BTF_KIND_VAR);
7189
+ if (id <= 0) {
7190
+ pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
7191
+ ext->name);
7192
+ return -ESRCH;
7193
+ }
7194
+
7195
+ /* find local type_id */
7196
+ local_type_id = ext->ksym.type_id;
7197
+
7198
+ /* find target type_id */
7199
+ targ_var = btf__type_by_id(obj->btf_vmlinux, id);
7200
+ targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
7201
+ targ_var->name_off);
7202
+ targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
7203
+ targ_var->type,
7204
+ &targ_type_id);
7205
+
7206
+ ret = bpf_core_types_are_compat(obj->btf, local_type_id,
7207
+ obj->btf_vmlinux, targ_type_id);
7208
+ if (ret <= 0) {
7209
+ const struct btf_type *local_type;
7210
+ const char *targ_name, *local_name;
7211
+
7212
+ local_type = btf__type_by_id(obj->btf, local_type_id);
7213
+ local_name = btf__name_by_offset(obj->btf,
7214
+ local_type->name_off);
7215
+ targ_name = btf__name_by_offset(obj->btf_vmlinux,
7216
+ targ_type->name_off);
7217
+
7218
+ pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7219
+ ext->name, local_type_id,
7220
+ btf_kind_str(local_type), local_name, targ_type_id,
7221
+ btf_kind_str(targ_type), targ_name);
7222
+ return -EINVAL;
7223
+ }
7224
+
7225
+ ext->is_set = true;
7226
+ ext->ksym.vmlinux_btf_id = id;
7227
+ pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
7228
+ ext->name, id, btf_kind_str(targ_var), targ_var_name);
7229
+ }
7230
+ return 0;
7231
+}
7232
+
7233
+static int bpf_object__resolve_externs(struct bpf_object *obj,
7234
+ const char *extra_kconfig)
7235
+{
7236
+ bool need_config = false, need_kallsyms = false;
7237
+ bool need_vmlinux_btf = false;
7238
+ struct extern_desc *ext;
7239
+ void *kcfg_data = NULL;
7240
+ int err, i;
7241
+
7242
+ if (obj->nr_extern == 0)
7243
+ return 0;
7244
+
7245
+ if (obj->kconfig_map_idx >= 0)
7246
+ kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
7247
+
7248
+ for (i = 0; i < obj->nr_extern; i++) {
7249
+ ext = &obj->externs[i];
7250
+
7251
+ if (ext->type == EXT_KCFG &&
7252
+ strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
7253
+ void *ext_val = kcfg_data + ext->kcfg.data_off;
7254
+ __u32 kver = get_kernel_version();
7255
+
7256
+ if (!kver) {
7257
+ pr_warn("failed to get kernel version\n");
7258
+ return -EINVAL;
7259
+ }
7260
+ err = set_kcfg_value_num(ext, ext_val, kver);
7261
+ if (err)
7262
+ return err;
7263
+ pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
7264
+ } else if (ext->type == EXT_KCFG &&
7265
+ strncmp(ext->name, "CONFIG_", 7) == 0) {
7266
+ need_config = true;
7267
+ } else if (ext->type == EXT_KSYM) {
7268
+ if (ext->ksym.type_id)
7269
+ need_vmlinux_btf = true;
7270
+ else
7271
+ need_kallsyms = true;
7272
+ } else {
7273
+ pr_warn("unrecognized extern '%s'\n", ext->name);
7274
+ return -EINVAL;
7275
+ }
7276
+ }
7277
+ if (need_config && extra_kconfig) {
7278
+ err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
7279
+ if (err)
7280
+ return -EINVAL;
7281
+ need_config = false;
7282
+ for (i = 0; i < obj->nr_extern; i++) {
7283
+ ext = &obj->externs[i];
7284
+ if (ext->type == EXT_KCFG && !ext->is_set) {
7285
+ need_config = true;
7286
+ break;
7287
+ }
7288
+ }
7289
+ }
7290
+ if (need_config) {
7291
+ err = bpf_object__read_kconfig_file(obj, kcfg_data);
7292
+ if (err)
7293
+ return -EINVAL;
7294
+ }
7295
+ if (need_kallsyms) {
7296
+ err = bpf_object__read_kallsyms_file(obj);
7297
+ if (err)
7298
+ return -EINVAL;
7299
+ }
7300
+ if (need_vmlinux_btf) {
7301
+ err = bpf_object__resolve_ksyms_btf_id(obj);
7302
+ if (err)
7303
+ return -EINVAL;
7304
+ }
7305
+ for (i = 0; i < obj->nr_extern; i++) {
7306
+ ext = &obj->externs[i];
7307
+
7308
+ if (!ext->is_set && !ext->is_weak) {
7309
+ pr_warn("extern %s (strong) not resolved\n", ext->name);
7310
+ return -ESRCH;
7311
+ } else if (!ext->is_set) {
7312
+ pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
7313
+ ext->name);
7314
+ }
7315
+ }
7316
+
7317
+ return 0;
7318
+}
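
/* Illustrative sketch only: BPF-side extern declarations that
 * bpf_object__resolve_externs() fills in at load time. The symbol names are
 * examples; __kconfig, __ksym and __weak are assumed to come from
 * <bpf/bpf_helpers.h>.
 */
extern unsigned int LINUX_KERNEL_VERSION __kconfig;	/* always resolvable */
extern unsigned long CONFIG_HZ __kconfig __weak;	/* from Kconfig data */
extern const void bpf_prog_fops __ksym __weak;		/* from /proc/kallsyms */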
7319
+
7320
+int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
7321
+{
7322
+ struct bpf_object *obj;
7323
+ int err, i;
7324
+
7325
+ if (!attr)
7326
+ return -EINVAL;
7327
+ obj = attr->obj;
16287328 if (!obj)
16297329 return -EINVAL;
16307330
16317331 if (obj->loaded) {
1632
- pr_warning("object should not be loaded twice\n");
7332
+ pr_warn("object '%s': load can't be attempted twice\n", obj->name);
16337333 return -EINVAL;
16347334 }
16357335
1636
- obj->loaded = true;
7336
+ err = bpf_object__probe_loading(obj);
7337
+ err = err ? : bpf_object__load_vmlinux_btf(obj);
7338
+ err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
7339
+ err = err ? : bpf_object__sanitize_and_load_btf(obj);
7340
+ err = err ? : bpf_object__sanitize_maps(obj);
7341
+ err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
7342
+ err = err ? : bpf_object__create_maps(obj);
7343
+ err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
7344
+ err = err ? : bpf_object__load_progs(obj, attr->log_level);
16377345
1638
- CHECK_ERR(bpf_object__create_maps(obj), err, out);
1639
- CHECK_ERR(bpf_object__relocate(obj), err, out);
1640
- CHECK_ERR(bpf_object__load_progs(obj), err, out);
7346
+ btf__free(obj->btf_vmlinux);
7347
+ obj->btf_vmlinux = NULL;
7348
+
7349
+ obj->loaded = true; /* doesn't matter if successfully or not */
7350
+
7351
+ if (err)
7352
+ goto out;
16417353
16427354 return 0;
16437355 out:
7356
+ /* unpin any maps that were auto-pinned during load */
7357
+ for (i = 0; i < obj->nr_maps; i++)
7358
+ if (obj->maps[i].pinned && !obj->maps[i].reused)
7359
+ bpf_map__unpin(&obj->maps[i], NULL);
7360
+
16447361 bpf_object__unload(obj);
1645
- pr_warning("failed to load object '%s'\n", obj->path);
7362
+ pr_warn("failed to load object '%s'\n", obj->path);
7363
+ return err;
7364
+}
7365
+
7366
+int bpf_object__load(struct bpf_object *obj)
7367
+{
7368
+ struct bpf_object_load_attr attr = {
7369
+ .obj = obj,
7370
+ };
7371
+
7372
+ return bpf_object__load_xattr(&attr);
7373
+}
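
/* Usage sketch only (not part of libbpf): a typical caller-side open/load
 * sequence driving the functions above. "prog.bpf.o" is a placeholder path.
 */
static int load_example(void)
{
	struct bpf_object_load_attr load_attr = {};
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open_file("prog.bpf.o", NULL);
	err = libbpf_get_error(obj);
	if (err)
		return err;

	load_attr.obj = obj;
	load_attr.log_level = 1;	/* request a verifier log on load */
	err = bpf_object__load_xattr(&load_attr);
	if (err)
		bpf_object__close(obj);
	return err;
}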
7374
+
7375
+static int make_parent_dir(const char *path)
7376
+{
7377
+ char *cp, errmsg[STRERR_BUFSIZE];
7378
+ char *dname, *dir;
7379
+ int err = 0;
7380
+
7381
+ dname = strdup(path);
7382
+ if (dname == NULL)
7383
+ return -ENOMEM;
7384
+
7385
+ dir = dirname(dname);
7386
+ if (mkdir(dir, 0700) && errno != EEXIST)
7387
+ err = -errno;
7388
+
7389
+ free(dname);
7390
+ if (err) {
7391
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7392
+ pr_warn("failed to mkdir %s: %s\n", path, cp);
7393
+ }
16467394 return err;
16477395 }
16487396
....@@ -1662,14 +7410,14 @@
16627410
16637411 dir = dirname(dname);
16647412 if (statfs(dir, &st_fs)) {
1665
- cp = str_error(errno, errmsg, sizeof(errmsg));
1666
- pr_warning("failed to statfs %s: %s\n", dir, cp);
7413
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7414
+ pr_warn("failed to statfs %s: %s\n", dir, cp);
16677415 err = -errno;
16687416 }
16697417 free(dname);
16707418
16717419 if (!err && st_fs.f_type != BPF_FS_MAGIC) {
1672
- pr_warning("specified path %s is not on BPF FS\n", path);
7420
+ pr_warn("specified path %s is not on BPF FS\n", path);
16737421 err = -EINVAL;
16747422 }
16757423
....@@ -1682,47 +7430,131 @@
16827430 char *cp, errmsg[STRERR_BUFSIZE];
16837431 int err;
16847432
7433
+ err = make_parent_dir(path);
7434
+ if (err)
7435
+ return err;
7436
+
16857437 err = check_path(path);
16867438 if (err)
16877439 return err;
16887440
16897441 if (prog == NULL) {
1690
- pr_warning("invalid program pointer\n");
7442
+ pr_warn("invalid program pointer\n");
16917443 return -EINVAL;
16927444 }
16937445
16947446 if (instance < 0 || instance >= prog->instances.nr) {
1695
- pr_warning("invalid prog instance %d of prog %s (max %d)\n",
1696
- instance, prog->section_name, prog->instances.nr);
7447
+ pr_warn("invalid prog instance %d of prog %s (max %d)\n",
7448
+ instance, prog->name, prog->instances.nr);
16977449 return -EINVAL;
16987450 }
16997451
17007452 if (bpf_obj_pin(prog->instances.fds[instance], path)) {
1701
- cp = str_error(errno, errmsg, sizeof(errmsg));
1702
- pr_warning("failed to pin program: %s\n", cp);
1703
- return -errno;
7453
+ err = -errno;
7454
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
7455
+ pr_warn("failed to pin program: %s\n", cp);
7456
+ return err;
17047457 }
17057458 pr_debug("pinned program '%s'\n", path);
17067459
17077460 return 0;
17087461 }
17097462
1710
-static int make_dir(const char *path)
7463
+int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
7464
+ int instance)
17117465 {
1712
- char *cp, errmsg[STRERR_BUFSIZE];
1713
- int err = 0;
7466
+ int err;
17147467
1715
- if (mkdir(path, 0700) && errno != EEXIST)
1716
- err = -errno;
7468
+ err = check_path(path);
7469
+ if (err)
7470
+ return err;
17177471
1718
- if (err) {
1719
- cp = str_error(-err, errmsg, sizeof(errmsg));
1720
- pr_warning("failed to mkdir %s: %s\n", path, cp);
7472
+ if (prog == NULL) {
7473
+ pr_warn("invalid program pointer\n");
7474
+ return -EINVAL;
17217475 }
1722
- return err;
7476
+
7477
+ if (instance < 0 || instance >= prog->instances.nr) {
7478
+ pr_warn("invalid prog instance %d of prog %s (max %d)\n",
7479
+ instance, prog->name, prog->instances.nr);
7480
+ return -EINVAL;
7481
+ }
7482
+
7483
+ err = unlink(path);
7484
+ if (err != 0)
7485
+ return -errno;
7486
+ pr_debug("unpinned program '%s'\n", path);
7487
+
7488
+ return 0;
17237489 }
17247490
17257491 int bpf_program__pin(struct bpf_program *prog, const char *path)
7492
+{
7493
+ int i, err;
7494
+
7495
+ err = make_parent_dir(path);
7496
+ if (err)
7497
+ return err;
7498
+
7499
+ err = check_path(path);
7500
+ if (err)
7501
+ return err;
7502
+
7503
+ if (prog == NULL) {
7504
+ pr_warn("invalid program pointer\n");
7505
+ return -EINVAL;
7506
+ }
7507
+
7508
+ if (prog->instances.nr <= 0) {
7509
+ pr_warn("no instances of prog %s to pin\n", prog->name);
7510
+ return -EINVAL;
7511
+ }
7512
+
7513
+ if (prog->instances.nr == 1) {
7514
+ /* don't create subdirs when pinning single instance */
7515
+ return bpf_program__pin_instance(prog, path, 0);
7516
+ }
7517
+
7518
+ for (i = 0; i < prog->instances.nr; i++) {
7519
+ char buf[PATH_MAX];
7520
+ int len;
7521
+
7522
+ len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7523
+ if (len < 0) {
7524
+ err = -EINVAL;
7525
+ goto err_unpin;
7526
+ } else if (len >= PATH_MAX) {
7527
+ err = -ENAMETOOLONG;
7528
+ goto err_unpin;
7529
+ }
7530
+
7531
+ err = bpf_program__pin_instance(prog, buf, i);
7532
+ if (err)
7533
+ goto err_unpin;
7534
+ }
7535
+
7536
+ return 0;
7537
+
7538
+err_unpin:
7539
+ for (i = i - 1; i >= 0; i--) {
7540
+ char buf[PATH_MAX];
7541
+ int len;
7542
+
7543
+ len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
7544
+ if (len < 0)
7545
+ continue;
7546
+ else if (len >= PATH_MAX)
7547
+ continue;
7548
+
7549
+ bpf_program__unpin_instance(prog, buf, i);
7550
+ }
7551
+
7552
+ rmdir(path);
7553
+
7554
+ return err;
7555
+}
7556
+
7557
+int bpf_program__unpin(struct bpf_program *prog, const char *path)
17267558 {
17277559 int i, err;
17287560
....@@ -1731,19 +7563,19 @@
17317563 return err;
17327564
17337565 if (prog == NULL) {
1734
- pr_warning("invalid program pointer\n");
7566
+ pr_warn("invalid program pointer\n");
17357567 return -EINVAL;
17367568 }
17377569
17387570 if (prog->instances.nr <= 0) {
1739
- pr_warning("no instances of prog %s to pin\n",
1740
- prog->section_name);
7571
+ pr_warn("no instances of prog %s to pin\n", prog->name);
17417572 return -EINVAL;
17427573 }
17437574
1744
- err = make_dir(path);
1745
- if (err)
1746
- return err;
7575
+ if (prog->instances.nr == 1) {
7576
+ /* don't create subdirs when pinning single instance */
7577
+ return bpf_program__unpin_instance(prog, path, 0);
7578
+ }
17477579
17487580 for (i = 0; i < prog->instances.nr; i++) {
17497581 char buf[PATH_MAX];
....@@ -1755,10 +7587,14 @@
17557587 else if (len >= PATH_MAX)
17567588 return -ENAMETOOLONG;
17577589
1758
- err = bpf_program__pin_instance(prog, buf, i);
7590
+ err = bpf_program__unpin_instance(prog, buf, i);
17597591 if (err)
17607592 return err;
17617593 }
7594
+
7595
+ err = rmdir(path);
7596
+ if (err)
7597
+ return -errno;
17627598
17637599 return 0;
17647600 }
....@@ -1768,28 +7604,135 @@
17687604 char *cp, errmsg[STRERR_BUFSIZE];
17697605 int err;
17707606
7607
+ if (map == NULL) {
7608
+ pr_warn("invalid map pointer\n");
7609
+ return -EINVAL;
7610
+ }
7611
+
7612
+ if (map->pin_path) {
7613
+ if (path && strcmp(path, map->pin_path)) {
7614
+ pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7615
+ bpf_map__name(map), map->pin_path, path);
7616
+ return -EINVAL;
7617
+ } else if (map->pinned) {
7618
+ pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
7619
+ bpf_map__name(map), map->pin_path);
7620
+ return 0;
7621
+ }
7622
+ } else {
7623
+ if (!path) {
7624
+ pr_warn("missing a path to pin map '%s' at\n",
7625
+ bpf_map__name(map));
7626
+ return -EINVAL;
7627
+ } else if (map->pinned) {
7628
+ pr_warn("map '%s' already pinned\n", bpf_map__name(map));
7629
+ return -EEXIST;
7630
+ }
7631
+
7632
+ map->pin_path = strdup(path);
7633
+ if (!map->pin_path) {
7634
+ err = -errno;
7635
+ goto out_err;
7636
+ }
7637
+ }
7638
+
7639
+ err = make_parent_dir(map->pin_path);
7640
+ if (err)
7641
+ return err;
7642
+
7643
+ err = check_path(map->pin_path);
7644
+ if (err)
7645
+ return err;
7646
+
7647
+ if (bpf_obj_pin(map->fd, map->pin_path)) {
7648
+ err = -errno;
7649
+ goto out_err;
7650
+ }
7651
+
7652
+ map->pinned = true;
7653
+ pr_debug("pinned map '%s'\n", map->pin_path);
7654
+
7655
+ return 0;
7656
+
7657
+out_err:
7658
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7659
+ pr_warn("failed to pin map: %s\n", cp);
7660
+ return err;
7661
+}
7662
+
7663
+int bpf_map__unpin(struct bpf_map *map, const char *path)
7664
+{
7665
+ int err;
7666
+
7667
+ if (map == NULL) {
7668
+ pr_warn("invalid map pointer\n");
7669
+ return -EINVAL;
7670
+ }
7671
+
7672
+ if (map->pin_path) {
7673
+ if (path && strcmp(path, map->pin_path)) {
7674
+ pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7675
+ bpf_map__name(map), map->pin_path, path);
7676
+ return -EINVAL;
7677
+ }
7678
+ path = map->pin_path;
7679
+ } else if (!path) {
7680
+ pr_warn("no path to unpin map '%s' from\n",
7681
+ bpf_map__name(map));
7682
+ return -EINVAL;
7683
+ }
7684
+
17717685 err = check_path(path);
17727686 if (err)
17737687 return err;
17747688
1775
- if (map == NULL) {
1776
- pr_warning("invalid map pointer\n");
1777
- return -EINVAL;
1778
- }
1779
-
1780
- if (bpf_obj_pin(map->fd, path)) {
1781
- cp = str_error(errno, errmsg, sizeof(errmsg));
1782
- pr_warning("failed to pin map: %s\n", cp);
7689
+ err = unlink(path);
7690
+ if (err != 0)
17837691 return -errno;
1784
- }
17857692
1786
- pr_debug("pinned map '%s'\n", path);
7693
+ map->pinned = false;
7694
+ pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
7695
+
17877696 return 0;
17887697 }
17897698
1790
-int bpf_object__pin(struct bpf_object *obj, const char *path)
7699
+int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
17917700 {
1792
- struct bpf_program *prog;
7701
+ char *new = NULL;
7702
+
7703
+ if (path) {
7704
+ new = strdup(path);
7705
+ if (!new)
7706
+ return -errno;
7707
+ }
7708
+
7709
+ free(map->pin_path);
7710
+ map->pin_path = new;
7711
+ return 0;
7712
+}
7713
+
7714
+const char *bpf_map__get_pin_path(const struct bpf_map *map)
7715
+{
7716
+ return map->pin_path;
7717
+}
7718
+
7719
+bool bpf_map__is_pinned(const struct bpf_map *map)
7720
+{
7721
+ return map->pinned;
7722
+}
7723
+
7724
+static void sanitize_pin_path(char *s)
7725
+{
7726
+ /* bpffs disallows periods in path names */
7727
+ while (*s) {
7728
+ if (*s == '.')
7729
+ *s = '_';
7730
+ s++;
7731
+ }
7732
+}
7733
+
7734
+int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
7735
+{
17937736 struct bpf_map *map;
17947737 int err;
17957738
....@@ -1797,42 +7740,78 @@
17977740 return -ENOENT;
17987741
17997742 if (!obj->loaded) {
1800
- pr_warning("object not yet loaded; load it first\n");
7743
+ pr_warn("object not yet loaded; load it first\n");
18017744 return -ENOENT;
18027745 }
18037746
1804
- err = make_dir(path);
1805
- if (err)
1806
- return err;
1807
-
1808
- bpf_map__for_each(map, obj) {
7747
+ bpf_object__for_each_map(map, obj) {
7748
+ char *pin_path = NULL;
18097749 char buf[PATH_MAX];
1810
- int len;
18117750
1812
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
1813
- bpf_map__name(map));
1814
- if (len < 0)
1815
- return -EINVAL;
1816
- else if (len >= PATH_MAX)
1817
- return -ENAMETOOLONG;
7751
+ if (path) {
7752
+ int len;
18187753
1819
- err = bpf_map__pin(map, buf);
7754
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7755
+ bpf_map__name(map));
7756
+ if (len < 0) {
7757
+ err = -EINVAL;
7758
+ goto err_unpin_maps;
7759
+ } else if (len >= PATH_MAX) {
7760
+ err = -ENAMETOOLONG;
7761
+ goto err_unpin_maps;
7762
+ }
7763
+ sanitize_pin_path(buf);
7764
+ pin_path = buf;
7765
+ } else if (!map->pin_path) {
7766
+ continue;
7767
+ }
7768
+
7769
+ err = bpf_map__pin(map, pin_path);
18207770 if (err)
1821
- return err;
7771
+ goto err_unpin_maps;
18227772 }
18237773
1824
- bpf_object__for_each_program(prog, obj) {
7774
+ return 0;
7775
+
7776
+err_unpin_maps:
7777
+ while ((map = bpf_map__prev(map, obj))) {
7778
+ if (!map->pin_path)
7779
+ continue;
7780
+
7781
+ bpf_map__unpin(map, NULL);
7782
+ }
7783
+
7784
+ return err;
7785
+}
7786
+
7787
+int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
7788
+{
7789
+ struct bpf_map *map;
7790
+ int err;
7791
+
7792
+ if (!obj)
7793
+ return -ENOENT;
7794
+
7795
+ bpf_object__for_each_map(map, obj) {
7796
+ char *pin_path = NULL;
18257797 char buf[PATH_MAX];
1826
- int len;
18277798
1828
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
1829
- prog->section_name);
1830
- if (len < 0)
1831
- return -EINVAL;
1832
- else if (len >= PATH_MAX)
1833
- return -ENAMETOOLONG;
7799
+ if (path) {
7800
+ int len;
18347801
1835
- err = bpf_program__pin(prog, buf);
7802
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7803
+ bpf_map__name(map));
7804
+ if (len < 0)
7805
+ return -EINVAL;
7806
+ else if (len >= PATH_MAX)
7807
+ return -ENAMETOOLONG;
7808
+ sanitize_pin_path(buf);
7809
+ pin_path = buf;
7810
+ } else if (!map->pin_path) {
7811
+ continue;
7812
+ }
7813
+
7814
+ err = bpf_map__unpin(map, pin_path);
18367815 if (err)
18377816 return err;
18387817 }
....@@ -1840,11 +7819,141 @@
18407819 return 0;
18417820 }
18427821
7822
+int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
7823
+{
7824
+ struct bpf_program *prog;
7825
+ int err;
7826
+
7827
+ if (!obj)
7828
+ return -ENOENT;
7829
+
7830
+ if (!obj->loaded) {
7831
+ pr_warn("object not yet loaded; load it first\n");
7832
+ return -ENOENT;
7833
+ }
7834
+
7835
+ bpf_object__for_each_program(prog, obj) {
7836
+ char buf[PATH_MAX];
7837
+ int len;
7838
+
7839
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7840
+ prog->pin_name);
7841
+ if (len < 0) {
7842
+ err = -EINVAL;
7843
+ goto err_unpin_programs;
7844
+ } else if (len >= PATH_MAX) {
7845
+ err = -ENAMETOOLONG;
7846
+ goto err_unpin_programs;
7847
+ }
7848
+
7849
+ err = bpf_program__pin(prog, buf);
7850
+ if (err)
7851
+ goto err_unpin_programs;
7852
+ }
7853
+
7854
+ return 0;
7855
+
7856
+err_unpin_programs:
7857
+ while ((prog = bpf_program__prev(prog, obj))) {
7858
+ char buf[PATH_MAX];
7859
+ int len;
7860
+
7861
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7862
+ prog->pin_name);
7863
+ if (len < 0)
7864
+ continue;
7865
+ else if (len >= PATH_MAX)
7866
+ continue;
7867
+
7868
+ bpf_program__unpin(prog, buf);
7869
+ }
7870
+
7871
+ return err;
7872
+}
7873
+
7874
+int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
7875
+{
7876
+ struct bpf_program *prog;
7877
+ int err;
7878
+
7879
+ if (!obj)
7880
+ return -ENOENT;
7881
+
7882
+ bpf_object__for_each_program(prog, obj) {
7883
+ char buf[PATH_MAX];
7884
+ int len;
7885
+
7886
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
7887
+ prog->pin_name);
7888
+ if (len < 0)
7889
+ return -EINVAL;
7890
+ else if (len >= PATH_MAX)
7891
+ return -ENAMETOOLONG;
7892
+
7893
+ err = bpf_program__unpin(prog, buf);
7894
+ if (err)
7895
+ return err;
7896
+ }
7897
+
7898
+ return 0;
7899
+}
7900
+
7901
+int bpf_object__pin(struct bpf_object *obj, const char *path)
7902
+{
7903
+ int err;
7904
+
7905
+ err = bpf_object__pin_maps(obj, path);
7906
+ if (err)
7907
+ return err;
7908
+
7909
+ err = bpf_object__pin_programs(obj, path);
7910
+ if (err) {
7911
+ bpf_object__unpin_maps(obj, path);
7912
+ return err;
7913
+ }
7914
+
7915
+ return 0;
7916
+}
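
/* Usage sketch only (not part of libbpf): two common ways callers use the
 * pinning API above. "/sys/fs/bpf/myapp" is a placeholder bpffs path.
 */
static int pin_all_example(struct bpf_object *obj)
{
	/* pins every map and every program under the given bpffs directory */
	return bpf_object__pin(obj, "/sys/fs/bpf/myapp");
}

static int auto_pin_example(struct bpf_map *map)
{
	/* set before bpf_object__load(); the map is then pinned at (or
	 * reused from) this path automatically during load
	 */
	return bpf_map__set_pin_path(map, "/sys/fs/bpf/myapp/my_map");
}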
7917
+
7918
+static void bpf_map__destroy(struct bpf_map *map)
7919
+{
7920
+ if (map->clear_priv)
7921
+ map->clear_priv(map, map->priv);
7922
+ map->priv = NULL;
7923
+ map->clear_priv = NULL;
7924
+
7925
+ if (map->inner_map) {
7926
+ bpf_map__destroy(map->inner_map);
7927
+ zfree(&map->inner_map);
7928
+ }
7929
+
7930
+ zfree(&map->init_slots);
7931
+ map->init_slots_sz = 0;
7932
+
7933
+ if (map->mmaped) {
7934
+ munmap(map->mmaped, bpf_map_mmap_sz(map));
7935
+ map->mmaped = NULL;
7936
+ }
7937
+
7938
+ if (map->st_ops) {
7939
+ zfree(&map->st_ops->data);
7940
+ zfree(&map->st_ops->progs);
7941
+ zfree(&map->st_ops->kern_func_off);
7942
+ zfree(&map->st_ops);
7943
+ }
7944
+
7945
+ zfree(&map->name);
7946
+ zfree(&map->pin_path);
7947
+
7948
+ if (map->fd >= 0)
7949
+ zclose(map->fd);
7950
+}
7951
+
18437952 void bpf_object__close(struct bpf_object *obj)
18447953 {
18457954 size_t i;
18467955
1847
- if (!obj)
7956
+ if (IS_ERR_OR_NULL(obj))
18487957 return;
18497958
18507959 if (obj->clear_priv)
....@@ -1853,15 +7962,15 @@
18537962 bpf_object__elf_finish(obj);
18547963 bpf_object__unload(obj);
18557964 btf__free(obj->btf);
7965
+ btf_ext__free(obj->btf_ext);
18567966
1857
- for (i = 0; i < obj->nr_maps; i++) {
1858
- zfree(&obj->maps[i].name);
1859
- if (obj->maps[i].clear_priv)
1860
- obj->maps[i].clear_priv(&obj->maps[i],
1861
- obj->maps[i].priv);
1862
- obj->maps[i].priv = NULL;
1863
- obj->maps[i].clear_priv = NULL;
1864
- }
7967
+ for (i = 0; i < obj->nr_maps; i++)
7968
+ bpf_map__destroy(&obj->maps[i]);
7969
+
7970
+ zfree(&obj->kconfig);
7971
+ zfree(&obj->externs);
7972
+ obj->nr_extern = 0;
7973
+
18657974 zfree(&obj->maps);
18667975 obj->nr_maps = 0;
18677976
....@@ -1894,14 +8003,19 @@
18948003 return next;
18958004 }
18968005
1897
-const char *bpf_object__name(struct bpf_object *obj)
8006
+const char *bpf_object__name(const struct bpf_object *obj)
18988007 {
1899
- return obj ? obj->path : ERR_PTR(-EINVAL);
8008
+ return obj ? obj->name : ERR_PTR(-EINVAL);
19008009 }
19018010
1902
-unsigned int bpf_object__kversion(struct bpf_object *obj)
8011
+unsigned int bpf_object__kversion(const struct bpf_object *obj)
19038012 {
19048013 return obj ? obj->kern_version : 0;
8014
+}
8015
+
8016
+struct btf *bpf_object__btf(const struct bpf_object *obj)
8017
+{
8018
+ return obj ? obj->btf : NULL;
19058019 }
19068020
19078021 int bpf_object__btf_fd(const struct bpf_object *obj)
....@@ -1920,41 +8034,57 @@
19208034 return 0;
19218035 }
19228036
1923
-void *bpf_object__priv(struct bpf_object *obj)
8037
+void *bpf_object__priv(const struct bpf_object *obj)
19248038 {
19258039 return obj ? obj->priv : ERR_PTR(-EINVAL);
19268040 }
19278041
19288042 static struct bpf_program *
1929
-__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
8043
+__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8044
+ bool forward)
19308045 {
1931
- size_t idx;
8046
+ size_t nr_programs = obj->nr_programs;
8047
+ ssize_t idx;
19328048
1933
- if (!obj->programs)
8049
+ if (!nr_programs)
19348050 return NULL;
1935
- /* First handler */
1936
- if (prev == NULL)
1937
- return &obj->programs[0];
19388051
1939
- if (prev->obj != obj) {
1940
- pr_warning("error: program handler doesn't match object\n");
8052
+ if (!p)
8053
+ /* Iter from the beginning */
8054
+ return forward ? &obj->programs[0] :
8055
+ &obj->programs[nr_programs - 1];
8056
+
8057
+ if (p->obj != obj) {
8058
+ pr_warn("error: program handler doesn't match object\n");
19418059 return NULL;
19428060 }
19438061
1944
- idx = (prev - obj->programs) + 1;
1945
- if (idx >= obj->nr_programs)
8062
+ idx = (p - obj->programs) + (forward ? 1 : -1);
8063
+ if (idx >= obj->nr_programs || idx < 0)
19468064 return NULL;
19478065 return &obj->programs[idx];
19488066 }
19498067
19508068 struct bpf_program *
1951
-bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
8069
+bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
19528070 {
19538071 struct bpf_program *prog = prev;
19548072
19558073 do {
1956
- prog = __bpf_program__next(prog, obj);
1957
- } while (prog && bpf_program__is_function_storage(prog, obj));
8074
+ prog = __bpf_program__iter(prog, obj, true);
8075
+ } while (prog && prog_is_subprog(obj, prog));
8076
+
8077
+ return prog;
8078
+}
8079
+
8080
+struct bpf_program *
8081
+bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
8082
+{
8083
+ struct bpf_program *prog = next;
8084
+
8085
+ do {
8086
+ prog = __bpf_program__iter(prog, obj, false);
8087
+ } while (prog && prog_is_subprog(obj, prog));
19588088
19598089 return prog;
19608090 }
....@@ -1970,7 +8100,7 @@
19708100 return 0;
19718101 }
19728102
1973
-void *bpf_program__priv(struct bpf_program *prog)
8103
+void *bpf_program__priv(const struct bpf_program *prog)
19748104 {
19758105 return prog ? prog->priv : ERR_PTR(-EINVAL);
19768106 }
....@@ -1980,15 +8110,25 @@
19808110 prog->prog_ifindex = ifindex;
19818111 }
19828112
1983
-const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
8113
+const char *bpf_program__name(const struct bpf_program *prog)
8114
+{
8115
+ return prog->name;
8116
+}
8117
+
8118
+const char *bpf_program__section_name(const struct bpf_program *prog)
8119
+{
8120
+ return prog->sec_name;
8121
+}
8122
+
8123
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
19848124 {
19858125 const char *title;
19868126
1987
- title = prog->section_name;
8127
+ title = prog->sec_name;
19888128 if (needs_copy) {
19898129 title = strdup(title);
19908130 if (!title) {
1991
- pr_warning("failed to strdup program title\n");
8131
+ pr_warn("failed to strdup program title\n");
19928132 return ERR_PTR(-ENOMEM);
19938133 }
19948134 }
....@@ -1996,9 +8136,28 @@
19968136 return title;
19978137 }
19988138
1999
-int bpf_program__fd(struct bpf_program *prog)
8139
+bool bpf_program__autoload(const struct bpf_program *prog)
8140
+{
8141
+ return prog->load;
8142
+}
8143
+
8144
+int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8145
+{
8146
+ if (prog->obj->loaded)
8147
+ return -EINVAL;
8148
+
8149
+ prog->load = autoload;
8150
+ return 0;
8151
+}
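A minimal sketch of the autoload toggle above (not part of the patch); it must run after bpf_object__open() but before bpf_object__load(), and the program name is hypothetical:

#include <errno.h>
#include <string.h>
#include <bpf/libbpf.h>	/* "libbpf.h" when building in-tree */

static int skip_one_prog_example(struct bpf_object *obj)
{
	struct bpf_program *prog;
	int err = -ENOENT;

	bpf_object__for_each_program(prog, obj) {
		/* "unused_prog" is a hypothetical program name */
		if (strcmp(bpf_program__name(prog), "unused_prog"))
			continue;
		/* returns -EINVAL once the object is already loaded */
		err = bpf_program__set_autoload(prog, false);
	}
	return err;
}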
8152
+
8153
+int bpf_program__fd(const struct bpf_program *prog)
20008154 {
20018155 return bpf_program__nth_fd(prog, 0);
8156
+}
8157
+
8158
+size_t bpf_program__size(const struct bpf_program *prog)
8159
+{
8160
+ return prog->insns_cnt * BPF_INSN_SZ;
20028161 }
20038162
20048163 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
....@@ -2010,13 +8169,13 @@
20108169 return -EINVAL;
20118170
20128171 if (prog->instances.nr > 0 || prog->instances.fds) {
2013
- pr_warning("Can't set pre-processor after loading\n");
8172
+ pr_warn("Can't set pre-processor after loading\n");
20148173 return -EINVAL;
20158174 }
20168175
20178176 instances_fds = malloc(sizeof(int) * nr_instances);
20188177 if (!instances_fds) {
2019
- pr_warning("alloc memory failed for fds\n");
8178
+ pr_warn("alloc memory failed for fds\n");
20208179 return -ENOMEM;
20218180 }
20228181
....@@ -2029,7 +8188,7 @@
20298188 return 0;
20308189 }
20318190
2032
-int bpf_program__nth_fd(struct bpf_program *prog, int n)
8191
+int bpf_program__nth_fd(const struct bpf_program *prog, int n)
20338192 {
20348193 int fd;
20358194
....@@ -2037,19 +8196,24 @@
20378196 return -EINVAL;
20388197
20398198 if (n >= prog->instances.nr || n < 0) {
2040
- pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
2041
- n, prog->section_name, prog->instances.nr);
8199
+ pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
8200
+ n, prog->name, prog->instances.nr);
20428201 return -EINVAL;
20438202 }
20448203
20458204 fd = prog->instances.fds[n];
20468205 if (fd < 0) {
2047
- pr_warning("%dth instance of program '%s' is invalid\n",
2048
- n, prog->section_name);
8206
+ pr_warn("%dth instance of program '%s' is invalid\n",
8207
+ n, prog->name);
20498208 return -ENOENT;
20508209 }
20518210
20528211 return fd;
8212
+}
8213
+
8214
+enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
8215
+{
8216
+ return prog->type;
20538217 }
20548218
20558219 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
....@@ -2057,27 +8221,28 @@
20578221 prog->type = type;
20588222 }
20598223
2060
-static bool bpf_program__is_type(struct bpf_program *prog,
8224
+static bool bpf_program__is_type(const struct bpf_program *prog,
20618225 enum bpf_prog_type type)
20628226 {
20638227 return prog ? (prog->type == type) : false;
20648228 }
20658229
2066
-#define BPF_PROG_TYPE_FNS(NAME, TYPE) \
2067
-int bpf_program__set_##NAME(struct bpf_program *prog) \
2068
-{ \
2069
- if (!prog) \
2070
- return -EINVAL; \
2071
- bpf_program__set_type(prog, TYPE); \
2072
- return 0; \
2073
-} \
2074
- \
2075
-bool bpf_program__is_##NAME(struct bpf_program *prog) \
2076
-{ \
2077
- return bpf_program__is_type(prog, TYPE); \
2078
-} \
8230
+#define BPF_PROG_TYPE_FNS(NAME, TYPE) \
8231
+int bpf_program__set_##NAME(struct bpf_program *prog) \
8232
+{ \
8233
+ if (!prog) \
8234
+ return -EINVAL; \
8235
+ bpf_program__set_type(prog, TYPE); \
8236
+ return 0; \
8237
+} \
8238
+ \
8239
+bool bpf_program__is_##NAME(const struct bpf_program *prog) \
8240
+{ \
8241
+ return bpf_program__is_type(prog, TYPE); \
8242
+} \
20798243
20808244 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
8245
+BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
20818246 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
20828247 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
20838248 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
....@@ -2085,6 +8250,16 @@
20858250 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
20868251 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
20878252 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
8253
+BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
8254
+BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
8255
+BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
8256
+BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
8257
+
8258
+enum bpf_attach_type
8259
+bpf_program__get_expected_attach_type(struct bpf_program *prog)
8260
+{
8261
+ return prog->expected_attach_type;
8262
+}
20888263
20898264 void bpf_program__set_expected_attach_type(struct bpf_program *prog,
20908265 enum bpf_attach_type type)
....@@ -2092,98 +8267,636 @@
20928267 prog->expected_attach_type = type;
20938268 }
20948269
2095
-#define BPF_PROG_SEC_FULL(string, ptype, atype) \
2096
- { string, sizeof(string) - 1, ptype, atype }
8270
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional, \
8271
+ attachable, attach_btf) \
8272
+ { \
8273
+ .sec = string, \
8274
+ .len = sizeof(string) - 1, \
8275
+ .prog_type = ptype, \
8276
+ .expected_attach_type = eatype, \
8277
+ .is_exp_attach_type_optional = eatype_optional, \
8278
+ .is_attachable = attachable, \
8279
+ .is_attach_btf = attach_btf, \
8280
+ }
20978281
2098
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
8282
+/* Programs that can NOT be attached. */
8283
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
20998284
2100
-#define BPF_S_PROG_SEC(string, ptype) \
2101
- BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
8285
+/* Programs that can be attached. */
8286
+#define BPF_APROG_SEC(string, ptype, atype) \
8287
+ BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
21028288
2103
-#define BPF_SA_PROG_SEC(string, ptype) \
2104
- BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
8289
+/* Programs that must specify expected attach type at load time. */
8290
+#define BPF_EAPROG_SEC(string, ptype, eatype) \
8291
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
21058292
2106
-static const struct {
2107
- const char *sec;
2108
- size_t len;
2109
- enum bpf_prog_type prog_type;
2110
- enum bpf_attach_type expected_attach_type;
2111
-} section_names[] = {
2112
- BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
2113
- BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
2114
- BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE),
2115
- BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
2116
- BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
2117
- BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
2118
- BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
2119
- BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
2120
- BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
2121
- BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
2122
- BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK),
2123
- BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE),
2124
- BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
2125
- BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
2126
- BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
2127
- BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL),
2128
- BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
2129
- BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
2130
- BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
2131
- BPF_PROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2),
2132
- BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
2133
- BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
2134
- BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
2135
- BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
2136
- BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG),
2137
- BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG),
2138
- BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
2139
- BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
8293
+/* Programs that use BTF to identify attach point */
8294
+#define BPF_PROG_BTF(string, ptype, eatype) \
8295
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
8296
+
8297
+/* Programs that can be attached but attach type can't be identified by section
8298
+ * name. Kept for backward compatibility.
8299
+ */
8300
+#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
8301
+
8302
+#define SEC_DEF(sec_pfx, ptype, ...) { \
8303
+ .sec = sec_pfx, \
8304
+ .len = sizeof(sec_pfx) - 1, \
8305
+ .prog_type = BPF_PROG_TYPE_##ptype, \
8306
+ __VA_ARGS__ \
8307
+}
8308
+
8309
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
8310
+ struct bpf_program *prog);
8311
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
8312
+ struct bpf_program *prog);
8313
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
8314
+ struct bpf_program *prog);
8315
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
8316
+ struct bpf_program *prog);
8317
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
8318
+ struct bpf_program *prog);
8319
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
8320
+ struct bpf_program *prog);
8321
+
8322
+static const struct bpf_sec_def section_defs[] = {
8323
+ BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
8324
+ BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT),
8325
+ SEC_DEF("kprobe/", KPROBE,
8326
+ .attach_fn = attach_kprobe),
8327
+ BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE),
8328
+ SEC_DEF("kretprobe/", KPROBE,
8329
+ .attach_fn = attach_kprobe),
8330
+ BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE),
8331
+ BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
8332
+ BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
8333
+ SEC_DEF("tracepoint/", TRACEPOINT,
8334
+ .attach_fn = attach_tp),
8335
+ SEC_DEF("tp/", TRACEPOINT,
8336
+ .attach_fn = attach_tp),
8337
+ SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
8338
+ .attach_fn = attach_raw_tp),
8339
+ SEC_DEF("raw_tp/", RAW_TRACEPOINT,
8340
+ .attach_fn = attach_raw_tp),
8341
+ SEC_DEF("tp_btf/", TRACING,
8342
+ .expected_attach_type = BPF_TRACE_RAW_TP,
8343
+ .is_attach_btf = true,
8344
+ .attach_fn = attach_trace),
8345
+ SEC_DEF("fentry/", TRACING,
8346
+ .expected_attach_type = BPF_TRACE_FENTRY,
8347
+ .is_attach_btf = true,
8348
+ .attach_fn = attach_trace),
8349
+ SEC_DEF("fmod_ret/", TRACING,
8350
+ .expected_attach_type = BPF_MODIFY_RETURN,
8351
+ .is_attach_btf = true,
8352
+ .attach_fn = attach_trace),
8353
+ SEC_DEF("fexit/", TRACING,
8354
+ .expected_attach_type = BPF_TRACE_FEXIT,
8355
+ .is_attach_btf = true,
8356
+ .attach_fn = attach_trace),
8357
+ SEC_DEF("fentry.s/", TRACING,
8358
+ .expected_attach_type = BPF_TRACE_FENTRY,
8359
+ .is_attach_btf = true,
8360
+ .is_sleepable = true,
8361
+ .attach_fn = attach_trace),
8362
+ SEC_DEF("fmod_ret.s/", TRACING,
8363
+ .expected_attach_type = BPF_MODIFY_RETURN,
8364
+ .is_attach_btf = true,
8365
+ .is_sleepable = true,
8366
+ .attach_fn = attach_trace),
8367
+ SEC_DEF("fexit.s/", TRACING,
8368
+ .expected_attach_type = BPF_TRACE_FEXIT,
8369
+ .is_attach_btf = true,
8370
+ .is_sleepable = true,
8371
+ .attach_fn = attach_trace),
8372
+ SEC_DEF("freplace/", EXT,
8373
+ .is_attach_btf = true,
8374
+ .attach_fn = attach_trace),
8375
+ SEC_DEF("lsm/", LSM,
8376
+ .is_attach_btf = true,
8377
+ .expected_attach_type = BPF_LSM_MAC,
8378
+ .attach_fn = attach_lsm),
8379
+ SEC_DEF("lsm.s/", LSM,
8380
+ .is_attach_btf = true,
8381
+ .is_sleepable = true,
8382
+ .expected_attach_type = BPF_LSM_MAC,
8383
+ .attach_fn = attach_lsm),
8384
+ SEC_DEF("iter/", TRACING,
8385
+ .expected_attach_type = BPF_TRACE_ITER,
8386
+ .is_attach_btf = true,
8387
+ .attach_fn = attach_iter),
8388
+ BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
8389
+ BPF_XDP_DEVMAP),
8390
+ BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
8391
+ BPF_XDP_CPUMAP),
8392
+ BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP,
8393
+ BPF_XDP),
8394
+ BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
8395
+ BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
8396
+ BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
8397
+ BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
8398
+ BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL),
8399
+ BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB,
8400
+ BPF_CGROUP_INET_INGRESS),
8401
+ BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB,
8402
+ BPF_CGROUP_INET_EGRESS),
8403
+ BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
8404
+ BPF_EAPROG_SEC("cgroup/sock_create", BPF_PROG_TYPE_CGROUP_SOCK,
8405
+ BPF_CGROUP_INET_SOCK_CREATE),
8406
+ BPF_EAPROG_SEC("cgroup/sock_release", BPF_PROG_TYPE_CGROUP_SOCK,
8407
+ BPF_CGROUP_INET_SOCK_RELEASE),
8408
+ BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK,
8409
+ BPF_CGROUP_INET_SOCK_CREATE),
8410
+ BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK,
8411
+ BPF_CGROUP_INET4_POST_BIND),
8412
+ BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK,
8413
+ BPF_CGROUP_INET6_POST_BIND),
8414
+ BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE,
8415
+ BPF_CGROUP_DEVICE),
8416
+ BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS,
8417
+ BPF_CGROUP_SOCK_OPS),
8418
+ BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB,
8419
+ BPF_SK_SKB_STREAM_PARSER),
8420
+ BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB,
8421
+ BPF_SK_SKB_STREAM_VERDICT),
8422
+ BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB),
8423
+ BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG,
8424
+ BPF_SK_MSG_VERDICT),
8425
+ BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2,
8426
+ BPF_LIRC_MODE2),
8427
+ BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR,
8428
+ BPF_FLOW_DISSECTOR),
8429
+ BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8430
+ BPF_CGROUP_INET4_BIND),
8431
+ BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8432
+ BPF_CGROUP_INET6_BIND),
8433
+ BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8434
+ BPF_CGROUP_INET4_CONNECT),
8435
+ BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8436
+ BPF_CGROUP_INET6_CONNECT),
8437
+ BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8438
+ BPF_CGROUP_UDP4_SENDMSG),
8439
+ BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8440
+ BPF_CGROUP_UDP6_SENDMSG),
8441
+ BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8442
+ BPF_CGROUP_UDP4_RECVMSG),
8443
+ BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8444
+ BPF_CGROUP_UDP6_RECVMSG),
8445
+ BPF_EAPROG_SEC("cgroup/getpeername4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8446
+ BPF_CGROUP_INET4_GETPEERNAME),
8447
+ BPF_EAPROG_SEC("cgroup/getpeername6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8448
+ BPF_CGROUP_INET6_GETPEERNAME),
8449
+ BPF_EAPROG_SEC("cgroup/getsockname4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8450
+ BPF_CGROUP_INET4_GETSOCKNAME),
8451
+ BPF_EAPROG_SEC("cgroup/getsockname6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
8452
+ BPF_CGROUP_INET6_GETSOCKNAME),
8453
+ BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
8454
+ BPF_CGROUP_SYSCTL),
8455
+ BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
8456
+ BPF_CGROUP_GETSOCKOPT),
8457
+ BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
8458
+ BPF_CGROUP_SETSOCKOPT),
8459
+ BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS),
8460
+ BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP,
8461
+ BPF_SK_LOOKUP),
21408462 };
21418463
8464
+#undef BPF_PROG_SEC_IMPL
21428465 #undef BPF_PROG_SEC
2143
-#undef BPF_PROG_SEC_FULL
2144
-#undef BPF_S_PROG_SEC
2145
-#undef BPF_SA_PROG_SEC
8466
+#undef BPF_APROG_SEC
8467
+#undef BPF_EAPROG_SEC
8468
+#undef BPF_APROG_COMPAT
8469
+#undef SEC_DEF
8470
+
8471
+#define MAX_TYPE_NAME_SIZE 32
8472
+
8473
+static const struct bpf_sec_def *find_sec_def(const char *sec_name)
8474
+{
8475
+ int i, n = ARRAY_SIZE(section_defs);
8476
+
8477
+ for (i = 0; i < n; i++) {
8478
+ if (strncmp(sec_name,
8479
+ section_defs[i].sec, section_defs[i].len))
8480
+ continue;
8481
+ return &section_defs[i];
8482
+ }
8483
+ return NULL;
8484
+}
8485
+
8486
+static char *libbpf_get_type_names(bool attach_type)
8487
+{
8488
+ int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
8489
+ char *buf;
8490
+
8491
+ buf = malloc(len);
8492
+ if (!buf)
8493
+ return NULL;
8494
+
8495
+ buf[0] = '\0';
8496
+ /* Forge string buf with all available names */
8497
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8498
+ if (attach_type && !section_defs[i].is_attachable)
8499
+ continue;
8500
+
8501
+ if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
8502
+ free(buf);
8503
+ return NULL;
8504
+ }
8505
+ strcat(buf, " ");
8506
+ strcat(buf, section_defs[i].sec);
8507
+ }
8508
+
8509
+ return buf;
8510
+}
21468511
21478512 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
21488513 enum bpf_attach_type *expected_attach_type)
21498514 {
8515
+ const struct bpf_sec_def *sec_def;
8516
+ char *type_names;
8517
+
8518
+ if (!name)
8519
+ return -EINVAL;
8520
+
8521
+ sec_def = find_sec_def(name);
8522
+ if (sec_def) {
8523
+ *prog_type = sec_def->prog_type;
8524
+ *expected_attach_type = sec_def->expected_attach_type;
8525
+ return 0;
8526
+ }
8527
+
8528
+ pr_debug("failed to guess program type from ELF section '%s'\n", name);
8529
+ type_names = libbpf_get_type_names(false);
8530
+ if (type_names != NULL) {
8531
+ pr_debug("supported section(type) names are:%s\n", type_names);
8532
+ free(type_names);
8533
+ }
8534
+
8535
+ return -ESRCH;
8536
+}
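A short sketch of how the section_defs[] table above resolves an ELF section name (not part of the patch); the section string is only an example:

#include <stdio.h>
#include <bpf/libbpf.h>	/* "libbpf.h" when building in-tree */

static void guess_type_example(void)
{
	enum bpf_prog_type prog_type;
	enum bpf_attach_type attach_type;

	/* the first matching prefix wins, here the "tp/" TRACEPOINT entry */
	if (!libbpf_prog_type_by_name("tp/sched/sched_switch",
				      &prog_type, &attach_type))
		printf("resolved to prog type %d\n", prog_type);
}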
8537
+
8538
+static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
8539
+ size_t offset)
8540
+{
8541
+ struct bpf_map *map;
8542
+ size_t i;
8543
+
8544
+ for (i = 0; i < obj->nr_maps; i++) {
8545
+ map = &obj->maps[i];
8546
+ if (!bpf_map__is_struct_ops(map))
8547
+ continue;
8548
+ if (map->sec_offset <= offset &&
8549
+ offset - map->sec_offset < map->def.value_size)
8550
+ return map;
8551
+ }
8552
+
8553
+ return NULL;
8554
+}
8555
+
8556
+/* Collect the reloc from ELF and populate the st_ops->progs[] */
8557
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
8558
+ GElf_Shdr *shdr, Elf_Data *data)
8559
+{
8560
+ const struct btf_member *member;
8561
+ struct bpf_struct_ops *st_ops;
8562
+ struct bpf_program *prog;
8563
+ unsigned int shdr_idx;
8564
+ const struct btf *btf;
8565
+ struct bpf_map *map;
8566
+ Elf_Data *symbols;
8567
+ unsigned int moff, insn_idx;
8568
+ const char *name;
8569
+ __u32 member_idx;
8570
+ GElf_Sym sym;
8571
+ GElf_Rel rel;
8572
+ int i, nrels;
8573
+
8574
+ symbols = obj->efile.symbols;
8575
+ btf = obj->btf;
8576
+ nrels = shdr->sh_size / shdr->sh_entsize;
8577
+ for (i = 0; i < nrels; i++) {
8578
+ if (!gelf_getrel(data, i, &rel)) {
8579
+ pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
8580
+ return -LIBBPF_ERRNO__FORMAT;
8581
+ }
8582
+
8583
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
8584
+ pr_warn("struct_ops reloc: symbol %zx not found\n",
8585
+ (size_t)GELF_R_SYM(rel.r_info));
8586
+ return -LIBBPF_ERRNO__FORMAT;
8587
+ }
8588
+
8589
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
8590
+ map = find_struct_ops_map_by_offset(obj, rel.r_offset);
8591
+ if (!map) {
8592
+ pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
8593
+ (size_t)rel.r_offset);
8594
+ return -EINVAL;
8595
+ }
8596
+
8597
+ moff = rel.r_offset - map->sec_offset;
8598
+ shdr_idx = sym.st_shndx;
8599
+ st_ops = map->st_ops;
8600
+ pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
8601
+ map->name,
8602
+ (long long)(rel.r_info >> 32),
8603
+ (long long)sym.st_value,
8604
+ shdr_idx, (size_t)rel.r_offset,
8605
+ map->sec_offset, sym.st_name, name);
8606
+
8607
+ if (shdr_idx >= SHN_LORESERVE) {
8608
+ pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n",
8609
+ map->name, (size_t)rel.r_offset, shdr_idx);
8610
+ return -LIBBPF_ERRNO__RELOC;
8611
+ }
8612
+ if (sym.st_value % BPF_INSN_SZ) {
8613
+ pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
8614
+ map->name, (unsigned long long)sym.st_value);
8615
+ return -LIBBPF_ERRNO__FORMAT;
8616
+ }
8617
+ insn_idx = sym.st_value / BPF_INSN_SZ;
8618
+
8619
+ member = find_member_by_offset(st_ops->type, moff * 8);
8620
+ if (!member) {
8621
+ pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
8622
+ map->name, moff);
8623
+ return -EINVAL;
8624
+ }
8625
+ member_idx = member - btf_members(st_ops->type);
8626
+ name = btf__name_by_offset(btf, member->name_off);
8627
+
8628
+ if (!resolve_func_ptr(btf, member->type, NULL)) {
8629
+ pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
8630
+ map->name, name);
8631
+ return -EINVAL;
8632
+ }
8633
+
8634
+ prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
8635
+ if (!prog) {
8636
+ pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
8637
+ map->name, shdr_idx, name);
8638
+ return -EINVAL;
8639
+ }
8640
+
8641
+ if (prog->type == BPF_PROG_TYPE_UNSPEC) {
8642
+ const struct bpf_sec_def *sec_def;
8643
+
8644
+ sec_def = find_sec_def(prog->sec_name);
8645
+ if (sec_def &&
8646
+ sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
8647
+ /* for pr_warn */
8648
+ prog->type = sec_def->prog_type;
8649
+ goto invalid_prog;
8650
+ }
8651
+
8652
+ prog->type = BPF_PROG_TYPE_STRUCT_OPS;
8653
+ prog->attach_btf_id = st_ops->type_id;
8654
+ prog->expected_attach_type = member_idx;
8655
+ } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
8656
+ prog->attach_btf_id != st_ops->type_id ||
8657
+ prog->expected_attach_type != member_idx) {
8658
+ goto invalid_prog;
8659
+ }
8660
+ st_ops->progs[member_idx] = prog;
8661
+ }
8662
+
8663
+ return 0;
8664
+
8665
+invalid_prog:
8666
+ pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
8667
+ map->name, prog->name, prog->sec_name, prog->type,
8668
+ prog->attach_btf_id, prog->expected_attach_type, name);
8669
+ return -EINVAL;
8670
+}
8671
+
8672
+#define BTF_TRACE_PREFIX "btf_trace_"
8673
+#define BTF_LSM_PREFIX "bpf_lsm_"
8674
+#define BTF_ITER_PREFIX "bpf_iter_"
8675
+#define BTF_MAX_NAME_SIZE 128
8676
+
8677
+static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
8678
+ const char *name, __u32 kind)
8679
+{
8680
+ char btf_type_name[BTF_MAX_NAME_SIZE];
8681
+ int ret;
8682
+
8683
+ ret = snprintf(btf_type_name, sizeof(btf_type_name),
8684
+ "%s%s", prefix, name);
8685
+ /* snprintf returns the number of characters written excluding the
8686
+ * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
8687
+ * indicates truncation.
8688
+ */
8689
+ if (ret < 0 || ret >= sizeof(btf_type_name))
8690
+ return -ENAMETOOLONG;
8691
+ return btf__find_by_name_kind(btf, btf_type_name, kind);
8692
+}
8693
+
8694
+static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
8695
+ enum bpf_attach_type attach_type)
8696
+{
8697
+ int err;
8698
+
8699
+ if (attach_type == BPF_TRACE_RAW_TP)
8700
+ err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
8701
+ BTF_KIND_TYPEDEF);
8702
+ else if (attach_type == BPF_LSM_MAC)
8703
+ err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
8704
+ BTF_KIND_FUNC);
8705
+ else if (attach_type == BPF_TRACE_ITER)
8706
+ err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
8707
+ BTF_KIND_FUNC);
8708
+ else
8709
+ err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
8710
+
8711
+ if (err <= 0)
8712
+ pr_warn("%s is not found in vmlinux BTF\n", name);
8713
+
8714
+ return err;
8715
+}
8716
+
8717
+int libbpf_find_vmlinux_btf_id(const char *name,
8718
+ enum bpf_attach_type attach_type)
8719
+{
8720
+ struct btf *btf;
8721
+ int err;
8722
+
8723
+ btf = libbpf_find_kernel_btf();
8724
+ if (IS_ERR(btf)) {
8725
+ pr_warn("vmlinux BTF is not found\n");
8726
+ return -EINVAL;
8727
+ }
8728
+
8729
+ err = __find_vmlinux_btf_id(btf, name, attach_type);
8730
+ btf__free(btf);
8731
+ return err;
8732
+}
8733
+
8734
+static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
8735
+{
8736
+ struct bpf_prog_info_linear *info_linear;
8737
+ struct bpf_prog_info *info;
8738
+ struct btf *btf = NULL;
8739
+ int err = -EINVAL;
8740
+
8741
+ info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
8742
+ if (IS_ERR_OR_NULL(info_linear)) {
8743
+ pr_warn("failed get_prog_info_linear for FD %d\n",
8744
+ attach_prog_fd);
8745
+ return -EINVAL;
8746
+ }
8747
+ info = &info_linear->info;
8748
+ if (!info->btf_id) {
8749
+ pr_warn("The target program doesn't have BTF\n");
8750
+ goto out;
8751
+ }
8752
+ if (btf__get_from_id(info->btf_id, &btf)) {
8753
+ pr_warn("Failed to get BTF of the program\n");
8754
+ goto out;
8755
+ }
8756
+ err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
8757
+ btf__free(btf);
8758
+ if (err <= 0) {
8759
+ pr_warn("%s is not found in prog's BTF\n", name);
8760
+ goto out;
8761
+ }
8762
+out:
8763
+ free(info_linear);
8764
+ return err;
8765
+}
8766
+
8767
+static int libbpf_find_attach_btf_id(struct bpf_program *prog)
8768
+{
8769
+ enum bpf_attach_type attach_type = prog->expected_attach_type;
8770
+ __u32 attach_prog_fd = prog->attach_prog_fd;
8771
+ const char *name = prog->sec_name;
8772
+ int i, err;
8773
+
8774
+ if (!name)
8775
+ return -EINVAL;
8776
+
8777
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8778
+ if (!section_defs[i].is_attach_btf)
8779
+ continue;
8780
+ if (strncmp(name, section_defs[i].sec, section_defs[i].len))
8781
+ continue;
8782
+ if (attach_prog_fd)
8783
+ err = libbpf_find_prog_btf_id(name + section_defs[i].len,
8784
+ attach_prog_fd);
8785
+ else
8786
+ err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
8787
+ name + section_defs[i].len,
8788
+ attach_type);
8789
+ return err;
8790
+ }
8791
+ pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
8792
+ return -ESRCH;
8793
+}
8794
+
8795
+int libbpf_attach_type_by_name(const char *name,
8796
+ enum bpf_attach_type *attach_type)
8797
+{
8798
+ char *type_names;
21508799 int i;
21518800
21528801 if (!name)
21538802 return -EINVAL;
21548803
2155
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
2156
- if (strncmp(name, section_names[i].sec, section_names[i].len))
8804
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8805
+ if (strncmp(name, section_defs[i].sec, section_defs[i].len))
21578806 continue;
2158
- *prog_type = section_names[i].prog_type;
2159
- *expected_attach_type = section_names[i].expected_attach_type;
8807
+ if (!section_defs[i].is_attachable)
8808
+ return -EINVAL;
8809
+ *attach_type = section_defs[i].expected_attach_type;
21608810 return 0;
21618811 }
8812
+ pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
8813
+ type_names = libbpf_get_type_names(true);
8814
+ if (type_names != NULL) {
8815
+ pr_debug("attachable section(type) names are:%s\n", type_names);
8816
+ free(type_names);
8817
+ }
8818
+
21628819 return -EINVAL;
21638820 }
21648821
2165
-static int
2166
-bpf_program__identify_section(struct bpf_program *prog,
2167
- enum bpf_prog_type *prog_type,
2168
- enum bpf_attach_type *expected_attach_type)
2169
-{
2170
- return libbpf_prog_type_by_name(prog->section_name, prog_type,
2171
- expected_attach_type);
2172
-}
2173
-
2174
-int bpf_map__fd(struct bpf_map *map)
8822
+int bpf_map__fd(const struct bpf_map *map)
21758823 {
21768824 return map ? map->fd : -EINVAL;
21778825 }
21788826
2179
-const struct bpf_map_def *bpf_map__def(struct bpf_map *map)
8827
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
21808828 {
21818829 return map ? &map->def : ERR_PTR(-EINVAL);
21828830 }
21838831
2184
-const char *bpf_map__name(struct bpf_map *map)
8832
+const char *bpf_map__name(const struct bpf_map *map)
21858833 {
21868834 return map ? map->name : NULL;
8835
+}
8836
+
8837
+enum bpf_map_type bpf_map__type(const struct bpf_map *map)
8838
+{
8839
+ return map->def.type;
8840
+}
8841
+
8842
+int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
8843
+{
8844
+ if (map->fd >= 0)
8845
+ return -EBUSY;
8846
+ map->def.type = type;
8847
+ return 0;
8848
+}
8849
+
8850
+__u32 bpf_map__map_flags(const struct bpf_map *map)
8851
+{
8852
+ return map->def.map_flags;
8853
+}
8854
+
8855
+int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
8856
+{
8857
+ if (map->fd >= 0)
8858
+ return -EBUSY;
8859
+ map->def.map_flags = flags;
8860
+ return 0;
8861
+}
8862
+
8863
+__u32 bpf_map__numa_node(const struct bpf_map *map)
8864
+{
8865
+ return map->numa_node;
8866
+}
8867
+
8868
+int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
8869
+{
8870
+ if (map->fd >= 0)
8871
+ return -EBUSY;
8872
+ map->numa_node = numa_node;
8873
+ return 0;
8874
+}
8875
+
8876
+__u32 bpf_map__key_size(const struct bpf_map *map)
8877
+{
8878
+ return map->def.key_size;
8879
+}
8880
+
8881
+int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
8882
+{
8883
+ if (map->fd >= 0)
8884
+ return -EBUSY;
8885
+ map->def.key_size = size;
8886
+ return 0;
8887
+}
8888
+
8889
+__u32 bpf_map__value_size(const struct bpf_map *map)
8890
+{
8891
+ return map->def.value_size;
8892
+}
8893
+
8894
+int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
8895
+{
8896
+ if (map->fd >= 0)
8897
+ return -EBUSY;
8898
+ map->def.value_size = size;
8899
+ return 0;
21878900 }
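An illustrative sketch (not part of the patch): the setters above deliberately fail with -EBUSY once the map fd exists, so they are meant to run between bpf_object__open() and bpf_object__load(); the map name is hypothetical:

#include <errno.h>
#include <bpf/libbpf.h>	/* "libbpf.h" when building in-tree */

static int tune_map_example(struct bpf_object *obj)
{
	struct bpf_map *map;

	map = bpf_object__find_map_by_name(obj, "events");	/* hypothetical map */
	if (!map)
		return -ENOENT;

	/* both calls return -EBUSY if the map has already been created */
	if (bpf_map__set_key_size(map, 8) || bpf_map__set_value_size(map, 64))
		return -EBUSY;
	return 0;
}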
21888901
21898902 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
....@@ -2212,25 +8925,63 @@
22128925 return 0;
22138926 }
22148927
2215
-void *bpf_map__priv(struct bpf_map *map)
8928
+void *bpf_map__priv(const struct bpf_map *map)
22168929 {
22178930 return map ? map->priv : ERR_PTR(-EINVAL);
22188931 }
22198932
2220
-bool bpf_map__is_offload_neutral(struct bpf_map *map)
8933
+int bpf_map__set_initial_value(struct bpf_map *map,
8934
+ const void *data, size_t size)
8935
+{
8936
+ if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
8937
+ size != map->def.value_size || map->fd >= 0)
8938
+ return -EINVAL;
8939
+
8940
+ memcpy(map->mmaped, data, size);
8941
+ return 0;
8942
+}
8943
+
8944
+bool bpf_map__is_offload_neutral(const struct bpf_map *map)
22218945 {
22228946 return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
22238947 }
22248948
2225
-void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
8949
+bool bpf_map__is_internal(const struct bpf_map *map)
22268950 {
2227
- map->map_ifindex = ifindex;
8951
+ return map->libbpf_type != LIBBPF_MAP_UNSPEC;
22288952 }
22298953
2230
-struct bpf_map *
2231
-bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
8954
+__u32 bpf_map__ifindex(const struct bpf_map *map)
22328955 {
2233
- size_t idx;
8956
+ return map->map_ifindex;
8957
+}
8958
+
8959
+int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
8960
+{
8961
+ if (map->fd >= 0)
8962
+ return -EBUSY;
8963
+ map->map_ifindex = ifindex;
8964
+ return 0;
8965
+}
8966
+
8967
+int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
8968
+{
8969
+ if (!bpf_map_type__is_map_in_map(map->def.type)) {
8970
+ pr_warn("error: unsupported map type\n");
8971
+ return -EINVAL;
8972
+ }
8973
+ if (map->inner_map_fd != -1) {
8974
+ pr_warn("error: inner_map_fd already specified\n");
8975
+ return -EINVAL;
8976
+ }
8977
+ map->inner_map_fd = fd;
8978
+ return 0;
8979
+}
8980
+
8981
+static struct bpf_map *
8982
+__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
8983
+{
8984
+ ssize_t idx;
22348985 struct bpf_map *s, *e;
22358986
22368987 if (!obj || !obj->maps)
....@@ -2239,50 +8990,66 @@
22398990 s = obj->maps;
22408991 e = obj->maps + obj->nr_maps;
22418992
2242
- if (prev == NULL)
2243
- return s;
2244
-
2245
- if ((prev < s) || (prev >= e)) {
2246
- pr_warning("error in %s: map handler doesn't belong to object\n",
2247
- __func__);
8993
+ if ((m < s) || (m >= e)) {
8994
+ pr_warn("error in %s: map handler doesn't belong to object\n",
8995
+ __func__);
22488996 return NULL;
22498997 }
22508998
2251
- idx = (prev - obj->maps) + 1;
2252
- if (idx >= obj->nr_maps)
8999
+ idx = (m - obj->maps) + i;
9000
+ if (idx >= obj->nr_maps || idx < 0)
22539001 return NULL;
22549002 return &obj->maps[idx];
22559003 }
22569004
22579005 struct bpf_map *
2258
-bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
9006
+bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
9007
+{
9008
+ if (prev == NULL)
9009
+ return obj->maps;
9010
+
9011
+ return __bpf_map__iter(prev, obj, 1);
9012
+}
9013
+
9014
+struct bpf_map *
9015
+bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
9016
+{
9017
+ if (next == NULL) {
9018
+ if (!obj->nr_maps)
9019
+ return NULL;
9020
+ return obj->maps + obj->nr_maps - 1;
9021
+ }
9022
+
9023
+ return __bpf_map__iter(next, obj, -1);
9024
+}
9025
+
9026
+struct bpf_map *
9027
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
22599028 {
22609029 struct bpf_map *pos;
22619030
2262
- bpf_map__for_each(pos, obj) {
9031
+ bpf_object__for_each_map(pos, obj) {
22639032 if (pos->name && !strcmp(pos->name, name))
22649033 return pos;
22659034 }
22669035 return NULL;
22679036 }
22689037
9038
+int
9039
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
9040
+{
9041
+ return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
9042
+}
9043
+
22699044 struct bpf_map *
22709045 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
22719046 {
2272
- int i;
2273
-
2274
- for (i = 0; i < obj->nr_maps; i++) {
2275
- if (obj->maps[i].offset == offset)
2276
- return &obj->maps[i];
2277
- }
2278
- return ERR_PTR(-ENOENT);
9047
+ return ERR_PTR(-ENOTSUP);
22799048 }
22809049
22819050 long libbpf_get_error(const void *ptr)
22829051 {
2283
- if (IS_ERR(ptr))
2284
- return PTR_ERR(ptr);
2285
- return 0;
9052
+ return PTR_ERR_OR_ZERO(ptr);
22869053 }
22879054
22889055 int bpf_prog_load(const char *file, enum bpf_prog_type type,
....@@ -2303,8 +9070,6 @@
23039070 {
23049071 struct bpf_object_open_attr open_attr = {};
23059072 struct bpf_program *prog, *first_prog = NULL;
2306
- enum bpf_attach_type expected_attach_type;
2307
- enum bpf_prog_type prog_type;
23089073 struct bpf_object *obj;
23099074 struct bpf_map *map;
23109075 int err;
....@@ -2322,39 +9087,40 @@
23229087 return -ENOENT;
23239088
23249089 bpf_object__for_each_program(prog, obj) {
9090
+ enum bpf_attach_type attach_type = attr->expected_attach_type;
23259091 /*
2326
- * If type is not specified, try to guess it based on
2327
- * section name.
9092
+ * to preserve backwards compatibility, bpf_prog_load treats
9093
+ * attr->prog_type, if specified, as an override to whatever
9094
+ * bpf_object__open guessed
23289095 */
2329
- prog_type = attr->prog_type;
2330
- prog->prog_ifindex = attr->ifindex;
2331
- expected_attach_type = attr->expected_attach_type;
2332
- if (prog_type == BPF_PROG_TYPE_UNSPEC) {
2333
- err = bpf_program__identify_section(prog, &prog_type,
2334
- &expected_attach_type);
2335
- if (err < 0) {
2336
- pr_warning("failed to guess program type based on section name %s\n",
2337
- prog->section_name);
2338
- bpf_object__close(obj);
2339
- return -EINVAL;
2340
- }
9096
+ if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
9097
+ bpf_program__set_type(prog, attr->prog_type);
9098
+ bpf_program__set_expected_attach_type(prog,
9099
+ attach_type);
9100
+ }
9101
+ if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
9102
+ /*
9103
+ * we haven't guessed from section name and user
9104
+ * didn't provide a fallback type, too bad...
9105
+ */
9106
+ bpf_object__close(obj);
9107
+ return -EINVAL;
23419108 }
23429109
2343
- bpf_program__set_type(prog, prog_type);
2344
- bpf_program__set_expected_attach_type(prog,
2345
- expected_attach_type);
2346
-
2347
- if (!bpf_program__is_function_storage(prog, obj) && !first_prog)
9110
+ prog->prog_ifindex = attr->ifindex;
9111
+ prog->log_level = attr->log_level;
9112
+ prog->prog_flags |= attr->prog_flags;
9113
+ if (!first_prog)
23489114 first_prog = prog;
23499115 }
23509116
2351
- bpf_map__for_each(map, obj) {
9117
+ bpf_object__for_each_map(map, obj) {
23529118 if (!bpf_map__is_offload_neutral(map))
23539119 map->map_ifindex = attr->ifindex;
23549120 }
23559121
23569122 if (!first_prog) {
2357
- pr_warning("object file doesn't contain bpf program\n");
9123
+ pr_warn("object file doesn't contain bpf program\n");
23589124 bpf_object__close(obj);
23599125 return -ENOENT;
23609126 }
....@@ -2362,7 +9128,7 @@
23629128 err = bpf_object__load(obj);
23639129 if (err) {
23649130 bpf_object__close(obj);
2365
- return -EINVAL;
9131
+ return err;
23669132 }
23679133
23689134 *pobj = obj;
....@@ -2370,62 +9136,1817 @@
23709136 return 0;
23719137 }
23729138
2373
-enum bpf_perf_event_ret
2374
-bpf_perf_event_read_simple(void *mem, unsigned long size,
2375
- unsigned long page_size, void **buf, size_t *buf_len,
2376
- bpf_perf_event_print_t fn, void *priv)
9139
+struct bpf_link {
9140
+ int (*detach)(struct bpf_link *link);
9141
+ int (*destroy)(struct bpf_link *link);
9142
+ char *pin_path; /* NULL, if not pinned */
9143
+ int fd; /* hook FD, -1 if not applicable */
9144
+ bool disconnected;
9145
+};
9146
+
9147
+/* Replace link's underlying BPF program with the new one */
9148
+int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
23779149 {
2378
- volatile struct perf_event_mmap_page *header = mem;
9150
+ return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
9151
+}
9152
+
9153
+/* Release "ownership" of underlying BPF resource (typically, BPF program
9154
+ * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
9155
+ * link, when destroyed through a bpf_link__destroy() call, won't attempt to
9156
+ * detach/unregister that BPF resource. This is useful in situations where,
9157
+ * say, attached BPF program has to outlive userspace program that attached it
9158
+ * in the system. Depending on type of BPF program, though, there might be
9159
+ * additional steps (like pinning BPF program in BPF FS) necessary to ensure
9160
+ * exit of userspace program doesn't trigger automatic detachment and clean up
9161
+ * inside the kernel.
9162
+ */
9163
+void bpf_link__disconnect(struct bpf_link *link)
9164
+{
9165
+ link->disconnected = true;
9166
+}
9167
+
9168
+int bpf_link__destroy(struct bpf_link *link)
9169
+{
9170
+ int err = 0;
9171
+
9172
+ if (IS_ERR_OR_NULL(link))
9173
+ return 0;
9174
+
9175
+ if (!link->disconnected && link->detach)
9176
+ err = link->detach(link);
9177
+ if (link->destroy)
9178
+ link->destroy(link);
9179
+ if (link->pin_path)
9180
+ free(link->pin_path);
9181
+ free(link);
9182
+
9183
+ return err;
9184
+}
9185
+
9186
+int bpf_link__fd(const struct bpf_link *link)
9187
+{
9188
+ return link->fd;
9189
+}
9190
+
9191
+const char *bpf_link__pin_path(const struct bpf_link *link)
9192
+{
9193
+ return link->pin_path;
9194
+}
9195
+
9196
+static int bpf_link__detach_fd(struct bpf_link *link)
9197
+{
9198
+ return close(link->fd);
9199
+}
9200
+
9201
+struct bpf_link *bpf_link__open(const char *path)
9202
+{
9203
+ struct bpf_link *link;
9204
+ int fd;
9205
+
9206
+ fd = bpf_obj_get(path);
9207
+ if (fd < 0) {
9208
+ fd = -errno;
9209
+ pr_warn("failed to open link at %s: %d\n", path, fd);
9210
+ return ERR_PTR(fd);
9211
+ }
9212
+
9213
+ link = calloc(1, sizeof(*link));
9214
+ if (!link) {
9215
+ close(fd);
9216
+ return ERR_PTR(-ENOMEM);
9217
+ }
9218
+ link->detach = &bpf_link__detach_fd;
9219
+ link->fd = fd;
9220
+
9221
+ link->pin_path = strdup(path);
9222
+ if (!link->pin_path) {
9223
+ bpf_link__destroy(link);
9224
+ return ERR_PTR(-ENOMEM);
9225
+ }
9226
+
9227
+ return link;
9228
+}
9229
+
9230
+int bpf_link__detach(struct bpf_link *link)
9231
+{
9232
+ return bpf_link_detach(link->fd) ? -errno : 0;
9233
+}
9234
+
9235
+int bpf_link__pin(struct bpf_link *link, const char *path)
9236
+{
9237
+ int err;
9238
+
9239
+ if (link->pin_path)
9240
+ return -EBUSY;
9241
+ err = make_parent_dir(path);
9242
+ if (err)
9243
+ return err;
9244
+ err = check_path(path);
9245
+ if (err)
9246
+ return err;
9247
+
9248
+ link->pin_path = strdup(path);
9249
+ if (!link->pin_path)
9250
+ return -ENOMEM;
9251
+
9252
+ if (bpf_obj_pin(link->fd, link->pin_path)) {
9253
+ err = -errno;
9254
+ zfree(&link->pin_path);
9255
+ return err;
9256
+ }
9257
+
9258
+ pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
9259
+ return 0;
9260
+}
9261
+
9262
+int bpf_link__unpin(struct bpf_link *link)
9263
+{
9264
+ int err;
9265
+
9266
+ if (!link->pin_path)
9267
+ return -EINVAL;
9268
+
9269
+ err = unlink(link->pin_path);
9270
+ if (err != 0)
9271
+ return -errno;
9272
+
9273
+ pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
9274
+ zfree(&link->pin_path);
9275
+ return 0;
9276
+}
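A sketch of the link pinning round trip added above (not part of the patch); the bpffs path is an assumption:

#include <bpf/libbpf.h>	/* "libbpf.h" when building in-tree */

static int reopen_pinned_link_example(struct bpf_link *link)
{
	struct bpf_link *again;

	/* bpf_link__pin() creates missing parent directories, then pins the fd */
	if (bpf_link__pin(link, "/sys/fs/bpf/mylink"))
		return -1;

	/* another process could later re-acquire the link through the pin */
	again = bpf_link__open("/sys/fs/bpf/mylink");
	if (libbpf_get_error(again))
		return -1;

	/* closes this handle's fd; the kernel link stays pinned in bpffs */
	return bpf_link__destroy(again);
}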
9277
+
9278
+static int bpf_link__detach_perf_event(struct bpf_link *link)
9279
+{
9280
+ int err;
9281
+
9282
+ err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
9283
+ if (err)
9284
+ err = -errno;
9285
+
9286
+ close(link->fd);
9287
+ return err;
9288
+}
9289
+
9290
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
9291
+ int pfd)
9292
+{
9293
+ char errmsg[STRERR_BUFSIZE];
9294
+ struct bpf_link *link;
9295
+ int prog_fd, err;
9296
+
9297
+ if (pfd < 0) {
9298
+ pr_warn("prog '%s': invalid perf event FD %d\n",
9299
+ prog->name, pfd);
9300
+ return ERR_PTR(-EINVAL);
9301
+ }
9302
+ prog_fd = bpf_program__fd(prog);
9303
+ if (prog_fd < 0) {
9304
+ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
9305
+ prog->name);
9306
+ return ERR_PTR(-EINVAL);
9307
+ }
9308
+
9309
+ link = calloc(1, sizeof(*link));
9310
+ if (!link)
9311
+ return ERR_PTR(-ENOMEM);
9312
+ link->detach = &bpf_link__detach_perf_event;
9313
+ link->fd = pfd;
9314
+
9315
+ if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
9316
+ err = -errno;
9317
+ free(link);
9318
+ pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
9319
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9320
+ if (err == -EPROTO)
9321
+ pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
9322
+ prog->name, pfd);
9323
+ return ERR_PTR(err);
9324
+ }
9325
+ if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9326
+ err = -errno;
9327
+ free(link);
9328
+ pr_warn("prog '%s': failed to enable pfd %d: %s\n",
9329
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9330
+ return ERR_PTR(err);
9331
+ }
9332
+ return link;
9333
+}
9334
+
9335
+/*
9336
+ * this function is expected to parse integer in the range of [0, 2^31-1] from
9337
+ * given file using scanf format string fmt. If actual parsed value is
9338
+ * negative, the result might be indistinguishable from error
9339
+ */
9340
+static int parse_uint_from_file(const char *file, const char *fmt)
9341
+{
9342
+ char buf[STRERR_BUFSIZE];
9343
+ int err, ret;
9344
+ FILE *f;
9345
+
9346
+ f = fopen(file, "r");
9347
+ if (!f) {
9348
+ err = -errno;
9349
+ pr_debug("failed to open '%s': %s\n", file,
9350
+ libbpf_strerror_r(err, buf, sizeof(buf)));
9351
+ return err;
9352
+ }
9353
+ err = fscanf(f, fmt, &ret);
9354
+ if (err != 1) {
9355
+ err = err == EOF ? -EIO : -errno;
9356
+ pr_debug("failed to parse '%s': %s\n", file,
9357
+ libbpf_strerror_r(err, buf, sizeof(buf)));
9358
+ fclose(f);
9359
+ return err;
9360
+ }
9361
+ fclose(f);
9362
+ return ret;
9363
+}
9364
+
9365
+static int determine_kprobe_perf_type(void)
9366
+{
9367
+ const char *file = "/sys/bus/event_source/devices/kprobe/type";
9368
+
9369
+ return parse_uint_from_file(file, "%d\n");
9370
+}
9371
+
9372
+static int determine_uprobe_perf_type(void)
9373
+{
9374
+ const char *file = "/sys/bus/event_source/devices/uprobe/type";
9375
+
9376
+ return parse_uint_from_file(file, "%d\n");
9377
+}
9378
+
9379
+static int determine_kprobe_retprobe_bit(void)
9380
+{
9381
+ const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
9382
+
9383
+ return parse_uint_from_file(file, "config:%d\n");
9384
+}
9385
+
9386
+static int determine_uprobe_retprobe_bit(void)
9387
+{
9388
+ const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
9389
+
9390
+ return parse_uint_from_file(file, "config:%d\n");
9391
+}
9392
+
9393
+static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
9394
+ uint64_t offset, int pid)
9395
+{
9396
+ struct perf_event_attr attr = {};
9397
+ char errmsg[STRERR_BUFSIZE];
9398
+ int type, pfd, err;
9399
+
9400
+ type = uprobe ? determine_uprobe_perf_type()
9401
+ : determine_kprobe_perf_type();
9402
+ if (type < 0) {
9403
+ pr_warn("failed to determine %s perf type: %s\n",
9404
+ uprobe ? "uprobe" : "kprobe",
9405
+ libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
9406
+ return type;
9407
+ }
9408
+ if (retprobe) {
9409
+ int bit = uprobe ? determine_uprobe_retprobe_bit()
9410
+ : determine_kprobe_retprobe_bit();
9411
+
9412
+ if (bit < 0) {
9413
+ pr_warn("failed to determine %s retprobe bit: %s\n",
9414
+ uprobe ? "uprobe" : "kprobe",
9415
+ libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
9416
+ return bit;
9417
+ }
9418
+ attr.config |= 1 << bit;
9419
+ }
9420
+ attr.size = sizeof(attr);
9421
+ attr.type = type;
9422
+ attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
9423
+ attr.config2 = offset; /* kprobe_addr or probe_offset */
9424
+
9425
+ /* pid filter is meaningful only for uprobes */
9426
+ pfd = syscall(__NR_perf_event_open, &attr,
9427
+ pid < 0 ? -1 : pid /* pid */,
9428
+ pid == -1 ? 0 : -1 /* cpu */,
9429
+ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9430
+ if (pfd < 0) {
9431
+ err = -errno;
9432
+ pr_warn("%s perf_event_open() failed: %s\n",
9433
+ uprobe ? "uprobe" : "kprobe",
9434
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9435
+ return err;
9436
+ }
9437
+ return pfd;
9438
+}
9439
+
9440
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
9441
+ bool retprobe,
9442
+ const char *func_name)
9443
+{
9444
+ char errmsg[STRERR_BUFSIZE];
9445
+ struct bpf_link *link;
9446
+ int pfd, err;
9447
+
9448
+ pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
9449
+ 0 /* offset */, -1 /* pid */);
9450
+ if (pfd < 0) {
9451
+ pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
9452
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
9453
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9454
+ return ERR_PTR(pfd);
9455
+ }
9456
+ link = bpf_program__attach_perf_event(prog, pfd);
9457
+ if (IS_ERR(link)) {
9458
+ close(pfd);
9459
+ err = PTR_ERR(link);
9460
+ pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
9461
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
9462
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9463
+ return link;
9464
+ }
9465
+ return link;
9466
+}
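A usage sketch tying the kprobe attach API above to the bpf_link__disconnect() semantics described earlier (not part of the patch); the traced kernel function is only an example:

#include <bpf/libbpf.h>	/* "libbpf.h" when building in-tree */

static int attach_kprobe_example(struct bpf_program *prog)
{
	struct bpf_link *link;

	/* prog must already be loaded, i.e. bpf_program__fd(prog) >= 0 */
	link = bpf_program__attach_kprobe(prog, false /* !retprobe */,
					  "do_sys_open");
	if (libbpf_get_error(link))
		return -1;

	/* give up ownership: destroy() frees the handle but won't detach */
	bpf_link__disconnect(link);
	return bpf_link__destroy(link);
}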
9467
+
9468
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
9469
+ struct bpf_program *prog)
9470
+{
9471
+ const char *func_name;
9472
+ bool retprobe;
9473
+
9474
+ func_name = prog->sec_name + sec->len;
9475
+ retprobe = strcmp(sec->sec, "kretprobe/") == 0;
9476
+
9477
+ return bpf_program__attach_kprobe(prog, retprobe, func_name);
9478
+}
9479
+
9480
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
9481
+ bool retprobe, pid_t pid,
9482
+ const char *binary_path,
9483
+ size_t func_offset)
9484
+{
9485
+ char errmsg[STRERR_BUFSIZE];
9486
+ struct bpf_link *link;
9487
+ int pfd, err;
9488
+
9489
+ pfd = perf_event_open_probe(true /* uprobe */, retprobe,
9490
+ binary_path, func_offset, pid);
9491
+ if (pfd < 0) {
9492
+ pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
9493
+ prog->name, retprobe ? "uretprobe" : "uprobe",
9494
+ binary_path, func_offset,
9495
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9496
+ return ERR_PTR(pfd);
9497
+ }
9498
+ link = bpf_program__attach_perf_event(prog, pfd);
9499
+ if (IS_ERR(link)) {
9500
+ close(pfd);
9501
+ err = PTR_ERR(link);
9502
+ pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
9503
+ prog->name, retprobe ? "uretprobe" : "uprobe",
9504
+ binary_path, func_offset,
9505
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9506
+ return link;
9507
+ }
9508
+ return link;
9509
+}
9510
+
9511
+static int determine_tracepoint_id(const char *tp_category,
9512
+ const char *tp_name)
9513
+{
9514
+ char file[PATH_MAX];
9515
+ int ret;
9516
+
9517
+ ret = snprintf(file, sizeof(file),
9518
+ "/sys/kernel/debug/tracing/events/%s/%s/id",
9519
+ tp_category, tp_name);
9520
+ if (ret < 0)
9521
+ return -errno;
9522
+ if (ret >= sizeof(file)) {
9523
+ pr_debug("tracepoint %s/%s path is too long\n",
9524
+ tp_category, tp_name);
9525
+ return -E2BIG;
9526
+ }
9527
+ return parse_uint_from_file(file, "%d\n");
9528
+}
9529
+
9530
+static int perf_event_open_tracepoint(const char *tp_category,
9531
+ const char *tp_name)
9532
+{
9533
+ struct perf_event_attr attr = {};
9534
+ char errmsg[STRERR_BUFSIZE];
9535
+ int tp_id, pfd, err;
9536
+
9537
+ tp_id = determine_tracepoint_id(tp_category, tp_name);
9538
+ if (tp_id < 0) {
9539
+ pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
9540
+ tp_category, tp_name,
9541
+ libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
9542
+ return tp_id;
9543
+ }
9544
+
9545
+ attr.type = PERF_TYPE_TRACEPOINT;
9546
+ attr.size = sizeof(attr);
9547
+ attr.config = tp_id;
9548
+
9549
+ pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
9550
+ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9551
+ if (pfd < 0) {
9552
+ err = -errno;
9553
+ pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
9554
+ tp_category, tp_name,
9555
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9556
+ return err;
9557
+ }
9558
+ return pfd;
9559
+}
9560
+
9561
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
9562
+ const char *tp_category,
9563
+ const char *tp_name)
9564
+{
9565
+ char errmsg[STRERR_BUFSIZE];
9566
+ struct bpf_link *link;
9567
+ int pfd, err;
9568
+
9569
+ pfd = perf_event_open_tracepoint(tp_category, tp_name);
9570
+ if (pfd < 0) {
9571
+ pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
9572
+ prog->name, tp_category, tp_name,
9573
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9574
+ return ERR_PTR(pfd);
9575
+ }
9576
+ link = bpf_program__attach_perf_event(prog, pfd);
9577
+ if (IS_ERR(link)) {
9578
+ close(pfd);
9579
+ err = PTR_ERR(link);
9580
+ pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
9581
+ prog->name, tp_category, tp_name,
9582
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9583
+ return link;
9584
+ }
9585
+ return link;
9586
+}
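A companion sketch for the tracepoint path (not part of the patch); attach_tp() below derives the same two arguments by splitting a "tp/<category>/<name>" section name:

#include <bpf/libbpf.h>	/* "libbpf.h" when building in-tree */

static struct bpf_link *attach_tp_example(struct bpf_program *prog)
{
	/* equivalent to what SEC("tp/sched/sched_switch") auto-attachment does */
	return bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
}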
9587
+
9588
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
9589
+ struct bpf_program *prog)
9590
+{
9591
+ char *sec_name, *tp_cat, *tp_name;
9592
+ struct bpf_link *link;
9593
+
9594
+ sec_name = strdup(prog->sec_name);
9595
+ if (!sec_name)
9596
+ return ERR_PTR(-ENOMEM);
9597
+
9598
+ /* extract "tp/<category>/<name>" */
9599
+ tp_cat = sec_name + sec->len;
9600
+ tp_name = strchr(tp_cat, '/');
9601
+ if (!tp_name) {
9602
+ link = ERR_PTR(-EINVAL);
9603
+ goto out;
9604
+ }
9605
+ *tp_name = '\0';
9606
+ tp_name++;
9607
+
9608
+ link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
9609
+out:
9610
+ free(sec_name);
9611
+ return link;
9612
+}
9613
+
9614
+struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
9615
+ const char *tp_name)
9616
+{
9617
+ char errmsg[STRERR_BUFSIZE];
9618
+ struct bpf_link *link;
9619
+ int prog_fd, pfd;
9620
+
9621
+ prog_fd = bpf_program__fd(prog);
9622
+ if (prog_fd < 0) {
9623
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9624
+ return ERR_PTR(-EINVAL);
9625
+ }
9626
+
9627
+ link = calloc(1, sizeof(*link));
9628
+ if (!link)
9629
+ return ERR_PTR(-ENOMEM);
9630
+ link->detach = &bpf_link__detach_fd;
9631
+
9632
+ pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
9633
+ if (pfd < 0) {
9634
+ pfd = -errno;
9635
+ free(link);
9636
+ pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
9637
+ prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9638
+ return ERR_PTR(pfd);
9639
+ }
9640
+ link->fd = pfd;
9641
+ return link;
9642
+}
9643
+
9644
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
9645
+ struct bpf_program *prog)
9646
+{
9647
+ const char *tp_name = prog->sec_name + sec->len;
9648
+
9649
+ return bpf_program__attach_raw_tracepoint(prog, tp_name);
9650
+}
9651
+
9652
+/* Common logic for all BPF program types that attach to a btf_id */
9653
+static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
9654
+{
9655
+ char errmsg[STRERR_BUFSIZE];
9656
+ struct bpf_link *link;
9657
+ int prog_fd, pfd;
9658
+
9659
+ prog_fd = bpf_program__fd(prog);
9660
+ if (prog_fd < 0) {
9661
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9662
+ return ERR_PTR(-EINVAL);
9663
+ }
9664
+
9665
+ link = calloc(1, sizeof(*link));
9666
+ if (!link)
9667
+ return ERR_PTR(-ENOMEM);
9668
+ link->detach = &bpf_link__detach_fd;
9669
+
9670
+ pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
9671
+ if (pfd < 0) {
9672
+ pfd = -errno;
9673
+ free(link);
9674
+ pr_warn("prog '%s': failed to attach: %s\n",
9675
+ prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
9676
+ return ERR_PTR(pfd);
9677
+ }
9678
+ link->fd = pfd;
9679
+	return link;
9680
+}
9681
+
9682
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
9683
+{
9684
+ return bpf_program__attach_btf_id(prog);
9685
+}
9686
+
9687
+struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
9688
+{
9689
+ return bpf_program__attach_btf_id(prog);
9690
+}
9691
+
9692
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
9693
+ struct bpf_program *prog)
9694
+{
9695
+ return bpf_program__attach_trace(prog);
9696
+}
9697
+
9698
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
9699
+ struct bpf_program *prog)
9700
+{
9701
+ return bpf_program__attach_lsm(prog);
9702
+}
9703
+
9704
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
9705
+ struct bpf_program *prog)
9706
+{
9707
+ return bpf_program__attach_iter(prog, NULL);
9708
+}
9709
+
9710
+static struct bpf_link *
9711
+bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
9712
+ const char *target_name)
9713
+{
9714
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
9715
+ .target_btf_id = btf_id);
9716
+ enum bpf_attach_type attach_type;
9717
+ char errmsg[STRERR_BUFSIZE];
9718
+ struct bpf_link *link;
9719
+ int prog_fd, link_fd;
9720
+
9721
+ prog_fd = bpf_program__fd(prog);
9722
+ if (prog_fd < 0) {
9723
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9724
+ return ERR_PTR(-EINVAL);
9725
+ }
9726
+
9727
+ link = calloc(1, sizeof(*link));
9728
+ if (!link)
9729
+ return ERR_PTR(-ENOMEM);
9730
+ link->detach = &bpf_link__detach_fd;
9731
+
9732
+ attach_type = bpf_program__get_expected_attach_type(prog);
9733
+ link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
9734
+ if (link_fd < 0) {
9735
+ link_fd = -errno;
9736
+ free(link);
9737
+ pr_warn("prog '%s': failed to attach to %s: %s\n",
9738
+ prog->name, target_name,
9739
+ libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
9740
+ return ERR_PTR(link_fd);
9741
+ }
9742
+ link->fd = link_fd;
9743
+ return link;
9744
+}
9745
+
9746
+struct bpf_link *
9747
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
9748
+{
9749
+ return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
9750
+}
9751
+
9752
+struct bpf_link *
9753
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
9754
+{
9755
+ return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
9756
+}
9757
+
9758
+struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
9759
+{
9760
+ /* target_fd/target_ifindex use the same field in LINK_CREATE */
9761
+ return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
9762
+}
9763
+
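A hedged sketch of the bpf_link-based XDP attach above; it needs a kernel with XDP link support, and the interface name is an assumption:

#include <net/if.h>	/* if_nametoindex() */

static struct bpf_link *attach_xdp_by_name(struct bpf_program *prog, const char *ifname)
{
	unsigned int ifindex = if_nametoindex(ifname);	/* e.g. "eth0" */
	struct bpf_link *link;

	if (!ifindex)
		return NULL;
	link = bpf_program__attach_xdp(prog, ifindex);
	return libbpf_get_error(link) ? NULL : link;
}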
9764
+struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
9765
+ int target_fd,
9766
+ const char *attach_func_name)
9767
+{
9768
+ int btf_id;
9769
+
9770
+ if (!!target_fd != !!attach_func_name) {
9771
+ pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
9772
+ prog->name);
9773
+ return ERR_PTR(-EINVAL);
9774
+ }
9775
+
9776
+ if (prog->type != BPF_PROG_TYPE_EXT) {
9777
+		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
9778
+ prog->name);
9779
+ return ERR_PTR(-EINVAL);
9780
+ }
9781
+
9782
+ if (target_fd) {
9783
+ btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
9784
+ if (btf_id < 0)
9785
+ return ERR_PTR(btf_id);
9786
+
9787
+ return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
9788
+ } else {
9789
+ /* no target, so use raw_tracepoint_open for compatibility
9790
+ * with old kernels
9791
+ */
9792
+ return bpf_program__attach_trace(prog);
9793
+ }
9794
+}
9795
+
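A sketch of the freplace path with an explicit target; the target program id and the replaced function name are illustrative assumptions (bpf_prog_get_fd_by_id() is from bpf.h):

#include <bpf/bpf.h>	/* bpf_prog_get_fd_by_id() */

static struct bpf_link *replace_in_target(struct bpf_program *ext_prog, __u32 target_prog_id)
{
	int target_fd = bpf_prog_get_fd_by_id(target_prog_id);
	struct bpf_link *link;

	if (target_fd < 0)
		return NULL;
	/* ext_prog must be BPF_PROG_TYPE_EXT and already loaded;
	 * "xdp_pass_func" is a hypothetical global function in the target prog
	 */
	link = bpf_program__attach_freplace(ext_prog, target_fd, "xdp_pass_func");
	return libbpf_get_error(link) ? NULL : link;
}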
9796
+struct bpf_link *
9797
+bpf_program__attach_iter(struct bpf_program *prog,
9798
+ const struct bpf_iter_attach_opts *opts)
9799
+{
9800
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
9801
+ char errmsg[STRERR_BUFSIZE];
9802
+ struct bpf_link *link;
9803
+ int prog_fd, link_fd;
9804
+ __u32 target_fd = 0;
9805
+
9806
+ if (!OPTS_VALID(opts, bpf_iter_attach_opts))
9807
+ return ERR_PTR(-EINVAL);
9808
+
9809
+ link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
9810
+ link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
9811
+
9812
+ prog_fd = bpf_program__fd(prog);
9813
+ if (prog_fd < 0) {
9814
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
9815
+ return ERR_PTR(-EINVAL);
9816
+ }
9817
+
9818
+ link = calloc(1, sizeof(*link));
9819
+ if (!link)
9820
+ return ERR_PTR(-ENOMEM);
9821
+ link->detach = &bpf_link__detach_fd;
9822
+
9823
+ link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
9824
+ &link_create_opts);
9825
+ if (link_fd < 0) {
9826
+ link_fd = -errno;
9827
+ free(link);
9828
+ pr_warn("prog '%s': failed to attach to iterator: %s\n",
9829
+ prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
9830
+ return ERR_PTR(link_fd);
9831
+ }
9832
+ link->fd = link_fd;
9833
+ return link;
9834
+}
9835
+
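A sketch of driving a BPF iterator program end to end; bpf_iter_create() comes from bpf.h, and the buffer size is arbitrary:

#include <unistd.h>	/* read(), close() */
#include <bpf/bpf.h>	/* bpf_iter_create() */

static int drain_iterator(struct bpf_program *prog)
{
	struct bpf_link *link;
	char buf[4096];
	ssize_t n;
	int iter_fd;

	link = bpf_program__attach_iter(prog, NULL);
	if (libbpf_get_error(link))
		return -1;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0) {
		bpf_link__destroy(link);
		return -1;
	}
	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
		;	/* text produced by the iterator's seq_file */

	close(iter_fd);
	bpf_link__destroy(link);
	return n < 0 ? -1 : 0;
}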
9836
+struct bpf_link *bpf_program__attach(struct bpf_program *prog)
9837
+{
9838
+ const struct bpf_sec_def *sec_def;
9839
+
9840
+ sec_def = find_sec_def(prog->sec_name);
9841
+ if (!sec_def || !sec_def->attach_fn)
9842
+ return ERR_PTR(-ESRCH);
9843
+
9844
+ return sec_def->attach_fn(sec_def, prog);
9845
+}
9846
+
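bpf_program__attach() dispatches on the program's SEC() name, so a loop like the following can auto-attach a whole object; error handling is deliberately thin and links are not stored, which a real caller would do:

#include <errno.h>

static int attach_everything(struct bpf_object *obj)
{
	struct bpf_program *prog;

	bpf_object__for_each_program(prog, obj) {
		struct bpf_link *link = bpf_program__attach(prog);
		long err = libbpf_get_error(link);

		if (err == -ESRCH)
			continue;	/* no auto-attach handler for this section type */
		if (err)
			return (int)err;
		/* real code keeps 'link' and calls bpf_link__destroy() later */
	}
	return 0;
}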
9847
+static int bpf_link__detach_struct_ops(struct bpf_link *link)
9848
+{
9849
+ __u32 zero = 0;
9850
+
9851
+ if (bpf_map_delete_elem(link->fd, &zero))
9852
+ return -errno;
9853
+
9854
+ return 0;
9855
+}
9856
+
9857
+struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
9858
+{
9859
+ struct bpf_struct_ops *st_ops;
9860
+ struct bpf_link *link;
9861
+ __u32 i, zero = 0;
9862
+ int err;
9863
+
9864
+ if (!bpf_map__is_struct_ops(map) || map->fd == -1)
9865
+ return ERR_PTR(-EINVAL);
9866
+
9867
+ link = calloc(1, sizeof(*link));
9868
+ if (!link)
9869
+ return ERR_PTR(-EINVAL);
9870
+
9871
+ st_ops = map->st_ops;
9872
+ for (i = 0; i < btf_vlen(st_ops->type); i++) {
9873
+ struct bpf_program *prog = st_ops->progs[i];
9874
+ void *kern_data;
9875
+ int prog_fd;
9876
+
9877
+ if (!prog)
9878
+ continue;
9879
+
9880
+ prog_fd = bpf_program__fd(prog);
9881
+ kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
9882
+ *(unsigned long *)kern_data = prog_fd;
9883
+ }
9884
+
9885
+ err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
9886
+ if (err) {
9887
+ err = -errno;
9888
+ free(link);
9889
+ return ERR_PTR(err);
9890
+ }
9891
+
9892
+ link->detach = bpf_link__detach_struct_ops;
9893
+ link->fd = map->fd;
9894
+
9895
+ return link;
9896
+}
9897
+
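A sketch of registering a struct_ops map after bpf_object__load(); the map name follows the common TCP congestion-control examples and is an assumption here:

static struct bpf_link *register_ca(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "dctcp");	/* hypothetical */
	struct bpf_link *link;

	if (!map)
		return NULL;
	link = bpf_map__attach_struct_ops(map);
	return libbpf_get_error(link) ? NULL : link;
}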
9898
+enum bpf_perf_event_ret
9899
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
9900
+ void **copy_mem, size_t *copy_size,
9901
+ bpf_perf_event_print_t fn, void *private_data)
9902
+{
9903
+ struct perf_event_mmap_page *header = mmap_mem;
9904
+ __u64 data_head = ring_buffer_read_head(header);
23799905 __u64 data_tail = header->data_tail;
2380
- __u64 data_head = header->data_head;
2381
- int ret = LIBBPF_PERF_EVENT_ERROR;
2382
- void *base, *begin, *end;
9906
+ void *base = ((__u8 *)header) + page_size;
9907
+ int ret = LIBBPF_PERF_EVENT_CONT;
9908
+ struct perf_event_header *ehdr;
9909
+ size_t ehdr_size;
23839910
2384
- asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
2385
- if (data_head == data_tail)
2386
- return LIBBPF_PERF_EVENT_CONT;
9911
+ while (data_head != data_tail) {
9912
+ ehdr = base + (data_tail & (mmap_size - 1));
9913
+ ehdr_size = ehdr->size;
23879914
2388
- base = ((char *)header) + page_size;
9915
+ if (((void *)ehdr) + ehdr_size > base + mmap_size) {
9916
+ void *copy_start = ehdr;
9917
+ size_t len_first = base + mmap_size - copy_start;
9918
+ size_t len_secnd = ehdr_size - len_first;
23899919
2390
- begin = base + data_tail % size;
2391
- end = base + data_head % size;
2392
-
2393
- while (begin != end) {
2394
- struct perf_event_header *ehdr;
2395
-
2396
- ehdr = begin;
2397
- if (begin + ehdr->size > base + size) {
2398
- long len = base + size - begin;
2399
-
2400
- if (*buf_len < ehdr->size) {
2401
- free(*buf);
2402
- *buf = malloc(ehdr->size);
2403
- if (!*buf) {
9920
+ if (*copy_size < ehdr_size) {
9921
+ free(*copy_mem);
9922
+ *copy_mem = malloc(ehdr_size);
9923
+ if (!*copy_mem) {
9924
+ *copy_size = 0;
24049925 ret = LIBBPF_PERF_EVENT_ERROR;
24059926 break;
24069927 }
2407
- *buf_len = ehdr->size;
9928
+ *copy_size = ehdr_size;
24089929 }
24099930
2410
- memcpy(*buf, begin, len);
2411
- memcpy(*buf + len, base, ehdr->size - len);
2412
- ehdr = (void *)*buf;
2413
- begin = base + ehdr->size - len;
2414
- } else if (begin + ehdr->size == base + size) {
2415
- begin = base;
2416
- } else {
2417
- begin += ehdr->size;
9931
+ memcpy(*copy_mem, copy_start, len_first);
9932
+ memcpy(*copy_mem + len_first, base, len_secnd);
9933
+ ehdr = *copy_mem;
24189934 }
24199935
2420
- ret = fn(ehdr, priv);
9936
+ ret = fn(ehdr, private_data);
9937
+ data_tail += ehdr_size;
24219938 if (ret != LIBBPF_PERF_EVENT_CONT)
24229939 break;
2423
-
2424
- data_tail += ehdr->size;
24259940 }
24269941
2427
- __sync_synchronize(); /* smp_mb() */
2428
- header->data_tail = data_tail;
2429
-
9942
+ ring_buffer_write_tail(header, data_tail);
24309943 return ret;
24319944 }
9945
+
9946
+struct perf_buffer;
9947
+
9948
+struct perf_buffer_params {
9949
+ struct perf_event_attr *attr;
9950
+	/* if event_cb is specified, it takes precedence */
9951
+ perf_buffer_event_fn event_cb;
9952
+ /* sample_cb and lost_cb are higher-level common-case callbacks */
9953
+ perf_buffer_sample_fn sample_cb;
9954
+ perf_buffer_lost_fn lost_cb;
9955
+ void *ctx;
9956
+ int cpu_cnt;
9957
+ int *cpus;
9958
+ int *map_keys;
9959
+};
9960
+
9961
+struct perf_cpu_buf {
9962
+ struct perf_buffer *pb;
9963
+ void *base; /* mmap()'ed memory */
9964
+ void *buf; /* for reconstructing segmented data */
9965
+ size_t buf_size;
9966
+ int fd;
9967
+ int cpu;
9968
+ int map_key;
9969
+};
9970
+
9971
+struct perf_buffer {
9972
+ perf_buffer_event_fn event_cb;
9973
+ perf_buffer_sample_fn sample_cb;
9974
+ perf_buffer_lost_fn lost_cb;
9975
+ void *ctx; /* passed into callbacks */
9976
+
9977
+ size_t page_size;
9978
+ size_t mmap_size;
9979
+ struct perf_cpu_buf **cpu_bufs;
9980
+ struct epoll_event *events;
9981
+ int cpu_cnt; /* number of allocated CPU buffers */
9982
+	int epoll_fd; /* epoll instance FD */
9983
+ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
9984
+};
9985
+
9986
+static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
9987
+ struct perf_cpu_buf *cpu_buf)
9988
+{
9989
+ if (!cpu_buf)
9990
+ return;
9991
+ if (cpu_buf->base &&
9992
+ munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
9993
+ pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
9994
+ if (cpu_buf->fd >= 0) {
9995
+ ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
9996
+ close(cpu_buf->fd);
9997
+ }
9998
+ free(cpu_buf->buf);
9999
+ free(cpu_buf);
10000
+}
10001
+
10002
+void perf_buffer__free(struct perf_buffer *pb)
10003
+{
10004
+ int i;
10005
+
10006
+ if (IS_ERR_OR_NULL(pb))
10007
+ return;
10008
+ if (pb->cpu_bufs) {
10009
+ for (i = 0; i < pb->cpu_cnt; i++) {
10010
+ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10011
+
10012
+ if (!cpu_buf)
10013
+ continue;
10014
+
10015
+ bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
10016
+ perf_buffer__free_cpu_buf(pb, cpu_buf);
10017
+ }
10018
+ free(pb->cpu_bufs);
10019
+ }
10020
+ if (pb->epoll_fd >= 0)
10021
+ close(pb->epoll_fd);
10022
+ free(pb->events);
10023
+ free(pb);
10024
+}
10025
+
10026
+static struct perf_cpu_buf *
10027
+perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
10028
+ int cpu, int map_key)
10029
+{
10030
+ struct perf_cpu_buf *cpu_buf;
10031
+ char msg[STRERR_BUFSIZE];
10032
+ int err;
10033
+
10034
+ cpu_buf = calloc(1, sizeof(*cpu_buf));
10035
+ if (!cpu_buf)
10036
+ return ERR_PTR(-ENOMEM);
10037
+
10038
+ cpu_buf->pb = pb;
10039
+ cpu_buf->cpu = cpu;
10040
+ cpu_buf->map_key = map_key;
10041
+
10042
+ cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
10043
+ -1, PERF_FLAG_FD_CLOEXEC);
10044
+ if (cpu_buf->fd < 0) {
10045
+ err = -errno;
10046
+ pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
10047
+ cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10048
+ goto error;
10049
+ }
10050
+
10051
+ cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
10052
+ PROT_READ | PROT_WRITE, MAP_SHARED,
10053
+ cpu_buf->fd, 0);
10054
+ if (cpu_buf->base == MAP_FAILED) {
10055
+ cpu_buf->base = NULL;
10056
+ err = -errno;
10057
+ pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
10058
+ cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10059
+ goto error;
10060
+ }
10061
+
10062
+ if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10063
+ err = -errno;
10064
+ pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
10065
+ cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10066
+ goto error;
10067
+ }
10068
+
10069
+ return cpu_buf;
10070
+
10071
+error:
10072
+ perf_buffer__free_cpu_buf(pb, cpu_buf);
10073
+ return (struct perf_cpu_buf *)ERR_PTR(err);
10074
+}
10075
+
10076
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
10077
+ struct perf_buffer_params *p);
10078
+
10079
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
10080
+ const struct perf_buffer_opts *opts)
10081
+{
10082
+ struct perf_buffer_params p = {};
10083
+ struct perf_event_attr attr = { 0, };
10084
+
10085
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
10086
+ attr.type = PERF_TYPE_SOFTWARE;
10087
+ attr.sample_type = PERF_SAMPLE_RAW;
10088
+ attr.sample_period = 1;
10089
+ attr.wakeup_events = 1;
10090
+
10091
+ p.attr = &attr;
10092
+ p.sample_cb = opts ? opts->sample_cb : NULL;
10093
+ p.lost_cb = opts ? opts->lost_cb : NULL;
10094
+ p.ctx = opts ? opts->ctx : NULL;
10095
+
10096
+ return __perf_buffer__new(map_fd, page_cnt, &p);
10097
+}
10098
+
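A hedged consumer sketch for the perf_buffer API defined here; the map name "events", the page count, and the callback body are assumptions:

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	/* 'data'/'size' is the raw record the BPF side emitted via bpf_perf_event_output() */
}

static int poll_events(struct bpf_object *obj)
{
	struct perf_buffer_opts pb_opts = { .sample_cb = on_sample };
	struct perf_buffer *pb;
	int map_fd, err;

	map_fd = bpf_object__find_map_fd_by_name(obj, "events");
	if (map_fd < 0)
		return map_fd;

	pb = perf_buffer__new(map_fd, 8 /* pages per CPU ring */, &pb_opts);
	err = (int)libbpf_get_error(pb);
	if (err)
		return err;

	while ((err = perf_buffer__poll(pb, 100 /* ms */)) >= 0)
		;	/* sample/lost callbacks run from inside perf_buffer__poll() */

	perf_buffer__free(pb);
	return err;
}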
10099
+struct perf_buffer *
10100
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
10101
+ const struct perf_buffer_raw_opts *opts)
10102
+{
10103
+ struct perf_buffer_params p = {};
10104
+
10105
+ p.attr = opts->attr;
10106
+ p.event_cb = opts->event_cb;
10107
+ p.ctx = opts->ctx;
10108
+ p.cpu_cnt = opts->cpu_cnt;
10109
+ p.cpus = opts->cpus;
10110
+ p.map_keys = opts->map_keys;
10111
+
10112
+ return __perf_buffer__new(map_fd, page_cnt, &p);
10113
+}
10114
+
10115
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
10116
+ struct perf_buffer_params *p)
10117
+{
10118
+ const char *online_cpus_file = "/sys/devices/system/cpu/online";
10119
+ struct bpf_map_info map;
10120
+ char msg[STRERR_BUFSIZE];
10121
+ struct perf_buffer *pb;
10122
+ bool *online = NULL;
10123
+ __u32 map_info_len;
10124
+ int err, i, j, n;
10125
+
10126
+ if (page_cnt & (page_cnt - 1)) {
10127
+ pr_warn("page count should be power of two, but is %zu\n",
10128
+ page_cnt);
10129
+ return ERR_PTR(-EINVAL);
10130
+ }
10131
+
10132
+ /* best-effort sanity checks */
10133
+ memset(&map, 0, sizeof(map));
10134
+ map_info_len = sizeof(map);
10135
+ err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
10136
+ if (err) {
10137
+ err = -errno;
10138
+ /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
10139
+ * -EBADFD, -EFAULT, or -E2BIG on real error
10140
+ */
10141
+ if (err != -EINVAL) {
10142
+ pr_warn("failed to get map info for map FD %d: %s\n",
10143
+ map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
10144
+ return ERR_PTR(err);
10145
+ }
10146
+ pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
10147
+ map_fd);
10148
+ } else {
10149
+ if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
10150
+ pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
10151
+ map.name);
10152
+ return ERR_PTR(-EINVAL);
10153
+ }
10154
+ }
10155
+
10156
+ pb = calloc(1, sizeof(*pb));
10157
+ if (!pb)
10158
+ return ERR_PTR(-ENOMEM);
10159
+
10160
+ pb->event_cb = p->event_cb;
10161
+ pb->sample_cb = p->sample_cb;
10162
+ pb->lost_cb = p->lost_cb;
10163
+ pb->ctx = p->ctx;
10164
+
10165
+ pb->page_size = getpagesize();
10166
+ pb->mmap_size = pb->page_size * page_cnt;
10167
+ pb->map_fd = map_fd;
10168
+
10169
+ pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
10170
+ if (pb->epoll_fd < 0) {
10171
+ err = -errno;
10172
+ pr_warn("failed to create epoll instance: %s\n",
10173
+ libbpf_strerror_r(err, msg, sizeof(msg)));
10174
+ goto error;
10175
+ }
10176
+
10177
+ if (p->cpu_cnt > 0) {
10178
+ pb->cpu_cnt = p->cpu_cnt;
10179
+ } else {
10180
+ pb->cpu_cnt = libbpf_num_possible_cpus();
10181
+ if (pb->cpu_cnt < 0) {
10182
+ err = pb->cpu_cnt;
10183
+ goto error;
10184
+ }
10185
+ if (map.max_entries && map.max_entries < pb->cpu_cnt)
10186
+ pb->cpu_cnt = map.max_entries;
10187
+ }
10188
+
10189
+ pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
10190
+ if (!pb->events) {
10191
+ err = -ENOMEM;
10192
+ pr_warn("failed to allocate events: out of memory\n");
10193
+ goto error;
10194
+ }
10195
+ pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
10196
+ if (!pb->cpu_bufs) {
10197
+ err = -ENOMEM;
10198
+ pr_warn("failed to allocate buffers: out of memory\n");
10199
+ goto error;
10200
+ }
10201
+
10202
+ err = parse_cpu_mask_file(online_cpus_file, &online, &n);
10203
+ if (err) {
10204
+ pr_warn("failed to get online CPU mask: %d\n", err);
10205
+ goto error;
10206
+ }
10207
+
10208
+ for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
10209
+ struct perf_cpu_buf *cpu_buf;
10210
+ int cpu, map_key;
10211
+
10212
+ cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
10213
+ map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
10214
+
10215
+		/* in case the user didn't explicitly request particular CPUs to
10216
+ * be attached to, skip offline/not present CPUs
10217
+ */
10218
+ if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
10219
+ continue;
10220
+
10221
+ cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
10222
+ if (IS_ERR(cpu_buf)) {
10223
+ err = PTR_ERR(cpu_buf);
10224
+ goto error;
10225
+ }
10226
+
10227
+ pb->cpu_bufs[j] = cpu_buf;
10228
+
10229
+ err = bpf_map_update_elem(pb->map_fd, &map_key,
10230
+ &cpu_buf->fd, 0);
10231
+ if (err) {
10232
+ err = -errno;
10233
+ pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
10234
+ cpu, map_key, cpu_buf->fd,
10235
+ libbpf_strerror_r(err, msg, sizeof(msg)));
10236
+ goto error;
10237
+ }
10238
+
10239
+ pb->events[j].events = EPOLLIN;
10240
+ pb->events[j].data.ptr = cpu_buf;
10241
+ if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
10242
+ &pb->events[j]) < 0) {
10243
+ err = -errno;
10244
+ pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
10245
+ cpu, cpu_buf->fd,
10246
+ libbpf_strerror_r(err, msg, sizeof(msg)));
10247
+ goto error;
10248
+ }
10249
+ j++;
10250
+ }
10251
+ pb->cpu_cnt = j;
10252
+ free(online);
10253
+
10254
+ return pb;
10255
+
10256
+error:
10257
+ free(online);
10258
+ if (pb)
10259
+ perf_buffer__free(pb);
10260
+ return ERR_PTR(err);
10261
+}
10262
+
10263
+struct perf_sample_raw {
10264
+ struct perf_event_header header;
10265
+ uint32_t size;
10266
+ char data[];
10267
+};
10268
+
10269
+struct perf_sample_lost {
10270
+ struct perf_event_header header;
10271
+ uint64_t id;
10272
+ uint64_t lost;
10273
+ uint64_t sample_id;
10274
+};
10275
+
10276
+static enum bpf_perf_event_ret
10277
+perf_buffer__process_record(struct perf_event_header *e, void *ctx)
10278
+{
10279
+ struct perf_cpu_buf *cpu_buf = ctx;
10280
+ struct perf_buffer *pb = cpu_buf->pb;
10281
+ void *data = e;
10282
+
10283
+ /* user wants full control over parsing perf event */
10284
+ if (pb->event_cb)
10285
+ return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
10286
+
10287
+ switch (e->type) {
10288
+ case PERF_RECORD_SAMPLE: {
10289
+ struct perf_sample_raw *s = data;
10290
+
10291
+ if (pb->sample_cb)
10292
+ pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
10293
+ break;
10294
+ }
10295
+ case PERF_RECORD_LOST: {
10296
+ struct perf_sample_lost *s = data;
10297
+
10298
+ if (pb->lost_cb)
10299
+ pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
10300
+ break;
10301
+ }
10302
+ default:
10303
+ pr_warn("unknown perf sample type %d\n", e->type);
10304
+ return LIBBPF_PERF_EVENT_ERROR;
10305
+ }
10306
+ return LIBBPF_PERF_EVENT_CONT;
10307
+}
10308
+
10309
+static int perf_buffer__process_records(struct perf_buffer *pb,
10310
+ struct perf_cpu_buf *cpu_buf)
10311
+{
10312
+ enum bpf_perf_event_ret ret;
10313
+
10314
+ ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
10315
+ pb->page_size, &cpu_buf->buf,
10316
+ &cpu_buf->buf_size,
10317
+ perf_buffer__process_record, cpu_buf);
10318
+ if (ret != LIBBPF_PERF_EVENT_CONT)
10319
+ return ret;
10320
+ return 0;
10321
+}
10322
+
10323
+int perf_buffer__epoll_fd(const struct perf_buffer *pb)
10324
+{
10325
+ return pb->epoll_fd;
10326
+}
10327
+
10328
+int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
10329
+{
10330
+ int i, cnt, err;
10331
+
10332
+ cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
10333
+ for (i = 0; i < cnt; i++) {
10334
+ struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
10335
+
10336
+ err = perf_buffer__process_records(pb, cpu_buf);
10337
+ if (err) {
10338
+ pr_warn("error while processing records: %d\n", err);
10339
+ return err;
10340
+ }
10341
+ }
10342
+ return cnt < 0 ? -errno : cnt;
10343
+}
10344
+
10345
+/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
10346
+ * manager.
10347
+ */
10348
+size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
10349
+{
10350
+ return pb->cpu_cnt;
10351
+}
10352
+
10353
+/*
10354
+ * Return perf_event FD of a ring buffer in *buf_idx* slot of
10355
+ * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
10356
+ * select()/poll()/epoll() Linux syscalls.
10357
+ */
10358
+int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
10359
+{
10360
+ struct perf_cpu_buf *cpu_buf;
10361
+
10362
+ if (buf_idx >= pb->cpu_cnt)
10363
+ return -EINVAL;
10364
+
10365
+ cpu_buf = pb->cpu_bufs[buf_idx];
10366
+ if (!cpu_buf)
10367
+ return -ENOENT;
10368
+
10369
+ return cpu_buf->fd;
10370
+}
10371
+
10372
+/*
10373
+ * Consume data from perf ring buffer corresponding to slot *buf_idx* in
10374
+ * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
10375
+ * consume, do nothing and return success.
10376
+ * Returns:
10377
+ * - 0 on success;
10378
+ * - <0 on failure.
10379
+ */
10380
+int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
10381
+{
10382
+ struct perf_cpu_buf *cpu_buf;
10383
+
10384
+ if (buf_idx >= pb->cpu_cnt)
10385
+ return -EINVAL;
10386
+
10387
+ cpu_buf = pb->cpu_bufs[buf_idx];
10388
+ if (!cpu_buf)
10389
+ return -ENOENT;
10390
+
10391
+ return perf_buffer__process_records(pb, cpu_buf);
10392
+}
10393
+
10394
+int perf_buffer__consume(struct perf_buffer *pb)
10395
+{
10396
+ int i, err;
10397
+
10398
+ for (i = 0; i < pb->cpu_cnt; i++) {
10399
+ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10400
+
10401
+ if (!cpu_buf)
10402
+ continue;
10403
+
10404
+ err = perf_buffer__process_records(pb, cpu_buf);
10405
+ if (err) {
10406
+ pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
10407
+ return err;
10408
+ }
10409
+ }
10410
+ return 0;
10411
+}
10412
+
10413
+struct bpf_prog_info_array_desc {
10414
+ int array_offset; /* e.g. offset of jited_prog_insns */
10415
+ int count_offset; /* e.g. offset of jited_prog_len */
10416
+ int size_offset; /* > 0: offset of rec size,
10417
+ * < 0: fix size of -size_offset
10418
+ */
10419
+};
10420
+
10421
+static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
10422
+ [BPF_PROG_INFO_JITED_INSNS] = {
10423
+ offsetof(struct bpf_prog_info, jited_prog_insns),
10424
+ offsetof(struct bpf_prog_info, jited_prog_len),
10425
+ -1,
10426
+ },
10427
+ [BPF_PROG_INFO_XLATED_INSNS] = {
10428
+ offsetof(struct bpf_prog_info, xlated_prog_insns),
10429
+ offsetof(struct bpf_prog_info, xlated_prog_len),
10430
+ -1,
10431
+ },
10432
+ [BPF_PROG_INFO_MAP_IDS] = {
10433
+ offsetof(struct bpf_prog_info, map_ids),
10434
+ offsetof(struct bpf_prog_info, nr_map_ids),
10435
+ -(int)sizeof(__u32),
10436
+ },
10437
+ [BPF_PROG_INFO_JITED_KSYMS] = {
10438
+ offsetof(struct bpf_prog_info, jited_ksyms),
10439
+ offsetof(struct bpf_prog_info, nr_jited_ksyms),
10440
+ -(int)sizeof(__u64),
10441
+ },
10442
+ [BPF_PROG_INFO_JITED_FUNC_LENS] = {
10443
+ offsetof(struct bpf_prog_info, jited_func_lens),
10444
+ offsetof(struct bpf_prog_info, nr_jited_func_lens),
10445
+ -(int)sizeof(__u32),
10446
+ },
10447
+ [BPF_PROG_INFO_FUNC_INFO] = {
10448
+ offsetof(struct bpf_prog_info, func_info),
10449
+ offsetof(struct bpf_prog_info, nr_func_info),
10450
+ offsetof(struct bpf_prog_info, func_info_rec_size),
10451
+ },
10452
+ [BPF_PROG_INFO_LINE_INFO] = {
10453
+ offsetof(struct bpf_prog_info, line_info),
10454
+ offsetof(struct bpf_prog_info, nr_line_info),
10455
+ offsetof(struct bpf_prog_info, line_info_rec_size),
10456
+ },
10457
+ [BPF_PROG_INFO_JITED_LINE_INFO] = {
10458
+ offsetof(struct bpf_prog_info, jited_line_info),
10459
+ offsetof(struct bpf_prog_info, nr_jited_line_info),
10460
+ offsetof(struct bpf_prog_info, jited_line_info_rec_size),
10461
+ },
10462
+ [BPF_PROG_INFO_PROG_TAGS] = {
10463
+ offsetof(struct bpf_prog_info, prog_tags),
10464
+ offsetof(struct bpf_prog_info, nr_prog_tags),
10465
+ -(int)sizeof(__u8) * BPF_TAG_SIZE,
10466
+ },
10467
+
10468
+};
10469
+
10470
+static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
10471
+ int offset)
10472
+{
10473
+ __u32 *array = (__u32 *)info;
10474
+
10475
+ if (offset >= 0)
10476
+ return array[offset / sizeof(__u32)];
10477
+ return -(int)offset;
10478
+}
10479
+
10480
+static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
10481
+ int offset)
10482
+{
10483
+ __u64 *array = (__u64 *)info;
10484
+
10485
+ if (offset >= 0)
10486
+ return array[offset / sizeof(__u64)];
10487
+ return -(int)offset;
10488
+}
10489
+
10490
+static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
10491
+ __u32 val)
10492
+{
10493
+ __u32 *array = (__u32 *)info;
10494
+
10495
+ if (offset >= 0)
10496
+ array[offset / sizeof(__u32)] = val;
10497
+}
10498
+
10499
+static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
10500
+ __u64 val)
10501
+{
10502
+ __u64 *array = (__u64 *)info;
10503
+
10504
+ if (offset >= 0)
10505
+ array[offset / sizeof(__u64)] = val;
10506
+}
10507
+
10508
+struct bpf_prog_info_linear *
10509
+bpf_program__get_prog_info_linear(int fd, __u64 arrays)
10510
+{
10511
+ struct bpf_prog_info_linear *info_linear;
10512
+ struct bpf_prog_info info = {};
10513
+ __u32 info_len = sizeof(info);
10514
+ __u32 data_len = 0;
10515
+ int i, err;
10516
+ void *ptr;
10517
+
10518
+ if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
10519
+ return ERR_PTR(-EINVAL);
10520
+
10521
+ /* step 1: get array dimensions */
10522
+ err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
10523
+ if (err) {
10524
+ pr_debug("can't get prog info: %s", strerror(errno));
10525
+ return ERR_PTR(-EFAULT);
10526
+ }
10527
+
10528
+ /* step 2: calculate total size of all arrays */
10529
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10530
+ bool include_array = (arrays & (1UL << i)) > 0;
10531
+ struct bpf_prog_info_array_desc *desc;
10532
+ __u32 count, size;
10533
+
10534
+ desc = bpf_prog_info_array_desc + i;
10535
+
10536
+ /* kernel is too old to support this field */
10537
+ if (info_len < desc->array_offset + sizeof(__u32) ||
10538
+ info_len < desc->count_offset + sizeof(__u32) ||
10539
+ (desc->size_offset > 0 && info_len < desc->size_offset))
10540
+ include_array = false;
10541
+
10542
+ if (!include_array) {
10543
+ arrays &= ~(1UL << i); /* clear the bit */
10544
+ continue;
10545
+ }
10546
+
10547
+ count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10548
+ size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10549
+
10550
+ data_len += count * size;
10551
+ }
10552
+
10553
+	/* step 3: allocate contiguous memory */
10554
+ data_len = roundup(data_len, sizeof(__u64));
10555
+ info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
10556
+ if (!info_linear)
10557
+ return ERR_PTR(-ENOMEM);
10558
+
10559
+ /* step 4: fill data to info_linear->info */
10560
+ info_linear->arrays = arrays;
10561
+ memset(&info_linear->info, 0, sizeof(info));
10562
+ ptr = info_linear->data;
10563
+
10564
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10565
+ struct bpf_prog_info_array_desc *desc;
10566
+ __u32 count, size;
10567
+
10568
+ if ((arrays & (1UL << i)) == 0)
10569
+ continue;
10570
+
10571
+ desc = bpf_prog_info_array_desc + i;
10572
+ count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10573
+ size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10574
+ bpf_prog_info_set_offset_u32(&info_linear->info,
10575
+ desc->count_offset, count);
10576
+ bpf_prog_info_set_offset_u32(&info_linear->info,
10577
+ desc->size_offset, size);
10578
+ bpf_prog_info_set_offset_u64(&info_linear->info,
10579
+ desc->array_offset,
10580
+ ptr_to_u64(ptr));
10581
+ ptr += count * size;
10582
+ }
10583
+
10584
+ /* step 5: call syscall again to get required arrays */
10585
+ err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
10586
+ if (err) {
10587
+ pr_debug("can't get prog info: %s", strerror(errno));
10588
+ free(info_linear);
10589
+ return ERR_PTR(-EFAULT);
10590
+ }
10591
+
10592
+ /* step 6: verify the data */
10593
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10594
+ struct bpf_prog_info_array_desc *desc;
10595
+ __u32 v1, v2;
10596
+
10597
+ if ((arrays & (1UL << i)) == 0)
10598
+ continue;
10599
+
10600
+ desc = bpf_prog_info_array_desc + i;
10601
+ v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
10602
+ v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
10603
+ desc->count_offset);
10604
+ if (v1 != v2)
10605
+ pr_warn("%s: mismatch in element count\n", __func__);
10606
+
10607
+ v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
10608
+ v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
10609
+ desc->size_offset);
10610
+ if (v1 != v2)
10611
+ pr_warn("%s: mismatch in rec size\n", __func__);
10612
+ }
10613
+
10614
+ /* step 7: update info_len and data_len */
10615
+ info_linear->info_len = sizeof(struct bpf_prog_info);
10616
+ info_linear->data_len = data_len;
10617
+
10618
+ return info_linear;
10619
+}
10620
+
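A usage sketch for the "linear" program info helper above, requesting a single array; the prog FD is assumed to come from elsewhere (e.g. bpf_prog_get_fd_by_id()):

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

static void print_jited_ksyms(int prog_fd)
{
	__u64 arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
	struct bpf_prog_info_linear *info;
	__u64 *ksyms;
	__u32 i;

	info = bpf_program__get_prog_info_linear(prog_fd, arrays);
	if (libbpf_get_error(info))
		return;

	/* after the call, jited_ksyms holds an address pointing into info->data */
	ksyms = (__u64 *)(uintptr_t)info->info.jited_ksyms;
	for (i = 0; i < info->info.nr_jited_ksyms; i++)
		printf("ksym #%u: 0x%llx\n", i, (unsigned long long)ksyms[i]);

	free(info);	/* one allocation covers the info struct and all arrays */
}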
10621
+void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
10622
+{
10623
+ int i;
10624
+
10625
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10626
+ struct bpf_prog_info_array_desc *desc;
10627
+ __u64 addr, offs;
10628
+
10629
+ if ((info_linear->arrays & (1UL << i)) == 0)
10630
+ continue;
10631
+
10632
+ desc = bpf_prog_info_array_desc + i;
10633
+ addr = bpf_prog_info_read_offset_u64(&info_linear->info,
10634
+ desc->array_offset);
10635
+ offs = addr - ptr_to_u64(info_linear->data);
10636
+ bpf_prog_info_set_offset_u64(&info_linear->info,
10637
+ desc->array_offset, offs);
10638
+ }
10639
+}
10640
+
10641
+void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
10642
+{
10643
+ int i;
10644
+
10645
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
10646
+ struct bpf_prog_info_array_desc *desc;
10647
+ __u64 addr, offs;
10648
+
10649
+ if ((info_linear->arrays & (1UL << i)) == 0)
10650
+ continue;
10651
+
10652
+ desc = bpf_prog_info_array_desc + i;
10653
+ offs = bpf_prog_info_read_offset_u64(&info_linear->info,
10654
+ desc->array_offset);
10655
+ addr = offs + ptr_to_u64(info_linear->data);
10656
+ bpf_prog_info_set_offset_u64(&info_linear->info,
10657
+ desc->array_offset, addr);
10658
+ }
10659
+}
10660
+
10661
+int bpf_program__set_attach_target(struct bpf_program *prog,
10662
+ int attach_prog_fd,
10663
+ const char *attach_func_name)
10664
+{
10665
+ int btf_id;
10666
+
10667
+ if (!prog || attach_prog_fd < 0 || !attach_func_name)
10668
+ return -EINVAL;
10669
+
10670
+ if (attach_prog_fd)
10671
+ btf_id = libbpf_find_prog_btf_id(attach_func_name,
10672
+ attach_prog_fd);
10673
+ else
10674
+ btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
10675
+ prog->expected_attach_type);
10676
+
10677
+ if (btf_id < 0)
10678
+ return btf_id;
10679
+
10680
+ prog->attach_btf_id = btf_id;
10681
+ prog->attach_prog_fd = attach_prog_fd;
10682
+ return 0;
10683
+}
10684
+
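A sketch of re-targeting a tracing program before load; an attach_prog_fd of 0 makes the name resolve against vmlinux BTF, and the program/function names are assumptions:

static int retarget_and_load(struct bpf_object *obj)
{
	struct bpf_program *prog;
	int err;

	prog = bpf_object__find_program_by_name(obj, "handle_enter");	/* hypothetical fentry prog */
	if (!prog)
		return -1;

	/* 0 == resolve "tcp_v4_connect" against vmlinux BTF rather than another program */
	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
	if (err)
		return err;

	return bpf_object__load(obj);
}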
10685
+int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
10686
+{
10687
+ int err = 0, n, len, start, end = -1;
10688
+ bool *tmp;
10689
+
10690
+ *mask = NULL;
10691
+ *mask_sz = 0;
10692
+
10693
+	/* Each substring separated by ',' has the format \d+-\d+ or \d+ */
10694
+ while (*s) {
10695
+ if (*s == ',' || *s == '\n') {
10696
+ s++;
10697
+ continue;
10698
+ }
10699
+ n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
10700
+ if (n <= 0 || n > 2) {
10701
+ pr_warn("Failed to get CPU range %s: %d\n", s, n);
10702
+ err = -EINVAL;
10703
+ goto cleanup;
10704
+ } else if (n == 1) {
10705
+ end = start;
10706
+ }
10707
+ if (start < 0 || start > end) {
10708
+ pr_warn("Invalid CPU range [%d,%d] in %s\n",
10709
+ start, end, s);
10710
+ err = -EINVAL;
10711
+ goto cleanup;
10712
+ }
10713
+ tmp = realloc(*mask, end + 1);
10714
+ if (!tmp) {
10715
+ err = -ENOMEM;
10716
+ goto cleanup;
10717
+ }
10718
+ *mask = tmp;
10719
+ memset(tmp + *mask_sz, 0, start - *mask_sz);
10720
+ memset(tmp + start, 1, end - start + 1);
10721
+ *mask_sz = end + 1;
10722
+ s += len;
10723
+ }
10724
+ if (!*mask_sz) {
10725
+ pr_warn("Empty CPU range\n");
10726
+ return -EINVAL;
10727
+ }
10728
+ return 0;
10729
+cleanup:
10730
+ free(*mask);
10731
+ *mask = NULL;
10732
+ return err;
10733
+}
10734
+
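parse_cpu_mask_str() is an internal helper (declared in libbpf_internal.h); a small sketch of the expected expansion for an illustrative input string:

static int cpu_mask_demo(void)
{
	bool *mask;
	int n, err;

	err = parse_cpu_mask_str("0-3,5\n", &mask, &n);
	if (err)
		return err;

	/* here n == 6; mask[0..3] and mask[5] are true, mask[4] is false */
	free(mask);
	return 0;
}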
10735
+int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
10736
+{
10737
+ int fd, err = 0, len;
10738
+ char buf[128];
10739
+
10740
+ fd = open(fcpu, O_RDONLY);
10741
+ if (fd < 0) {
10742
+ err = -errno;
10743
+ pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
10744
+ return err;
10745
+ }
10746
+ len = read(fd, buf, sizeof(buf));
10747
+ close(fd);
10748
+ if (len <= 0) {
10749
+ err = len ? -errno : -EINVAL;
10750
+ pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
10751
+ return err;
10752
+ }
10753
+ if (len >= sizeof(buf)) {
10754
+ pr_warn("CPU mask is too big in file %s\n", fcpu);
10755
+ return -E2BIG;
10756
+ }
10757
+ buf[len] = '\0';
10758
+
10759
+ return parse_cpu_mask_str(buf, mask, mask_sz);
10760
+}
10761
+
10762
+int libbpf_num_possible_cpus(void)
10763
+{
10764
+ static const char *fcpu = "/sys/devices/system/cpu/possible";
10765
+ static int cpus;
10766
+ int err, n, i, tmp_cpus;
10767
+ bool *mask;
10768
+
10769
+ tmp_cpus = READ_ONCE(cpus);
10770
+ if (tmp_cpus > 0)
10771
+ return tmp_cpus;
10772
+
10773
+ err = parse_cpu_mask_file(fcpu, &mask, &n);
10774
+ if (err)
10775
+ return err;
10776
+
10777
+ tmp_cpus = 0;
10778
+ for (i = 0; i < n; i++) {
10779
+ if (mask[i])
10780
+ tmp_cpus++;
10781
+ }
10782
+ free(mask);
10783
+
10784
+ WRITE_ONCE(cpus, tmp_cpus);
10785
+ return tmp_cpus;
10786
+}
10787
+
10788
+int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
10789
+ const struct bpf_object_open_opts *opts)
10790
+{
10791
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
10792
+ .object_name = s->name,
10793
+ );
10794
+ struct bpf_object *obj;
10795
+ int i;
10796
+
10797
+	/* Attempt to preserve opts->object_name, unless overridden by user
10798
+ * explicitly. Overwriting object name for skeletons is discouraged,
10799
+ * as it breaks global data maps, because they contain object name
10800
+ * prefix as their own map name prefix. When skeleton is generated,
10801
+	 * bpftool assumes that this name will stay the same.
10802
+ */
10803
+ if (opts) {
10804
+ memcpy(&skel_opts, opts, sizeof(*opts));
10805
+ if (!opts->object_name)
10806
+ skel_opts.object_name = s->name;
10807
+ }
10808
+
10809
+ obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
10810
+ if (IS_ERR(obj)) {
10811
+ pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
10812
+ s->name, PTR_ERR(obj));
10813
+ return PTR_ERR(obj);
10814
+ }
10815
+
10816
+ *s->obj = obj;
10817
+
10818
+ for (i = 0; i < s->map_cnt; i++) {
10819
+ struct bpf_map **map = s->maps[i].map;
10820
+ const char *name = s->maps[i].name;
10821
+ void **mmaped = s->maps[i].mmaped;
10822
+
10823
+ *map = bpf_object__find_map_by_name(obj, name);
10824
+ if (!*map) {
10825
+ pr_warn("failed to find skeleton map '%s'\n", name);
10826
+ return -ESRCH;
10827
+ }
10828
+
10829
+ /* externs shouldn't be pre-setup from user code */
10830
+ if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
10831
+ *mmaped = (*map)->mmaped;
10832
+ }
10833
+
10834
+ for (i = 0; i < s->prog_cnt; i++) {
10835
+ struct bpf_program **prog = s->progs[i].prog;
10836
+ const char *name = s->progs[i].name;
10837
+
10838
+ *prog = bpf_object__find_program_by_name(obj, name);
10839
+ if (!*prog) {
10840
+ pr_warn("failed to find skeleton program '%s'\n", name);
10841
+ return -ESRCH;
10842
+ }
10843
+ }
10844
+
10845
+ return 0;
10846
+}
10847
+
10848
+int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
10849
+{
10850
+ int i, err;
10851
+
10852
+ err = bpf_object__load(*s->obj);
10853
+ if (err) {
10854
+ pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
10855
+ return err;
10856
+ }
10857
+
10858
+ for (i = 0; i < s->map_cnt; i++) {
10859
+ struct bpf_map *map = *s->maps[i].map;
10860
+ size_t mmap_sz = bpf_map_mmap_sz(map);
10861
+ int prot, map_fd = bpf_map__fd(map);
10862
+ void **mmaped = s->maps[i].mmaped;
10863
+
10864
+ if (!mmaped)
10865
+ continue;
10866
+
10867
+ if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
10868
+ *mmaped = NULL;
10869
+ continue;
10870
+ }
10871
+
10872
+ if (map->def.map_flags & BPF_F_RDONLY_PROG)
10873
+ prot = PROT_READ;
10874
+ else
10875
+ prot = PROT_READ | PROT_WRITE;
10876
+
10877
+ /* Remap anonymous mmap()-ed "map initialization image" as
10878
+ * a BPF map-backed mmap()-ed memory, but preserving the same
10879
+		 * memory address. This will cause the kernel to change the process's
10880
+ * page table to point to a different piece of kernel memory,
10881
+		 * but from the userspace point of view the memory address (and its
10882
+ * contents, being identical at this point) will stay the
10883
+ * same. This mapping will be released by bpf_object__close()
10884
+ * as per normal clean up procedure, so we don't need to worry
10885
+ * about it from skeleton's clean up perspective.
10886
+ */
10887
+ *mmaped = mmap(map->mmaped, mmap_sz, prot,
10888
+ MAP_SHARED | MAP_FIXED, map_fd, 0);
10889
+ if (*mmaped == MAP_FAILED) {
10890
+ err = -errno;
10891
+ *mmaped = NULL;
10892
+ pr_warn("failed to re-mmap() map '%s': %d\n",
10893
+ bpf_map__name(map), err);
10894
+ return err;
10895
+ }
10896
+ }
10897
+
10898
+ return 0;
10899
+}
10900
+
10901
+int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
10902
+{
10903
+ int i;
10904
+
10905
+ for (i = 0; i < s->prog_cnt; i++) {
10906
+ struct bpf_program *prog = *s->progs[i].prog;
10907
+ struct bpf_link **link = s->progs[i].link;
10908
+ const struct bpf_sec_def *sec_def;
10909
+
10910
+ if (!prog->load)
10911
+ continue;
10912
+
10913
+ sec_def = find_sec_def(prog->sec_name);
10914
+ if (!sec_def || !sec_def->attach_fn)
10915
+ continue;
10916
+
10917
+ *link = sec_def->attach_fn(sec_def, prog);
10918
+ if (IS_ERR(*link)) {
10919
+ pr_warn("failed to auto-attach program '%s': %ld\n",
10920
+ bpf_program__name(prog), PTR_ERR(*link));
10921
+ return PTR_ERR(*link);
10922
+ }
10923
+ }
10924
+
10925
+ return 0;
10926
+}
10927
+
10928
+void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
10929
+{
10930
+ int i;
10931
+
10932
+ for (i = 0; i < s->prog_cnt; i++) {
10933
+ struct bpf_link **link = s->progs[i].link;
10934
+
10935
+ bpf_link__destroy(*link);
10936
+ *link = NULL;
10937
+ }
10938
+}
10939
+
10940
+void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
10941
+{
10942
+ if (!s)
10943
+ return;
10944
+
10945
+ if (s->progs)
10946
+ bpf_object__detach_skeleton(s);
10947
+ if (s->obj)
10948
+ bpf_object__close(*s->obj);
10949
+ free(s->maps);
10950
+ free(s->progs);
10951
+ free(s);
10952
+}
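The skeleton entry points above are normally driven by bpftool-generated code rather than called by hand; a hedged sketch of that generated-usage pattern for a hypothetical example.bpf.c (the example_bpf__*() names are generated wrappers, not libbpf APIs):

static int run_example(void)
{
	struct example_bpf *skel;	/* generated by: bpftool gen skeleton example.bpf.o */
	int err = -1;

	skel = example_bpf__open();		/* wraps bpf_object__open_skeleton() */
	if (!skel)
		return -1;
	if (example_bpf__load(skel))		/* wraps bpf_object__load_skeleton() */
		goto out;
	if (example_bpf__attach(skel))		/* wraps bpf_object__attach_skeleton() */
		goto out;

	/* while attached: read/write skel->bss and skel->data, inspect skel->links */
	err = 0;
out:
	example_bpf__destroy(skel);		/* wraps bpf_object__destroy_skeleton() */
	return err;
}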