hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/tools/perf/builtin-trace.c
....@@ -12,21 +12,33 @@
1212 * Initially based on the 'trace' prototype by Thomas Gleixner:
1313 *
1414 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15
- *
16
- * Released under the GPL v2. (and only v2, not any later version)
1715 */
1816
17
+#include "util/record.h"
1918 #include <traceevent/event-parse.h>
2019 #include <api/fs/tracing_path.h>
20
+#include <bpf/bpf.h>
21
+#include "util/bpf_map.h"
22
+#include "util/rlimit.h"
2123 #include "builtin.h"
2224 #include "util/cgroup.h"
2325 #include "util/color.h"
26
+#include "util/config.h"
2427 #include "util/debug.h"
28
+#include "util/dso.h"
2529 #include "util/env.h"
2630 #include "util/event.h"
31
+#include "util/evsel.h"
32
+#include "util/evsel_fprintf.h"
33
+#include "util/synthetic-events.h"
2734 #include "util/evlist.h"
35
+#include "util/evswitch.h"
36
+#include "util/mmap.h"
37
+#include <subcmd/pager.h>
2838 #include <subcmd/exec-cmd.h>
2939 #include "util/machine.h"
40
+#include "util/map.h"
41
+#include "util/symbol.h"
3042 #include "util/path.h"
3143 #include "util/session.h"
3244 #include "util/thread.h"
....@@ -35,6 +47,8 @@
3547 #include "util/intlist.h"
3648 #include "util/thread_map.h"
3749 #include "util/stat.h"
50
+#include "util/tool.h"
51
+#include "util/util.h"
3852 #include "trace/beauty/beauty.h"
3953 #include "trace-event.h"
4054 #include "util/parse-events.h"
....@@ -44,6 +58,7 @@
4458 #include "string2.h"
4559 #include "syscalltbl.h"
4660 #include "rb_resort.h"
61
+#include "../perf.h"
4762
4863 #include <errno.h>
4964 #include <inttypes.h>
....@@ -57,9 +72,12 @@
5772 #include <linux/random.h>
5873 #include <linux/stringify.h>
5974 #include <linux/time64.h>
75
+#include <linux/zalloc.h>
6076 #include <fcntl.h>
77
+#include <sys/sysmacros.h>
6178
62
-#include "sane_ctype.h"
79
+#include <linux/ctype.h>
80
+#include <perf/mmap.h>
6381
6482 #ifndef O_CLOEXEC
6583 # define O_CLOEXEC 02000000
....@@ -69,26 +87,67 @@
6987 # define F_LINUX_SPECIFIC_BASE 1024
7088 #endif
7189
90
+#define RAW_SYSCALL_ARGS_NUM 6
91
+
92
+/*
93
+ * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
94
+ */
95
+struct syscall_arg_fmt {
96
+ size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
97
+ bool (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
98
+ unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
99
+ void *parm;
100
+ const char *name;
101
+ u16 nr_entries; // for arrays
102
+ bool show_zero;
103
+};
104
+
105
+struct syscall_fmt {
106
+ const char *name;
107
+ const char *alias;
108
+ struct {
109
+ const char *sys_enter,
110
+ *sys_exit;
111
+ } bpf_prog_name;
112
+ struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM];
113
+ u8 nr_args;
114
+ bool errpid;
115
+ bool timeout;
116
+ bool hexret;
117
+};
118
+
72119 struct trace {
73120 struct perf_tool tool;
74121 struct syscalltbl *sctbl;
75122 struct {
76
- int max;
77123 struct syscall *table;
124
+ struct bpf_map *map;
125
+ struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
126
+ struct bpf_map *sys_enter,
127
+ *sys_exit;
128
+ } prog_array;
78129 struct {
79
- struct perf_evsel *sys_enter,
130
+ struct evsel *sys_enter,
80131 *sys_exit,
81132 *augmented;
82133 } events;
134
+ struct bpf_program *unaugmented_prog;
83135 } syscalls;
136
+ struct {
137
+ struct bpf_map *map;
138
+ } dump;
84139 struct record_opts opts;
85
- struct perf_evlist *evlist;
140
+ struct evlist *evlist;
86141 struct machine *host;
87142 struct thread *current;
143
+ struct bpf_object *bpf_obj;
88144 struct cgroup *cgroup;
89145 u64 base_time;
90146 FILE *output;
91147 unsigned long nr_events;
148
+ unsigned long nr_events_printed;
149
+ unsigned long max_events;
150
+ struct evswitch evswitch;
92151 struct strlist *ev_qualifier;
93152 struct {
94153 size_t nr;
....@@ -97,6 +156,7 @@
97156 struct {
98157 size_t nr;
99158 pid_t *entries;
159
+ struct bpf_map *map;
100160 } filter_pids;
101161 double duration_filter;
102162 double runtime_ms;
....@@ -106,6 +166,10 @@
106166 } stats;
107167 unsigned int max_stack;
108168 unsigned int min_stack;
169
+ int raw_augmented_syscalls_args_size;
170
+ bool raw_augmented_syscalls;
171
+ bool fd_path_disabled;
172
+ bool sort_events;
109173 bool not_ev_qualifier;
110174 bool live;
111175 bool full_time;
....@@ -113,15 +177,28 @@
113177 bool multiple_threads;
114178 bool summary;
115179 bool summary_only;
180
+ bool errno_summary;
116181 bool failure_only;
117182 bool show_comm;
118183 bool print_sample;
119184 bool show_tool_stats;
120185 bool trace_syscalls;
186
+ bool libtraceevent_print;
121187 bool kernel_syscallchains;
188
+ s16 args_alignment;
189
+ bool show_tstamp;
190
+ bool show_duration;
191
+ bool show_zeros;
192
+ bool show_arg_names;
193
+ bool show_string_prefix;
122194 bool force;
123195 bool vfs_getname;
124196 int trace_pgfaults;
197
+ char *perfconfig_events;
198
+ struct {
199
+ struct ordered_events data;
200
+ u64 last;
201
+ } oe;
125202 };
126203
127204 struct tp_field {
....@@ -181,7 +258,7 @@
181258 return 0;
182259 }
183260
184
-static int tp_field__init_uint(struct tp_field *field, struct format_field *format_field, bool needs_swap)
261
+static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
185262 {
186263 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
187264 }
....@@ -198,7 +275,7 @@
198275 return 0;
199276 }
200277
201
-static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
278
+static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
202279 {
203280 return __tp_field__init_ptr(field, format_field->offset);
204281 }
....@@ -210,11 +287,90 @@
210287 };
211288 };
212289
213
-static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
214
- struct tp_field *field,
215
- const char *name)
290
+/*
291
+ * The evsel->priv as used by 'perf trace'
292
+ * sc: for raw_syscalls:sys_{enter,exit} and syscalls:sys_{enter,exit}_SYSCALLNAME
293
+ * fmt: for all the other tracepoints
294
+ */
295
+struct evsel_trace {
296
+ struct syscall_tp sc;
297
+ struct syscall_arg_fmt *fmt;
298
+};
299
+
300
+static struct evsel_trace *evsel_trace__new(void)
216301 {
217
- struct format_field *format_field = perf_evsel__field(evsel, name);
302
+ return zalloc(sizeof(struct evsel_trace));
303
+}
304
+
305
+static void evsel_trace__delete(struct evsel_trace *et)
306
+{
307
+ if (et == NULL)
308
+ return;
309
+
310
+ zfree(&et->fmt);
311
+ free(et);
312
+}
313
+
314
+/*
315
+ * Used with raw_syscalls:sys_{enter,exit} and with the
316
+ * syscalls:sys_{enter,exit}_SYSCALL tracepoints
317
+ */
318
+static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel)
319
+{
320
+ struct evsel_trace *et = evsel->priv;
321
+
322
+ return &et->sc;
323
+}
324
+
325
+static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel)
326
+{
327
+ if (evsel->priv == NULL) {
328
+ evsel->priv = evsel_trace__new();
329
+ if (evsel->priv == NULL)
330
+ return NULL;
331
+ }
332
+
333
+ return __evsel__syscall_tp(evsel);
334
+}
335
+
336
+/*
337
+ * Used with all the other tracepoints.
338
+ */
339
+static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel)
340
+{
341
+ struct evsel_trace *et = evsel->priv;
342
+
343
+ return et->fmt;
344
+}
345
+
346
+static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
347
+{
348
+ struct evsel_trace *et = evsel->priv;
349
+
350
+ if (evsel->priv == NULL) {
351
+ et = evsel->priv = evsel_trace__new();
352
+
353
+ if (et == NULL)
354
+ return NULL;
355
+ }
356
+
357
+ if (et->fmt == NULL) {
358
+ et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
359
+ if (et->fmt == NULL)
360
+ goto out_delete;
361
+ }
362
+
363
+ return __evsel__syscall_arg_fmt(evsel);
364
+
365
+out_delete:
366
+ evsel_trace__delete(evsel->priv);
367
+ evsel->priv = NULL;
368
+ return NULL;
369
+}
370
+
371
+static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
372
+{
373
+ struct tep_format_field *format_field = evsel__field(evsel, name);
218374
219375 if (format_field == NULL)
220376 return -1;
....@@ -223,14 +379,12 @@
223379 }
224380
/*
 * Initialize member 'name' of the evsel's struct syscall_tp through
 * evsel__init_tp_uint_field(), looking the field up by the stringified
 * member name. Uses __evsel__syscall_tp(), so evsel->priv must already
 * be set.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name)			\
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);		\
	   evsel__init_tp_uint_field(evsel, &sc->name, #name); })
228384
229
-static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
230
- struct tp_field *field,
231
- const char *name)
385
+static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
232386 {
233
- struct format_field *format_field = perf_evsel__field(evsel, name);
387
+ struct tep_format_field *format_field = evsel__field(evsel, name);
234388
235389 if (format_field == NULL)
236390 return -1;
....@@ -239,117 +393,136 @@
239393 }
240394
/*
 * Initialize member 'name' of the evsel's struct syscall_tp through
 * evsel__init_tp_ptr_field(), looking the field up by the stringified
 * member name. Uses __evsel__syscall_tp(), so evsel->priv must already
 * be set.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name)			\
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);		\
	   evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
244398
245
-static void perf_evsel__delete_priv(struct perf_evsel *evsel)
399
+static void evsel__delete_priv(struct evsel *evsel)
246400 {
247401 zfree(&evsel->priv);
248
- perf_evsel__delete(evsel);
402
+ evsel__delete(evsel);
249403 }
250404
251
-static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
405
+static int evsel__init_syscall_tp(struct evsel *evsel)
252406 {
253
- struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
407
+ struct syscall_tp *sc = evsel__syscall_tp(evsel);
254408
255
- if (evsel->priv != NULL) {
256
- if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
257
- goto out_delete;
409
+ if (sc != NULL) {
410
+ if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
411
+ evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
412
+ return -ENOENT;
258413 return 0;
259414 }
260415
261416 return -ENOMEM;
262
-out_delete:
263
- zfree(&evsel->priv);
264
- return -ENOENT;
265417 }
266418
267
-static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
419
+static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
268420 {
269
- struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
421
+ struct syscall_tp *sc = evsel__syscall_tp(evsel);
270422
271
- if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */
272
- if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
273
- goto out_delete;
423
+ if (sc != NULL) {
424
+ struct tep_format_field *syscall_id = evsel__field(tp, "id");
425
+ if (syscall_id == NULL)
426
+ syscall_id = evsel__field(tp, "__syscall_nr");
427
+ if (syscall_id == NULL ||
428
+ __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
429
+ return -EINVAL;
274430
275431 return 0;
276432 }
277433
278434 return -ENOMEM;
279
-out_delete:
280
- zfree(&evsel->priv);
281
- return -EINVAL;
282435 }
283436
284
-static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
437
+static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
285438 {
286
- struct syscall_tp *sc = evsel->priv;
439
+ struct syscall_tp *sc = __evsel__syscall_tp(evsel);
287440
288441 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
289442 }
290443
291
-static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
444
+static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
292445 {
293
- evsel->priv = malloc(sizeof(struct syscall_tp));
294
- if (evsel->priv != NULL) {
446
+ struct syscall_tp *sc = __evsel__syscall_tp(evsel);
447
+
448
+ return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
449
+}
450
+
451
+static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
452
+{
453
+ if (evsel__syscall_tp(evsel) != NULL) {
295454 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
296
- goto out_delete;
455
+ return -ENOENT;
297456
298457 evsel->handler = handler;
299458 return 0;
300459 }
301460
302461 return -ENOMEM;
303
-
304
-out_delete:
305
- zfree(&evsel->priv);
306
- return -ENOENT;
307462 }
308463
/*
 * Create the evsel for the raw_syscalls:'direction' tracepoint, falling
 * back to syscalls:'direction', and initialize it with 'handler'.
 *
 * Returns the new evsel, or NULL when neither tracepoint could be
 * created or the initialization failed (the evsel is deleted then).
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *evsel = evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (evsel__init_raw_syscall_tp(evsel, handler) == 0)
		return evsel;

	evsel__delete_priv(evsel);
	return NULL;
}
329484
/*
 * Read member 'name' of the evsel's struct syscall_tp from 'sample' by
 * calling that member's 'integer' accessor. Uses __evsel__syscall_tp(),
 * so evsel->priv must already be set.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample)			\
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel);	\
	   fields->name.integer(&fields->name, sample); })
333488
/*
 * Read member 'name' of the evsel's struct syscall_tp from 'sample' by
 * calling that member's 'pointer' accessor. Uses __evsel__syscall_tp(),
 * so evsel->priv must already be set.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample)			\
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel);	\
	   fields->name.pointer(&fields->name, sample); })
337492
338
-size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
493
+size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val)
339494 {
340495 int idx = val - sa->offset;
341496
342
- if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
343
- return scnprintf(bf, size, intfmt, val);
497
+ if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
498
+ size_t printed = scnprintf(bf, size, intfmt, val);
499
+ if (show_suffix)
500
+ printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
501
+ return printed;
502
+ }
344503
345
- return scnprintf(bf, size, "%s", sa->entries[idx]);
504
+ return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : "");
505
+}
506
+
507
+size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
508
+{
509
+ int idx = val - sa->offset;
510
+
511
+ if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
512
+ size_t printed = scnprintf(bf, size, intfmt, val);
513
+ if (show_prefix)
514
+ printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
515
+ return printed;
516
+ }
517
+
518
+ return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
346519 }
347520
348521 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
349522 const char *intfmt,
350523 struct syscall_arg *arg)
351524 {
352
- return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
525
+ return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
353526 }
354527
355528 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
....@@ -360,34 +533,123 @@
360533
361534 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
362535
363
-struct strarrays {
364
- int nr_entries;
365
- struct strarray **entries;
366
-};
536
+bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
537
+{
538
+ return strarray__strtoul(arg->parm, bf, size, ret);
539
+}
367540
368
-#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
369
- .nr_entries = ARRAY_SIZE(array), \
370
- .entries = array, \
541
+bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
542
+{
543
+ return strarray__strtoul_flags(arg->parm, bf, size, ret);
544
+}
545
+
546
+bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
547
+{
548
+ return strarrays__strtoul(arg->parm, bf, size, ret);
549
+}
550
+
551
+size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
552
+{
553
+ return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
554
+}
555
+
556
+size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
557
+{
558
+ size_t printed;
559
+ int i;
560
+
561
+ for (i = 0; i < sas->nr_entries; ++i) {
562
+ struct strarray *sa = sas->entries[i];
563
+ int idx = val - sa->offset;
564
+
565
+ if (idx >= 0 && idx < sa->nr_entries) {
566
+ if (sa->entries[idx] == NULL)
567
+ break;
568
+ return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
569
+ }
570
+ }
571
+
572
+ printed = scnprintf(bf, size, intfmt, val);
573
+ if (show_prefix)
574
+ printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
575
+ return printed;
576
+}
577
+
578
+bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
579
+{
580
+ int i;
581
+
582
+ for (i = 0; i < sa->nr_entries; ++i) {
583
+ if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') {
584
+ *ret = sa->offset + i;
585
+ return true;
586
+ }
587
+ }
588
+
589
+ return false;
590
+}
591
+
592
+bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret)
593
+{
594
+ u64 val = 0;
595
+ char *tok = bf, *sep, *end;
596
+
597
+ *ret = 0;
598
+
599
+ while (size != 0) {
600
+ int toklen = size;
601
+
602
+ sep = memchr(tok, '|', size);
603
+ if (sep != NULL) {
604
+ size -= sep - tok + 1;
605
+
606
+ end = sep - 1;
607
+ while (end > tok && isspace(*end))
608
+ --end;
609
+
610
+ toklen = end - tok + 1;
611
+ }
612
+
613
+ while (isspace(*tok))
614
+ ++tok;
615
+
616
+ if (isalpha(*tok) || *tok == '_') {
617
+ if (!strarray__strtoul(sa, tok, toklen, &val))
618
+ return false;
619
+ } else {
620
+ bool is_hexa = tok[0] == 0 && (tok[1] = 'x' || tok[1] == 'X');
621
+
622
+ val = strtoul(tok, NULL, is_hexa ? 16 : 0);
623
+ }
624
+
625
+ *ret |= (1 << (val - 1));
626
+
627
+ if (sep == NULL)
628
+ break;
629
+ tok = sep + 1;
630
+ }
631
+
632
+ return true;
633
+}
634
+
635
+bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
636
+{
637
+ int i;
638
+
639
+ for (i = 0; i < sas->nr_entries; ++i) {
640
+ struct strarray *sa = sas->entries[i];
641
+
642
+ if (strarray__strtoul(sa, bf, size, ret))
643
+ return true;
644
+ }
645
+
646
+ return false;
371647 }
372648
373649 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
374650 struct syscall_arg *arg)
375651 {
376
- struct strarrays *sas = arg->parm;
377
- int i;
378
-
379
- for (i = 0; i < sas->nr_entries; ++i) {
380
- struct strarray *sa = sas->entries[i];
381
- int idx = arg->val - sa->offset;
382
-
383
- if (idx >= 0 && idx < sa->nr_entries) {
384
- if (sa->entries[idx] == NULL)
385
- break;
386
- return scnprintf(bf, size, "%s", sa->entries[idx]);
387
- }
388
- }
389
-
390
- return scnprintf(bf, size, "%d", arg->val);
652
+ return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
391653 }
392654
393655 #ifndef AT_FDCWD
....@@ -398,9 +660,10 @@
398660 struct syscall_arg *arg)
399661 {
400662 int fd = arg->val;
663
+ const char *prefix = "AT_FD";
401664
402665 if (fd == AT_FDCWD)
403
- return scnprintf(bf, size, "CWD");
666
+ return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
404667
405668 return syscall_arg__scnprintf_fd(bf, size, arg);
406669 }
....@@ -417,6 +680,13 @@
417680 return scnprintf(bf, size, "%#lx", arg->val);
418681 }
419682
683
+size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
684
+{
685
+ if (arg->val == 0)
686
+ return scnprintf(bf, size, "NULL");
687
+ return syscall_arg__scnprintf_hex(bf, size, arg);
688
+}
689
+
420690 size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
421691 {
422692 return scnprintf(bf, size, "%d", arg->val);
....@@ -427,17 +697,36 @@
427697 return scnprintf(bf, size, "%ld", arg->val);
428698 }
429699
700
+static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
701
+{
702
+ // XXX Hey, maybe for sched:sched_switch prev/next comm fields we can
703
+ // fill missing comms using thread__set_comm()...
704
+ // here or in a special syscall_arg__scnprintf_pid_sched_tp...
705
+ return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val);
706
+}
707
+
708
+#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array
709
+
430710 static const char *bpf_cmd[] = {
431711 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
432712 "MAP_GET_NEXT_KEY", "PROG_LOAD",
433713 };
434
-static DEFINE_STRARRAY(bpf_cmd);
714
+static DEFINE_STRARRAY(bpf_cmd, "BPF_");
715
+
716
+static const char *fsmount_flags[] = {
717
+ [1] = "CLOEXEC",
718
+};
719
+static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
720
+
721
+#include "trace/beauty/generated/fsconfig_arrays.c"
722
+
723
+static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
435724
436725 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
437
-static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
726
+static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
438727
439728 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
440
-static DEFINE_STRARRAY(itimers);
729
+static DEFINE_STRARRAY(itimers, "ITIMER_");
441730
442731 static const char *keyctl_options[] = {
443732 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
....@@ -446,7 +735,7 @@
446735 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
447736 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
448737 };
449
-static DEFINE_STRARRAY(keyctl_options);
738
+static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");
450739
451740 static const char *whences[] = { "SET", "CUR", "END",
452741 #ifdef SEEK_DATA
....@@ -456,7 +745,7 @@
456745 "HOLE",
457746 #endif
458747 };
459
-static DEFINE_STRARRAY(whences);
748
+static DEFINE_STRARRAY(whences, "SEEK_");
460749
461750 static const char *fcntl_cmds[] = {
462751 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
....@@ -464,7 +753,7 @@
464753 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
465754 "GETOWNER_UIDS",
466755 };
467
-static DEFINE_STRARRAY(fcntl_cmds);
756
+static DEFINE_STRARRAY(fcntl_cmds, "F_");
468757
469758 static const char *fcntl_linux_specific_cmds[] = {
470759 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
....@@ -472,7 +761,7 @@
472761 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
473762 };
474763
475
-static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
764
+static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);
476765
477766 static struct strarray *fcntl_cmds_arrays[] = {
478767 &strarray__fcntl_cmds,
....@@ -486,39 +775,31 @@
486775 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
487776 "RTTIME",
488777 };
489
-static DEFINE_STRARRAY(rlimit_resources);
778
+static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");
490779
491780 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
492
-static DEFINE_STRARRAY(sighow);
781
+static DEFINE_STRARRAY(sighow, "SIG_");
493782
494783 static const char *clockid[] = {
495784 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
496785 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
497786 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
498787 };
499
-static DEFINE_STRARRAY(clockid);
500
-
501
-static const char *socket_families[] = {
502
- "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
503
- "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
504
- "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
505
- "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
506
- "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
507
- "ALG", "NFC", "VSOCK",
508
-};
509
-static DEFINE_STRARRAY(socket_families);
788
+static DEFINE_STRARRAY(clockid, "CLOCK_");
510789
511790 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
512791 struct syscall_arg *arg)
513792 {
793
+ bool show_prefix = arg->show_string_prefix;
794
+ const char *suffix = "_OK";
514795 size_t printed = 0;
515796 int mode = arg->val;
516797
517798 if (mode == F_OK) /* 0 */
518
- return scnprintf(bf, size, "F");
799
+ return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
519800 #define P_MODE(n) \
520801 if (mode & n##_OK) { \
521
- printed += scnprintf(bf + printed, size - printed, "%s", #n); \
802
+ printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
522803 mode &= ~n##_OK; \
523804 }
524805
....@@ -543,11 +824,13 @@
543824 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
544825 struct syscall_arg *arg)
545826 {
827
+ bool show_prefix = arg->show_string_prefix;
828
+ const char *prefix = "O_";
546829 int printed = 0, flags = arg->val;
547830
548831 #define P_FLAG(n) \
549832 if (flags & O_##n) { \
550
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
833
+ printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
551834 flags &= ~O_##n; \
552835 }
553836
....@@ -573,11 +856,13 @@
573856 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
574857 struct syscall_arg *arg)
575858 {
859
+ bool show_prefix = arg->show_string_prefix;
860
+ const char *prefix = "GRND_";
576861 int printed = 0, flags = arg->val;
577862
578863 #define P_FLAG(n) \
579864 if (flags & GRND_##n) { \
580
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
865
+ printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
581866 flags &= ~GRND_##n; \
582867 }
583868
....@@ -595,6 +880,12 @@
595880
/*
 * struct syscall_arg_fmt initializer for an argument whose values are
 * named by strarray__<array>. The 'name' parameter is not used in the
 * expansion.
 */
#define STRARRAY(name, array)				\
	{ .scnprintf	= SCA_STRARRAY,			\
	  .strtoul	= STUL_STRARRAY,		\
	  .parm		= &strarray__##array, }
885
+
/*
 * struct syscall_arg_fmt initializer for a flags argument whose bits are
 * named by strarray__<array>. The 'name' parameter is not used in the
 * expansion.
 */
#define STRARRAY_FLAGS(name, array)			\
	{ .scnprintf	= SCA_STRARRAY_FLAGS,		\
	  .strtoul	= STUL_STRARRAY_FLAGS,		\
	  .parm		= &strarray__##array, }
599890
600891 #include "trace/beauty/arch_errno_names.c"
....@@ -613,28 +904,20 @@
613904 #include "trace/beauty/socket_type.c"
614905 #include "trace/beauty/waitid_options.c"
615906
616
-struct syscall_arg_fmt {
617
- size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
618
- void *parm;
619
- const char *name;
620
- bool show_zero;
621
-};
622
-
623
-static struct syscall_fmt {
624
- const char *name;
625
- const char *alias;
626
- struct syscall_arg_fmt arg[6];
627
- u8 nr_args;
628
- bool errpid;
629
- bool timeout;
630
- bool hexret;
631
-} syscall_fmts[] = {
907
+static struct syscall_fmt syscall_fmts[] = {
632908 { .name = "access",
633909 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
910
+ { .name = "arch_prctl",
911
+ .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
912
+ [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
913
+ { .name = "bind",
914
+ .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
915
+ [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
916
+ [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
634917 { .name = "bpf",
635918 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
636919 { .name = "brk", .hexret = true,
637
- .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
920
+ .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
638921 { .name = "clock_gettime",
639922 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
640923 { .name = "clone", .errpid = true, .nr_args = 5,
....@@ -645,6 +928,10 @@
645928 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
646929 { .name = "close",
647930 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
931
+ { .name = "connect",
932
+ .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
933
+ [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
934
+ [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
648935 { .name = "epoll_ctl",
649936 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
650937 { .name = "eventfd2",
....@@ -654,12 +941,22 @@
654941 { .name = "fchownat",
655942 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
656943 { .name = "fcntl",
657
- .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
944
+ .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
945
+ .strtoul = STUL_STRARRAYS,
658946 .parm = &strarrays__fcntl_cmds_arrays,
659947 .show_zero = true, },
660948 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
661949 { .name = "flock",
662950 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
951
+ { .name = "fsconfig",
952
+ .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
953
+ { .name = "fsmount",
954
+ .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
955
+ [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
956
+ { .name = "fspick",
957
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
958
+ [1] = { .scnprintf = SCA_FILENAME, /* path */ },
959
+ [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
663960 { .name = "fstat", .alias = "newfstat", },
664961 { .name = "fstatat", .alias = "newfstatat", },
665962 { .name = "futex",
....@@ -710,31 +1007,33 @@
7101007 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
7111008 { .name = "mknodat",
7121009 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
713
- { .name = "mlock",
714
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
715
- { .name = "mlockall",
716
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
7171010 { .name = "mmap", .hexret = true,
7181011 /* The standard mmap maps to old_mmap on s390x */
7191012 #if defined(__s390x__)
7201013 .alias = "old_mmap",
7211014 #endif
722
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
723
- [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
724
- [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1015
+ .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
1016
+ [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */
1017
+ .strtoul = STUL_STRARRAY_FLAGS,
1018
+ .parm = &strarray__mmap_flags, },
1019
+ [5] = { .scnprintf = SCA_HEX, /* offset */ }, }, },
1020
+ { .name = "mount",
1021
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
1022
+ [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
1023
+ .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
1024
+ { .name = "move_mount",
1025
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ },
1026
+ [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ },
1027
+ [2] = { .scnprintf = SCA_FDAT, /* to_dfd */ },
1028
+ [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ },
1029
+ [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
7251030 { .name = "mprotect",
7261031 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
7271032 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
7281033 { .name = "mq_unlink",
7291034 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
7301035 { .name = "mremap", .hexret = true,
731
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
732
- [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
733
- [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
734
- { .name = "munlock",
735
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
736
- { .name = "munmap",
737
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1036
+ .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
7381037 { .name = "name_to_handle_at",
7391038 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
7401039 { .name = "newfstatat",
....@@ -763,8 +1062,10 @@
7631062 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
7641063 { .name = "poll", .timeout = true, },
7651064 { .name = "ppoll", .timeout = true, },
766
- { .name = "prctl", .alias = "arch_prctl",
767
- .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
1065
+ { .name = "prctl",
1066
+ .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */
1067
+ .strtoul = STUL_STRARRAY,
1068
+ .parm = &strarray__prctl_options, },
7681069 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
7691070 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
7701071 { .name = "pread", .alias = "pread64", },
....@@ -781,7 +1082,12 @@
7811082 { .name = "recvmsg",
7821083 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
7831084 { .name = "renameat",
784
- .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1085
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
1086
+ [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, }, },
1087
+ { .name = "renameat2",
1088
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
1089
+ [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
1090
+ [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
7851091 { .name = "rt_sigaction",
7861092 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
7871093 { .name = "rt_sigprocmask",
....@@ -796,12 +1102,14 @@
7961102 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
7971103 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
7981104 { .name = "select", .timeout = true, },
1105
+ { .name = "sendfile", .alias = "sendfile64", },
7991106 { .name = "sendmmsg",
8001107 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
8011108 { .name = "sendmsg",
8021109 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
8031110 { .name = "sendto",
804
- .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1111
+ .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
1112
+ [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
8051113 { .name = "set_tid_address", .errpid = true, },
8061114 { .name = "setitimer",
8071115 .arg = { [0] = STRARRAY(which, itimers), }, },
....@@ -826,10 +1134,14 @@
8261134 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
8271135 { .name = "symlinkat",
8281136 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1137
+ { .name = "sync_file_range",
1138
+ .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, },
8291139 { .name = "tgkill",
8301140 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
8311141 { .name = "tkill",
8321142 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1143
+ { .name = "umount2", .alias = "umount",
1144
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
8331145 { .name = "uname", .alias = "newuname", },
8341146 { .name = "unlinkat",
8351147 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
....@@ -847,25 +1159,66 @@
8471159 return strcmp(name, fmt->name);
8481160 }
8491161
1162
+static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
1163
+{
1164
+ return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1165
+}
1166
+
8501167 static struct syscall_fmt *syscall_fmt__find(const char *name)
8511168 {
8521169 const int nmemb = ARRAY_SIZE(syscall_fmts);
853
- return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1170
+ return __syscall_fmt__find(syscall_fmts, nmemb, name);
1171
+}
1172
+
1173
+static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
1174
+{
1175
+ int i;
1176
+
1177
+ for (i = 0; i < nmemb; ++i) {
1178
+ if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
1179
+ return &fmts[i];
1180
+ }
1181
+
1182
+ return NULL;
1183
+}
1184
+
1185
+static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
1186
+{
1187
+ const int nmemb = ARRAY_SIZE(syscall_fmts);
1188
+ return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
8541189 }
8551190
/*
 * Per-syscall-id descriptor, one slot per id in trace->syscalls.table.
 *
 * is_exit: is this "exit" or "exit_group"?
 * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
 * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
 * nonexistent: Just a hole in the syscall table, syscall id not allocated
 */
struct syscall {
	struct tep_event    *tp_format;	/* from trace_event__tp_format(); callers test it with IS_ERR() */
	int		    nr_args;
	int		    args_size;
	struct {
		struct bpf_program *sys_enter,
				   *sys_exit;
	} bpf_prog;
	bool		    is_exit;
	bool		    is_open;
	bool		    nonexistent;
	struct tep_format_field *args;	/* taken from tp_format->format.fields */
	const char	    *name;	/* as returned by syscalltbl__name() */
	struct syscall_fmt  *fmt;	/* syscall_fmt__find(name) result, may be NULL */
	struct syscall_arg_fmt *arg_fmt; /* per-argument formatters, allocated by syscall__alloc_arg_fmts() */
};
1213
+
1214
/*
 * Must match what is in the BPF program:
 *
 * tools/perf/examples/bpf/augmented_raw_syscalls.c
 *
 * NOTE(review): because the layout is shared with that BPF program, member
 * order and types must not be changed here alone.
 */
struct bpf_map_syscall_entry {
	bool	enabled;	/* presumably "augment/trace this syscall id" - confirm against the BPF side */
	u16	string_args_len[RAW_SYSCALL_ARGS_NUM];	/* one slot per raw syscall argument */
};
8701223
8711224 /*
....@@ -914,9 +1267,9 @@
9141267 char *name;
9151268 } filename;
9161269 struct {
917
- int max;
918
- char **table;
919
- } paths;
1270
+ int max;
1271
+ struct file *table;
1272
+ } files;
9201273
9211274 struct intlist *syscall_stats;
9221275 };
....@@ -925,10 +1278,10 @@
9251278 {
9261279 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
9271280
928
- if (ttrace)
929
- ttrace->paths.max = -1;
930
-
931
- ttrace->syscall_stats = intlist__new(NULL);
1281
+ if (ttrace) {
1282
+ ttrace->files.max = -1;
1283
+ ttrace->syscall_stats = intlist__new(NULL);
1284
+ }
9321285
9331286 return ttrace;
9341287 }
....@@ -970,30 +1323,51 @@
9701323
9711324 static const size_t trace__entry_str_size = 2048;
9721325
1326
+static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1327
+{
1328
+ if (fd < 0)
1329
+ return NULL;
1330
+
1331
+ if (fd > ttrace->files.max) {
1332
+ struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1333
+
1334
+ if (nfiles == NULL)
1335
+ return NULL;
1336
+
1337
+ if (ttrace->files.max != -1) {
1338
+ memset(nfiles + ttrace->files.max + 1, 0,
1339
+ (fd - ttrace->files.max) * sizeof(struct file));
1340
+ } else {
1341
+ memset(nfiles, 0, (fd + 1) * sizeof(struct file));
1342
+ }
1343
+
1344
+ ttrace->files.table = nfiles;
1345
+ ttrace->files.max = fd;
1346
+ }
1347
+
1348
+ return ttrace->files.table + fd;
1349
+}
1350
+
1351
/*
 * Non-static wrapper around thread_trace__files_entry() that starts from a
 * 'struct thread': fetches the thread's private thread_trace and returns
 * its slot for 'fd' (NULL on negative fd or allocation failure).
 */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	struct thread_trace *ttrace = thread__priv(thread);

	return thread_trace__files_entry(ttrace, fd);
}
1355
+
9731356 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
9741357 {
9751358 struct thread_trace *ttrace = thread__priv(thread);
1359
+ struct file *file = thread_trace__files_entry(ttrace, fd);
9761360
977
- if (fd > ttrace->paths.max) {
978
- char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
979
-
980
- if (npath == NULL)
981
- return -1;
982
-
983
- if (ttrace->paths.max != -1) {
984
- memset(npath + ttrace->paths.max + 1, 0,
985
- (fd - ttrace->paths.max) * sizeof(char *));
986
- } else {
987
- memset(npath, 0, (fd + 1) * sizeof(char *));
988
- }
989
-
990
- ttrace->paths.table = npath;
991
- ttrace->paths.max = fd;
1361
+ if (file != NULL) {
1362
+ struct stat st;
1363
+ if (stat(pathname, &st) == 0)
1364
+ file->dev_maj = major(st.st_rdev);
1365
+ file->pathname = strdup(pathname);
1366
+ if (file->pathname)
1367
+ return 0;
9921368 }
9931369
994
- ttrace->paths.table[fd] = strdup(pathname);
995
-
996
- return ttrace->paths.table[fd] != NULL ? 0 : -1;
1370
+ return -1;
9971371 }
9981372
9991373 static int thread__read_fd_path(struct thread *thread, int fd)
....@@ -1027,13 +1401,13 @@
10271401 {
10281402 struct thread_trace *ttrace = thread__priv(thread);
10291403
1030
- if (ttrace == NULL)
1404
+ if (ttrace == NULL || trace->fd_path_disabled)
10311405 return NULL;
10321406
10331407 if (fd < 0)
10341408 return NULL;
10351409
1036
- if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1410
+ if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
10371411 if (!trace->live)
10381412 return NULL;
10391413 ++trace->stats.proc_getname;
....@@ -1041,7 +1415,7 @@
10411415 return NULL;
10421416 }
10431417
1044
- return ttrace->paths.table[fd];
1418
+ return ttrace->files.table[fd].pathname;
10451419 }
10461420
10471421 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
....@@ -1080,8 +1454,8 @@
10801454 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
10811455 struct thread_trace *ttrace = thread__priv(arg->thread);
10821456
1083
- if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1084
- zfree(&ttrace->paths.table[fd]);
1457
+ if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1458
+ zfree(&ttrace->files.table[fd].pathname);
10851459
10861460 return printed;
10871461 }
....@@ -1095,10 +1469,29 @@
10951469 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
10961470 }
10971471
1472
+static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1473
+{
1474
+ struct augmented_arg *augmented_arg = arg->augmented.args;
1475
+ size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1476
+ /*
1477
+ * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls
1478
+ * we would have two strings, each prefixed by its size.
1479
+ */
1480
+ int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1481
+
1482
+ arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1483
+ arg->augmented.size -= consumed;
1484
+
1485
+ return printed;
1486
+}
1487
+
10981488 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
10991489 struct syscall_arg *arg)
11001490 {
11011491 unsigned long ptr = arg->val;
1492
+
1493
+ if (arg->augmented.args)
1494
+ return syscall_arg__scnprintf_augmented_string(arg, bf, size);
11021495
11031496 if (!arg->trace->vfs_getname)
11041497 return scnprintf(bf, size, "%#x", ptr);
....@@ -1142,11 +1535,9 @@
11421535 interrupted = sig == SIGINT;
11431536 }
11441537
1145
-static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1146
- u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1538
+static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
11471539 {
1148
- size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1149
- printed += fprintf_duration(duration, duration_calculated, fp);
1540
+ size_t printed = 0;
11501541
11511542 if (trace->multiple_threads) {
11521543 if (trace->show_comm)
....@@ -1155,6 +1546,18 @@
11551546 }
11561547
11571548 return printed;
1549
+}
1550
+
1551
+static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1552
+ u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1553
+{
1554
+ size_t printed = 0;
1555
+
1556
+ if (trace->show_tstamp)
1557
+ printed = trace__fprintf_tstamp(trace, tstamp, fp);
1558
+ if (trace->show_duration)
1559
+ printed += fprintf_duration(duration, duration_calculated, fp);
1560
+ return printed + trace__fprintf_comm_tid(trace, thread, fp);
11581561 }
11591562
11601563 static int trace__process_event(struct trace *trace, struct machine *machine,
....@@ -1194,7 +1597,7 @@
11941597
11951598 if (symbol_conf.kptr_restrict) {
11961599 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1197
- "Check /proc/sys/kernel/kptr_restrict.\n\n"
1600
+ "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
11981601 "Kernel samples will not be resolved.\n");
11991602 machine->kptr_restrict_warned = true;
12001603 return NULL;
....@@ -1203,7 +1606,7 @@
12031606 return machine__resolve_kernel_addr(vmachine, addrp, modp);
12041607 }
12051608
1206
-static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1609
+static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
12071610 {
12081611 int err = symbol__init(NULL);
12091612
....@@ -1219,8 +1622,8 @@
12191622 goto out;
12201623
12211624 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1222
- evlist->threads, trace__tool_process, false,
1223
- trace->opts.proc_map_timeout, 1);
1625
+ evlist->core.threads, trace__tool_process, false,
1626
+ 1);
12241627 out:
12251628 if (err)
12261629 symbol__exit();
....@@ -1240,7 +1643,7 @@
12401643 {
12411644 int idx;
12421645
1243
- if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1646
+ if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0)
12441647 nr_args = sc->fmt->nr_args;
12451648
12461649 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
....@@ -1256,31 +1659,60 @@
12561659 return 0;
12571660 }
12581661
1259
-static int syscall__set_arg_fmts(struct syscall *sc)
1260
-{
1261
- struct format_field *field;
1262
- int idx = 0, len;
1662
/*
 * Argument formatters that are picked by tracepoint field name, used as a
 * last resort by syscall_arg_fmt__init_array() when no type/name heuristic
 * matched.
 *
 * The table is searched with bsearch() through syscall_arg_fmt__cmp(), so
 * entries must be kept sorted by .name in strcmp() order.
 */
static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
	{ .name = "msr",	.scnprintf = SCA_X86_MSR,	  .strtoul = STUL_X86_MSR,	   },
	{ .name = "vector",	.scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
};
12631666
1264
- for (field = sc->args; field; field = field->next, ++idx) {
1265
- if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1667
+static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
1668
+{
1669
+ const struct syscall_arg_fmt *fmt = fmtp;
1670
+ return strcmp(name, fmt->name);
1671
+}
1672
+
1673
+static struct syscall_arg_fmt *
1674
+__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name)
1675
+{
1676
+ return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
1677
+}
1678
+
1679
+static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
1680
+{
1681
+ const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
1682
+ return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
1683
+}
1684
+
1685
+static struct tep_format_field *
1686
+syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
1687
+{
1688
+ struct tep_format_field *last_field = NULL;
1689
+ int len;
1690
+
1691
+ for (; field; field = field->next, ++arg) {
1692
+ last_field = field;
1693
+
1694
+ if (arg->scnprintf)
12661695 continue;
12671696
1697
+ len = strlen(field->name);
1698
+
12681699 if (strcmp(field->type, "const char *") == 0 &&
1269
- (strcmp(field->name, "filename") == 0 ||
1270
- strcmp(field->name, "path") == 0 ||
1271
- strcmp(field->name, "pathname") == 0))
1272
- sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
1273
- else if (field->flags & FIELD_IS_POINTER)
1274
- sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
1700
+ ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
1701
+ strstr(field->name, "path") != NULL))
1702
+ arg->scnprintf = SCA_FILENAME;
1703
+ else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
1704
+ arg->scnprintf = SCA_PTR;
12751705 else if (strcmp(field->type, "pid_t") == 0)
1276
- sc->arg_fmt[idx].scnprintf = SCA_PID;
1706
+ arg->scnprintf = SCA_PID;
12771707 else if (strcmp(field->type, "umode_t") == 0)
1278
- sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
1279
- else if ((strcmp(field->type, "int") == 0 ||
1708
+ arg->scnprintf = SCA_MODE_T;
1709
+ else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char")) {
1710
+ arg->scnprintf = SCA_CHAR_ARRAY;
1711
+ arg->nr_entries = field->arraylen;
1712
+ } else if ((strcmp(field->type, "int") == 0 ||
12801713 strcmp(field->type, "unsigned int") == 0 ||
12811714 strcmp(field->type, "long") == 0) &&
1282
- (len = strlen(field->name)) >= 2 &&
1283
- strcmp(field->name + len - 2, "fd") == 0) {
1715
+ len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
12841716 /*
12851717 * /sys/kernel/tracing/events/syscalls/sys_enter*
12861718 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
....@@ -1288,9 +1720,26 @@
12881720 * 23 unsigned int
12891721 * 7 unsigned long
12901722 */
1291
- sc->arg_fmt[idx].scnprintf = SCA_FD;
1723
+ arg->scnprintf = SCA_FD;
1724
+ } else {
1725
+ struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name);
1726
+
1727
+ if (fmt) {
1728
+ arg->scnprintf = fmt->scnprintf;
1729
+ arg->strtoul = fmt->strtoul;
1730
+ }
12921731 }
12931732 }
1733
+
1734
+ return last_field;
1735
+}
1736
+
1737
+static int syscall__set_arg_fmts(struct syscall *sc)
1738
+{
1739
+ struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
1740
+
1741
+ if (last_field)
1742
+ sc->args_size = last_field->offset + last_field->size;
12941743
12951744 return 0;
12961745 }
....@@ -1301,29 +1750,40 @@
13011750 struct syscall *sc;
13021751 const char *name = syscalltbl__name(trace->sctbl, id);
13031752
1304
- if (name == NULL)
1305
- return -1;
1753
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
1754
+ if (trace->syscalls.table == NULL) {
1755
+ trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
1756
+ if (trace->syscalls.table == NULL)
1757
+ return -ENOMEM;
1758
+ }
1759
+#else
1760
+ if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
1761
+ // When using libaudit we don't know beforehand what is the max syscall id
1762
+ struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
13061763
1307
- if (id > trace->syscalls.max) {
1308
- struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1764
+ if (table == NULL)
1765
+ return -ENOMEM;
13091766
1310
- if (nsyscalls == NULL)
1311
- return -1;
1767
+ // Need to memset from offset 0 and +1 members if brand new
1768
+ if (trace->syscalls.table == NULL)
1769
+ memset(table, 0, (id + 1) * sizeof(*sc));
1770
+ else
1771
+ memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
13121772
1313
- if (trace->syscalls.max != -1) {
1314
- memset(nsyscalls + trace->syscalls.max + 1, 0,
1315
- (id - trace->syscalls.max) * sizeof(*sc));
1316
- } else {
1317
- memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1318
- }
1773
+ trace->syscalls.table = table;
1774
+ trace->sctbl->syscalls.max_id = id;
1775
+ }
1776
+#endif
1777
+ sc = trace->syscalls.table + id;
1778
+ if (sc->nonexistent)
1779
+ return -EEXIST;
13191780
1320
- trace->syscalls.table = nsyscalls;
1321
- trace->syscalls.max = id;
1781
+ if (name == NULL) {
1782
+ sc->nonexistent = true;
1783
+ return -EEXIST;
13221784 }
13231785
1324
- sc = trace->syscalls.table + id;
13251786 sc->name = name;
1326
-
13271787 sc->fmt = syscall_fmt__find(sc->name);
13281788
13291789 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
....@@ -1334,11 +1794,18 @@
13341794 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
13351795 }
13361796
1337
- if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1338
- return -1;
1797
+ /*
1798
+ * Fails to read trace point format via sysfs node, so the trace point
1799
+ * doesn't exist. Set the 'nonexistent' flag as true.
1800
+ */
1801
+ if (IS_ERR(sc->tp_format)) {
1802
+ sc->nonexistent = true;
1803
+ return PTR_ERR(sc->tp_format);
1804
+ }
13391805
1340
- if (IS_ERR(sc->tp_format))
1341
- return -1;
1806
+ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
1807
+ RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
1808
+ return -ENOMEM;
13421809
13431810 sc->args = sc->tp_format->format.fields;
13441811 /*
....@@ -1357,14 +1824,33 @@
13571824 return syscall__set_arg_fmts(sc);
13581825 }
13591826
1827
+static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
1828
+{
1829
+ struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
1830
+
1831
+ if (fmt != NULL) {
1832
+ syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields);
1833
+ return 0;
1834
+ }
1835
+
1836
+ return -ENOMEM;
1837
+}
1838
+
1839
/*
 * qsort()/bsearch() comparator for arrays of int.
 *
 * Returns a negative value, zero or a positive value when *a is less than,
 * equal to or greater than *b.
 *
 * The result is computed from two comparisons rather than by subtracting
 * the operands: 'x - y' overflows (undefined behaviour for signed int)
 * when the operands are far apart, e.g. INT_MIN and 1, which could report
 * the wrong ordering.
 */
static int intcmp(const void *a, const void *b)
{
	const int one = *(const int *)a;
	const int another = *(const int *)b;

	return (one > another) - (one < another);
}
1845
+
13601846 static int trace__validate_ev_qualifier(struct trace *trace)
13611847 {
1362
- int err = 0, i;
1363
- size_t nr_allocated;
1848
+ int err = 0;
1849
+ bool printed_invalid_prefix = false;
13641850 struct str_node *pos;
1851
+ size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
13651852
1366
- trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1367
- trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1853
+ trace->ev_qualifier_ids.entries = malloc(nr_allocated *
13681854 sizeof(trace->ev_qualifier_ids.entries[0]));
13691855
13701856 if (trace->ev_qualifier_ids.entries == NULL) {
....@@ -1373,9 +1859,6 @@
13731859 err = -EINVAL;
13741860 goto out;
13751861 }
1376
-
1377
- nr_allocated = trace->ev_qualifier_ids.nr;
1378
- i = 0;
13791862
13801863 strlist__for_each_entry(pos, trace->ev_qualifier) {
13811864 const char *sc = pos->s;
....@@ -1386,17 +1869,18 @@
13861869 if (id >= 0)
13871870 goto matches;
13881871
1389
- if (err == 0) {
1390
- fputs("Error:\tInvalid syscall ", trace->output);
1391
- err = -EINVAL;
1872
+ if (!printed_invalid_prefix) {
1873
+ pr_debug("Skipping unknown syscalls: ");
1874
+ printed_invalid_prefix = true;
13921875 } else {
1393
- fputs(", ", trace->output);
1876
+ pr_debug(", ");
13941877 }
13951878
1396
- fputs(sc, trace->output);
1879
+ pr_debug("%s", sc);
1880
+ continue;
13971881 }
13981882 matches:
1399
- trace->ev_qualifier_ids.entries[i++] = id;
1883
+ trace->ev_qualifier_ids.entries[nr_used++] = id;
14001884 if (match_next == -1)
14011885 continue;
14021886
....@@ -1404,7 +1888,7 @@
14041888 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
14051889 if (id < 0)
14061890 break;
1407
- if (nr_allocated == trace->ev_qualifier_ids.nr) {
1891
+ if (nr_allocated == nr_used) {
14081892 void *entries;
14091893
14101894 nr_allocated += 8;
....@@ -1417,20 +1901,36 @@
14171901 }
14181902 trace->ev_qualifier_ids.entries = entries;
14191903 }
1420
- trace->ev_qualifier_ids.nr++;
1421
- trace->ev_qualifier_ids.entries[i++] = id;
1904
+ trace->ev_qualifier_ids.entries[nr_used++] = id;
14221905 }
14231906 }
14241907
1425
- if (err < 0) {
1426
- fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1427
- "\nHint:\tand: 'man syscalls'\n", trace->output);
1428
-out_free:
1429
- zfree(&trace->ev_qualifier_ids.entries);
1430
- trace->ev_qualifier_ids.nr = 0;
1431
- }
1908
+ trace->ev_qualifier_ids.nr = nr_used;
1909
+ qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
14321910 out:
1911
+ if (printed_invalid_prefix)
1912
+ pr_debug("\n");
14331913 return err;
1914
+out_free:
1915
+ zfree(&trace->ev_qualifier_ids.entries);
1916
+ trace->ev_qualifier_ids.nr = 0;
1917
+ goto out;
1918
+}
1919
+
1920
+static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
1921
+{
1922
+ bool in_ev_qualifier;
1923
+
1924
+ if (trace->ev_qualifier_ids.nr == 0)
1925
+ return true;
1926
+
1927
+ in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
1928
+ trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
1929
+
1930
+ if (in_ev_qualifier)
1931
+ return !trace->not_ev_qualifier;
1932
+
1933
+ return trace->not_ev_qualifier;
14341934 }
14351935
14361936 /*
....@@ -1459,31 +1959,49 @@
14591959 return scnprintf(bf, size, "arg%d: ", arg->idx);
14601960 }
14611961
1462
-static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1463
- struct syscall_arg *arg, unsigned long val)
1962
+/*
1963
+ * Check if the value is in fact zero, i.e. mask whatever needs masking, such
1964
+ * as mount 'flags' argument that needs ignoring some magic flag, see comment
1965
+ * in tools/perf/trace/beauty/mount_flags.c
1966
+ */
1967
+static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val)
14641968 {
1465
- if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1969
+ if (fmt && fmt->mask_val)
1970
+ return fmt->mask_val(arg, val);
1971
+
1972
+ return val;
1973
+}
1974
+
1975
+static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size,
1976
+ struct syscall_arg *arg, unsigned long val)
1977
+{
1978
+ if (fmt && fmt->scnprintf) {
14661979 arg->val = val;
1467
- if (sc->arg_fmt[arg->idx].parm)
1468
- arg->parm = sc->arg_fmt[arg->idx].parm;
1469
- return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1980
+ if (fmt->parm)
1981
+ arg->parm = fmt->parm;
1982
+ return fmt->scnprintf(bf, size, arg);
14701983 }
14711984 return scnprintf(bf, size, "%ld", val);
14721985 }
14731986
14741987 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1475
- unsigned char *args, struct trace *trace,
1476
- struct thread *thread)
1988
+ unsigned char *args, void *augmented_args, int augmented_args_size,
1989
+ struct trace *trace, struct thread *thread)
14771990 {
14781991 size_t printed = 0;
14791992 unsigned long val;
14801993 u8 bit = 1;
14811994 struct syscall_arg arg = {
14821995 .args = args,
1996
+ .augmented = {
1997
+ .size = augmented_args_size,
1998
+ .args = augmented_args,
1999
+ },
14832000 .idx = 0,
14842001 .mask = 0,
14852002 .trace = trace,
14862003 .thread = thread,
2004
+ .show_string_prefix = trace->show_string_prefix,
14872005 };
14882006 struct thread_trace *ttrace = thread__priv(thread);
14892007
....@@ -1495,14 +2013,20 @@
14952013 ttrace->ret_scnprintf = NULL;
14962014
14972015 if (sc->args != NULL) {
1498
- struct format_field *field;
2016
+ struct tep_format_field *field;
14992017
15002018 for (field = sc->args; field;
15012019 field = field->next, ++arg.idx, bit <<= 1) {
15022020 if (arg.mask & bit)
15032021 continue;
15042022
2023
+ arg.fmt = &sc->arg_fmt[arg.idx];
15052024 val = syscall_arg__val(&arg, arg.idx);
2025
+ /*
2026
+ * Some syscall args need some mask, most don't and
2027
+ * return val untouched.
2028
+ */
2029
+ val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);
15062030
15072031 /*
15082032 * Suppress this argument if its value is zero and
....@@ -1510,6 +2034,7 @@
15102034 * strarray for it.
15112035 */
15122036 if (val == 0 &&
2037
+ !trace->show_zeros &&
15132038 !(sc->arg_fmt &&
15142039 (sc->arg_fmt[arg.idx].show_zero ||
15152040 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
....@@ -1517,9 +2042,13 @@
15172042 sc->arg_fmt[arg.idx].parm))
15182043 continue;
15192044
1520
- printed += scnprintf(bf + printed, size - printed,
1521
- "%s%s: ", printed ? ", " : "", field->name);
1522
- printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
2045
+ printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2046
+
2047
+ if (trace->show_arg_names)
2048
+ printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2049
+
2050
+ printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
2051
+ bf + printed, size - printed, &arg, val);
15232052 }
15242053 } else if (IS_ERR(sc->tp_format)) {
15252054 /*
....@@ -1534,7 +2063,7 @@
15342063 if (printed)
15352064 printed += scnprintf(bf + printed, size - printed, ", ");
15362065 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
1537
- printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
2066
+ printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val);
15382067 next_arg:
15392068 ++arg.idx;
15402069 bit <<= 1;
....@@ -1544,13 +2073,14 @@
15442073 return printed;
15452074 }
15462075
1547
-typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
2076
+typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
15482077 union perf_event *event,
15492078 struct perf_sample *sample);
15502079
15512080 static struct syscall *trace__syscall_info(struct trace *trace,
1552
- struct perf_evsel *evsel, int id)
2081
+ struct evsel *evsel, int id)
15532082 {
2083
+ int err = 0;
15542084
15552085 if (id < 0) {
15562086
....@@ -1567,35 +2097,60 @@
15672097 if (verbose > 1) {
15682098 static u64 n;
15692099 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1570
- id, perf_evsel__name(evsel), ++n);
2100
+ id, evsel__name(evsel), ++n);
15712101 }
15722102 return NULL;
15732103 }
15742104
1575
- if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1576
- trace__read_syscall_info(trace, id))
2105
+ err = -EINVAL;
2106
+
2107
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
2108
+ if (id > trace->sctbl->syscalls.max_id) {
2109
+#else
2110
+ if (id >= trace->sctbl->syscalls.max_id) {
2111
+ /*
2112
+ * With libaudit we don't know beforehand what is the max_id,
2113
+ * so we let trace__read_syscall_info() figure that out as we
2114
+ * go on reading syscalls.
2115
+ */
2116
+ err = trace__read_syscall_info(trace, id);
2117
+ if (err)
2118
+#endif
2119
+ goto out_cant_read;
2120
+ }
2121
+
2122
+ if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
2123
+ (err = trace__read_syscall_info(trace, id)) != 0)
15772124 goto out_cant_read;
15782125
1579
- if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
2126
+ if (trace->syscalls.table && trace->syscalls.table[id].nonexistent)
15802127 goto out_cant_read;
15812128
15822129 return &trace->syscalls.table[id];
15832130
15842131 out_cant_read:
15852132 if (verbose > 0) {
1586
- fprintf(trace->output, "Problems reading syscall %d", id);
1587
- if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
2133
+ char sbuf[STRERR_BUFSIZE];
2134
+ fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
2135
+ if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
15882136 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
15892137 fputs(" information\n", trace->output);
15902138 }
15912139 return NULL;
15922140 }
15932141
1594
-static void thread__update_stats(struct thread_trace *ttrace,
1595
- int id, struct perf_sample *sample)
2142
/*
 * Per-thread, per-syscall statistics, hung off the ->priv of the intlist
 * node for the syscall id (see thread__update_stats()).
 */
struct syscall_stats {
	struct stats stats;		/* fed with the duration of each call */
	u64	     nr_failures;	/* number of calls that returned a negative value */
	int	     max_errno;		/* number of entries currently allocated in errnos[] */
	u32	     *errnos;		/* errnos[e - 1] counts failures with errno 'e'; NULL until first needed */
};
2148
+
2149
+static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
2150
+ int id, struct perf_sample *sample, long err, bool errno_summary)
15962151 {
15972152 struct int_node *inode;
1598
- struct stats *stats;
2153
+ struct syscall_stats *stats;
15992154 u64 duration = 0;
16002155
16012156 inode = intlist__findnew(ttrace->syscall_stats, id);
....@@ -1604,23 +2159,53 @@
16042159
16052160 stats = inode->priv;
16062161 if (stats == NULL) {
1607
- stats = malloc(sizeof(struct stats));
2162
+ stats = malloc(sizeof(*stats));
16082163 if (stats == NULL)
16092164 return;
1610
- init_stats(stats);
2165
+
2166
+ stats->nr_failures = 0;
2167
+ stats->max_errno = 0;
2168
+ stats->errnos = NULL;
2169
+ init_stats(&stats->stats);
16112170 inode->priv = stats;
16122171 }
16132172
16142173 if (ttrace->entry_time && sample->time > ttrace->entry_time)
16152174 duration = sample->time - ttrace->entry_time;
16162175
1617
- update_stats(stats, duration);
2176
+ update_stats(&stats->stats, duration);
2177
+
2178
+ if (err < 0) {
2179
+ ++stats->nr_failures;
2180
+
2181
+ if (!errno_summary)
2182
+ return;
2183
+
2184
+ err = -err;
2185
+ if (err > stats->max_errno) {
2186
+ u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32));
2187
+
2188
+ if (new_errnos) {
2189
+ memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32));
2190
+ } else {
2191
+ pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n",
2192
+ thread__comm_str(thread), thread->pid_, thread->tid);
2193
+ return;
2194
+ }
2195
+
2196
+ stats->errnos = new_errnos;
2197
+ stats->max_errno = err;
2198
+ }
2199
+
2200
+ ++stats->errnos[err - 1];
2201
+ }
16182202 }
16192203
16202204 static int trace__printf_interrupted_entry(struct trace *trace)
16212205 {
16222206 struct thread_trace *ttrace;
16232207 size_t printed;
2208
+ int len;
16242209
16252210 if (trace->failure_only || trace->current == NULL)
16262211 return 0;
....@@ -1631,13 +2216,20 @@
16312216 return 0;
16322217
16332218 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
1634
- printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
2219
+ printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
2220
+
2221
+ if (len < trace->args_alignment - 4)
2222
+ printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
2223
+
2224
+ printed += fprintf(trace->output, " ...\n");
2225
+
16352226 ttrace->entry_pending = false;
2227
+ ++trace->nr_events_printed;
16362228
16372229 return printed;
16382230 }
16392231
1640
-static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
2232
+static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
16412233 struct perf_sample *sample, struct thread *thread)
16422234 {
16432235 int printed = 0;
....@@ -1646,7 +2238,7 @@
16462238 double ts = (double)sample->time / NSEC_PER_MSEC;
16472239
16482240 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1649
- perf_evsel__name(evsel), ts,
2241
+ evsel__name(evsel), ts,
16502242 thread__comm_str(thread),
16512243 sample->pid, sample->tid, sample->cpu);
16522244 }
....@@ -1654,15 +2246,43 @@
16542246 return printed;
16552247 }
16562248
1657
-static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
2249
+static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
2250
+{
2251
+ void *augmented_args = NULL;
2252
+ /*
2253
+ * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
2254
+ * and there we get all 6 syscall args plus the tracepoint common fields
2255
+ * that gets calculated at the start and the syscall_nr (another long).
2256
+ * So we check if that is the case and if so don't look after the
2257
+ * sc->args_size but always after the full raw_syscalls:sys_enter payload,
2258
+ * which is fixed.
2259
+ *
2260
+ * We'll revisit this later to pass s->args_size to the BPF augmenter
2261
+ * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
2262
+ * copies only what we need for each syscall, like what happens when we
2263
+ * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
2264
+ * traffic to just what is needed for each syscall.
2265
+ */
2266
+ int args_size = raw_augmented_args_size ?: sc->args_size;
2267
+
2268
+ *augmented_args_size = sample->raw_size - args_size;
2269
+ if (*augmented_args_size > 0)
2270
+ augmented_args = sample->raw_data + args_size;
2271
+
2272
+ return augmented_args;
2273
+}
2274
+
2275
+static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
16582276 union perf_event *event __maybe_unused,
16592277 struct perf_sample *sample)
16602278 {
16612279 char *msg;
16622280 void *args;
1663
- size_t printed = 0;
2281
+ int printed = 0;
16642282 struct thread *thread;
16652283 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2284
+ int augmented_args_size = 0;
2285
+ void *augmented_args = NULL;
16662286 struct syscall *sc = trace__syscall_info(trace, evsel, id);
16672287 struct thread_trace *ttrace;
16682288
....@@ -1686,18 +2306,34 @@
16862306
16872307 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
16882308 trace__printf_interrupted_entry(trace);
1689
-
2309
+ /*
2310
+ * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible
2311
+ * arguments, even if the syscall being handled, say "openat", uses only 4 arguments
2312
+ * this breaks syscall__augmented_args() check for augmented args, as we calculate
2313
+ * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file,
2314
+ * so when handling, say the openat syscall, we end up getting 6 args for the
2315
+ * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly
2316
+ * thinking that the extra 2 u64 args are the augmented filename, so just check
2317
+ * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
2318
+ */
2319
+ if (evsel != trace->syscalls.events.sys_enter)
2320
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
16902321 ttrace->entry_time = sample->time;
16912322 msg = ttrace->entry_str;
16922323 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
16932324
16942325 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1695
- args, trace, thread);
2326
+ args, augmented_args, augmented_args_size, trace, thread);
16962327
16972328 if (sc->is_exit) {
16982329 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
2330
+ int alignment = 0;
2331
+
16992332 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1700
- fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
2333
+ printed = fprintf(trace->output, "%s)", ttrace->entry_str);
2334
+ if (trace->args_alignment > printed)
2335
+ alignment = trace->args_alignment - printed;
2336
+ fprintf(trace->output, "%*s= ?\n", alignment, " ");
17012337 }
17022338 } else {
17032339 ttrace->entry_pending = true;
....@@ -1715,7 +2351,7 @@
17152351 return err;
17162352 }
17172353
1718
-static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
2354
+static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
17192355 struct perf_sample *sample)
17202356 {
17212357 struct thread_trace *ttrace;
....@@ -1723,7 +2359,8 @@
17232359 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
17242360 struct syscall *sc = trace__syscall_info(trace, evsel, id);
17252361 char msg[1024];
1726
- void *args;
2362
+ void *args, *augmented_args = NULL;
2363
+ int augmented_args_size;
17272364
17282365 if (sc == NULL)
17292366 return -1;
....@@ -1738,7 +2375,8 @@
17382375 goto out_put;
17392376
17402377 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1741
- syscall__scnprintf_args(sc, msg, sizeof(msg), args, trace, thread);
2378
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2379
+ syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
17422380 fprintf(trace->output, "%s", msg);
17432381 err = 0;
17442382 out_put:
....@@ -1746,20 +2384,22 @@
17462384 return err;
17472385 }
17482386
1749
-static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
2387
+static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
17502388 struct perf_sample *sample,
17512389 struct callchain_cursor *cursor)
17522390 {
17532391 struct addr_location al;
1754
- int max_stack = evsel->attr.sample_max_stack ?
1755
- evsel->attr.sample_max_stack :
2392
+ int max_stack = evsel->core.attr.sample_max_stack ?
2393
+ evsel->core.attr.sample_max_stack :
17562394 trace->max_stack;
2395
+ int err;
17572396
1758
- if (machine__resolve(trace->host, &al, sample) < 0 ||
1759
- thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
2397
+ if (machine__resolve(trace->host, &al, sample) < 0)
17602398 return -1;
17612399
1762
- return 0;
2400
+ err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2401
+ addr_location__put(&al);
2402
+ return err;
17632403 }
17642404
17652405 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
....@@ -1769,18 +2409,18 @@
17692409 EVSEL__PRINT_DSO |
17702410 EVSEL__PRINT_UNKNOWN_AS_ADDR;
17712411
1772
- return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
2412
+ return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
17732413 }
17742414
1775
/*
 * Translate errno value @err to its name, using the errno table of the
 * architecture reported by the perf_env associated with @evsel.
 */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	const char *arch_name = perf_env__arch(evsel__env(evsel));

	return arch_syscalls__strerrno(arch_name, err);
}
17822422
1783
-static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2423
+static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
17842424 union perf_event *event __maybe_unused,
17852425 struct perf_sample *sample)
17862426 {
....@@ -1788,7 +2428,8 @@
17882428 u64 duration = 0;
17892429 bool duration_calculated = false;
17902430 struct thread *thread;
1791
- int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
2431
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
2432
+ int alignment = trace->args_alignment;
17922433 struct syscall *sc = trace__syscall_info(trace, evsel, id);
17932434 struct thread_trace *ttrace;
17942435
....@@ -1802,12 +2443,12 @@
18022443
18032444 trace__fprintf_sample(trace, evsel, sample, thread);
18042445
1805
- if (trace->summary)
1806
- thread__update_stats(ttrace, id, sample);
1807
-
18082446 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
18092447
1810
- if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
2448
+ if (trace->summary)
2449
+ thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);
2450
+
2451
+ if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
18112452 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
18122453 ttrace->filename.pending_open = false;
18132454 ++trace->stats.vfs_getname;
....@@ -1836,28 +2477,38 @@
18362477 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
18372478
18382479 if (ttrace->entry_pending) {
1839
- fprintf(trace->output, "%-70s", ttrace->entry_str);
2480
+ printed = fprintf(trace->output, "%s", ttrace->entry_str);
18402481 } else {
1841
- fprintf(trace->output, " ... [");
2482
+ printed += fprintf(trace->output, " ... [");
18422483 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1843
- fprintf(trace->output, "]: %s()", sc->name);
2484
+ printed += 9;
2485
+ printed += fprintf(trace->output, "]: %s()", sc->name);
18442486 }
2487
+
2488
+ printed++; /* the closing ')' */
2489
+
2490
+ if (alignment > printed)
2491
+ alignment -= printed;
2492
+ else
2493
+ alignment = 0;
2494
+
2495
+ fprintf(trace->output, ")%*s= ", alignment, " ");
18452496
18462497 if (sc->fmt == NULL) {
18472498 if (ret < 0)
18482499 goto errno_print;
18492500 signed_print:
1850
- fprintf(trace->output, ") = %ld", ret);
2501
+ fprintf(trace->output, "%ld", ret);
18512502 } else if (ret < 0) {
18522503 errno_print: {
18532504 char bf[STRERR_BUFSIZE];
18542505 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
18552506 *e = errno_to_name(evsel, -ret);
18562507
1857
- fprintf(trace->output, ") = -1 %s %s", e, emsg);
2508
+ fprintf(trace->output, "-1 %s (%s)", e, emsg);
18582509 }
18592510 } else if (ret == 0 && sc->fmt->timeout)
1860
- fprintf(trace->output, ") = 0 Timeout");
2511
+ fprintf(trace->output, "0 (Timeout)");
18612512 else if (ttrace->ret_scnprintf) {
18622513 char bf[1024];
18632514 struct syscall_arg arg = {
....@@ -1867,14 +2518,14 @@
18672518 };
18682519 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
18692520 ttrace->ret_scnprintf = NULL;
1870
- fprintf(trace->output, ") = %s", bf);
2521
+ fprintf(trace->output, "%s", bf);
18712522 } else if (sc->fmt->hexret)
1872
- fprintf(trace->output, ") = %#lx", ret);
2523
+ fprintf(trace->output, "%#lx", ret);
18732524 else if (sc->fmt->errpid) {
18742525 struct thread *child = machine__find_thread(trace->host, ret, ret);
18752526
18762527 if (child != NULL) {
1877
- fprintf(trace->output, ") = %ld", ret);
2528
+ fprintf(trace->output, "%ld", ret);
18782529 if (child->comm_set)
18792530 fprintf(trace->output, " (%s)", thread__comm_str(child));
18802531 thread__put(child);
....@@ -1884,10 +2535,17 @@
18842535
18852536 fputc('\n', trace->output);
18862537
2538
+ /*
2539
+ * We only consider an 'event' for the sake of --max-events a non-filtered
2540
+ * sys_enter + sys_exit and other tracepoint events.
2541
+ */
2542
+ if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
2543
+ interrupted = true;
2544
+
18872545 if (callchain_ret > 0)
18882546 trace__fprintf_callchain(trace, sample);
18892547 else if (callchain_ret < 0)
1890
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2548
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
18912549 out:
18922550 ttrace->entry_pending = false;
18932551 err = 0;
....@@ -1896,7 +2554,7 @@
18962554 return err;
18972555 }
18982556
1899
-static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2557
+static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
19002558 union perf_event *event __maybe_unused,
19012559 struct perf_sample *sample)
19022560 {
....@@ -1905,7 +2563,7 @@
19052563 size_t filename_len, entry_str_len, to_move;
19062564 ssize_t remaining_space;
19072565 char *pos;
1908
- const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2566
+ const char *filename = evsel__rawptr(evsel, sample, "pathname");
19092567
19102568 if (!thread)
19112569 goto out;
....@@ -1957,11 +2615,11 @@
19572615 return 0;
19582616 }
19592617
1960
-static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2618
+static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
19612619 union perf_event *event __maybe_unused,
19622620 struct perf_sample *sample)
19632621 {
1964
- u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2622
+ u64 runtime = evsel__intval(evsel, sample, "runtime");
19652623 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
19662624 struct thread *thread = machine__findnew_thread(trace->host,
19672625 sample->pid,
....@@ -1980,10 +2638,10 @@
19802638 out_dump:
19812639 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
19822640 evsel->name,
1983
- perf_evsel__strval(evsel, sample, "comm"),
1984
- (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2641
+ evsel__strval(evsel, sample, "comm"),
2642
+ (pid_t)evsel__intval(evsel, sample, "pid"),
19852643 runtime,
1986
- perf_evsel__intval(evsel, sample, "vruntime"));
2644
+ evsel__intval(evsel, sample, "vruntime"));
19872645 goto out_put;
19882646 }
19892647
....@@ -2016,13 +2674,99 @@
20162674 {
20172675 binary__fprintf(sample->raw_data, sample->raw_size, 8,
20182676 bpf_output__printer, NULL, trace->output);
2677
+ ++trace->nr_events_printed;
20192678 }
20202679
2021
-static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2680
+static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample,
2681
+ struct thread *thread, void *augmented_args, int augmented_args_size)
2682
+{
2683
+ char bf[2048];
2684
+ size_t size = sizeof(bf);
2685
+ struct tep_format_field *field = evsel->tp_format->format.fields;
2686
+ struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
2687
+ size_t printed = 0;
2688
+ unsigned long val;
2689
+ u8 bit = 1;
2690
+ struct syscall_arg syscall_arg = {
2691
+ .augmented = {
2692
+ .size = augmented_args_size,
2693
+ .args = augmented_args,
2694
+ },
2695
+ .idx = 0,
2696
+ .mask = 0,
2697
+ .trace = trace,
2698
+ .thread = thread,
2699
+ .show_string_prefix = trace->show_string_prefix,
2700
+ };
2701
+
2702
+ for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) {
2703
+ if (syscall_arg.mask & bit)
2704
+ continue;
2705
+
2706
+ syscall_arg.len = 0;
2707
+ syscall_arg.fmt = arg;
2708
+ if (field->flags & TEP_FIELD_IS_ARRAY) {
2709
+ int offset = field->offset;
2710
+
2711
+ if (field->flags & TEP_FIELD_IS_DYNAMIC) {
2712
+ offset = format_field__intval(field, sample, evsel->needs_swap);
2713
+ syscall_arg.len = offset >> 16;
2714
+ offset &= 0xffff;
2715
+ }
2716
+
2717
+ val = (uintptr_t)(sample->raw_data + offset);
2718
+ } else
2719
+ val = format_field__intval(field, sample, evsel->needs_swap);
2720
+ /*
2721
+ * Some syscall args need some mask, most don't and
2722
+ * return val untouched.
2723
+ */
2724
+ val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
2725
+
2726
+ /*
2727
+ * Suppress this argument if its value is zero and
2728
+ * and we don't have a string associated in an
2729
+ * strarray for it.
2730
+ */
2731
+ if (val == 0 &&
2732
+ !trace->show_zeros &&
2733
+ !((arg->show_zero ||
2734
+ arg->scnprintf == SCA_STRARRAY ||
2735
+ arg->scnprintf == SCA_STRARRAYS) &&
2736
+ arg->parm))
2737
+ continue;
2738
+
2739
+ printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2740
+
2741
+ /*
2742
+ * XXX Perhaps we should have a show_tp_arg_names,
2743
+ * leaving show_arg_names just for syscalls?
2744
+ */
2745
+ if (1 || trace->show_arg_names)
2746
+ printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2747
+
2748
+ printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
2749
+ }
2750
+
2751
+ return printed + fprintf(trace->output, "%s", bf);
2752
+}
2753
+
2754
+static int trace__event_handler(struct trace *trace, struct evsel *evsel,
20222755 union perf_event *event __maybe_unused,
20232756 struct perf_sample *sample)
20242757 {
2758
+ struct thread *thread;
20252759 int callchain_ret = 0;
2760
+ /*
2761
+ * Check if we called perf_evsel__disable(evsel) due to, for instance,
2762
+ * this event's max_events having been hit and this is an entry coming
2763
+ * from the ring buffer that we should discard, since the max events
2764
+ * have already been considered/printed.
2765
+ */
2766
+ if (evsel->disabled)
2767
+ return 0;
2768
+
2769
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
20262770
20272771 if (sample->callchain) {
20282772 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
....@@ -2036,32 +2780,63 @@
20362780 trace__printf_interrupted_entry(trace);
20372781 trace__fprintf_tstamp(trace, sample->time, trace->output);
20382782
2039
- if (trace->trace_syscalls)
2783
+ if (trace->trace_syscalls && trace->show_duration)
20402784 fprintf(trace->output, "( ): ");
20412785
2042
- fprintf(trace->output, "%s:", evsel->name);
2786
+ if (thread)
2787
+ trace__fprintf_comm_tid(trace, thread, trace->output);
20432788
2044
- if (perf_evsel__is_bpf_output(evsel)) {
2045
- if (evsel == trace->syscalls.events.augmented)
2789
+ if (evsel == trace->syscalls.events.augmented) {
2790
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample);
2791
+ struct syscall *sc = trace__syscall_info(trace, evsel, id);
2792
+
2793
+ if (sc) {
2794
+ fprintf(trace->output, "%s(", sc->name);
20462795 trace__fprintf_sys_enter(trace, evsel, sample);
2047
- else
2048
- bpf_output__fprintf(trace, sample);
2796
+ fputc(')', trace->output);
2797
+ goto newline;
2798
+ }
2799
+
2800
+ /*
2801
+ * XXX: Not having the associated syscall info or not finding/adding
2802
+ * the thread should never happen, but if it does...
2803
+ * fall thru and print it as a bpf_output event.
2804
+ */
2805
+ }
2806
+
2807
+ fprintf(trace->output, "%s(", evsel->name);
2808
+
2809
+ if (evsel__is_bpf_output(evsel)) {
2810
+ bpf_output__fprintf(trace, sample);
20492811 } else if (evsel->tp_format) {
20502812 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
20512813 trace__fprintf_sys_enter(trace, evsel, sample)) {
2052
- event_format__fprintf(evsel->tp_format, sample->cpu,
2053
- sample->raw_data, sample->raw_size,
2054
- trace->output);
2814
+ if (trace->libtraceevent_print) {
2815
+ event_format__fprintf(evsel->tp_format, sample->cpu,
2816
+ sample->raw_data, sample->raw_size,
2817
+ trace->output);
2818
+ } else {
2819
+ trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0);
2820
+ }
20552821 }
20562822 }
20572823
2058
- fprintf(trace->output, "\n");
2824
+newline:
2825
+ fprintf(trace->output, ")\n");
20592826
20602827 if (callchain_ret > 0)
20612828 trace__fprintf_callchain(trace, sample);
20622829 else if (callchain_ret < 0)
2063
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2830
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2831
+
2832
+ ++trace->nr_events_printed;
2833
+
2834
+ if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
2835
+ evsel__disable(evsel);
2836
+ evsel__close(evsel);
2837
+ }
20642838 out:
2839
+ thread__put(thread);
20652840 return 0;
20662841 }
20672842
....@@ -2083,7 +2858,7 @@
20832858 }
20842859
20852860 static int trace__pgfault(struct trace *trace,
2086
- struct perf_evsel *evsel,
2861
+ struct evsel *evsel,
20872862 union perf_event *event __maybe_unused,
20882863 struct perf_sample *sample)
20892864 {
....@@ -2109,7 +2884,7 @@
21092884 if (ttrace == NULL)
21102885 goto out_put;
21112886
2112
- if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2887
+ if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
21132888 ttrace->pfmaj++;
21142889 else
21152890 ttrace->pfmin++;
....@@ -2122,7 +2897,7 @@
21222897 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
21232898
21242899 fprintf(trace->output, "%sfault [",
2125
- evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2900
+ evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
21262901 "maj" : "min");
21272902
21282903 print_location(trace->output, sample, &al, false, true);
....@@ -2147,7 +2922,9 @@
21472922 if (callchain_ret > 0)
21482923 trace__fprintf_callchain(trace, sample);
21492924 else if (callchain_ret < 0)
2150
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2925
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2926
+
2927
+ ++trace->nr_events_printed;
21512928 out:
21522929 err = 0;
21532930 out_put:
....@@ -2156,7 +2933,7 @@
21562933 }
21572934
21582935 static void trace__set_base_time(struct trace *trace,
2159
- struct perf_evsel *evsel,
2936
+ struct evsel *evsel,
21602937 struct perf_sample *sample)
21612938 {
21622939 /*
....@@ -2168,14 +2945,14 @@
21682945 * appears in our event stream (vfs_getname comes to mind).
21692946 */
21702947 if (trace->base_time == 0 && !trace->full_time &&
2171
- (evsel->attr.sample_type & PERF_SAMPLE_TIME))
2948
+ (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
21722949 trace->base_time = sample->time;
21732950 }
21742951
21752952 static int trace__process_sample(struct perf_tool *tool,
21762953 union perf_event *event,
21772954 struct perf_sample *sample,
2178
- struct perf_evsel *evsel,
2955
+ struct evsel *evsel,
21792956 struct machine *machine __maybe_unused)
21802957 {
21812958 struct trace *trace = container_of(tool, struct trace, tool);
....@@ -2209,21 +2986,23 @@
22092986 "-m", "1024",
22102987 "-c", "1",
22112988 };
2212
-
2989
+ pid_t pid = getpid();
2990
+ char *filter = asprintf__tp_filter_pids(1, &pid);
22132991 const char * const sc_args[] = { "-e", };
22142992 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
22152993 const char * const majpf_args[] = { "-e", "major-faults" };
22162994 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
22172995 const char * const minpf_args[] = { "-e", "minor-faults" };
22182996 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2997
+ int err = -1;
22192998
2220
- /* +1 is for the event string below */
2221
- rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2999
+ /* +3 is for the event string below and the pid filter */
3000
+ rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 +
22223001 majpf_args_nr + minpf_args_nr + argc;
22233002 rec_argv = calloc(rec_argc + 1, sizeof(char *));
22243003
2225
- if (rec_argv == NULL)
2226
- return -ENOMEM;
3004
+ if (rec_argv == NULL || filter == NULL)
3005
+ goto out_free;
22273006
22283007 j = 0;
22293008 for (i = 0; i < ARRAY_SIZE(record_args); i++)
....@@ -2240,10 +3019,12 @@
22403019 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
22413020 else {
22423021 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2243
- free(rec_argv);
2244
- return -1;
3022
+ goto out_free;
22453023 }
22463024 }
3025
+
3026
+ rec_argv[j++] = "--filter";
3027
+ rec_argv[j++] = filter;
22473028
22483029 if (trace->trace_pgfaults & TRACE_PFMAJ)
22493030 for (i = 0; i < majpf_args_nr; i++)
....@@ -2256,42 +3037,53 @@
22563037 for (i = 0; i < (unsigned int)argc; i++)
22573038 rec_argv[j++] = argv[i];
22583039
2259
- return cmd_record(j, rec_argv);
3040
+ err = cmd_record(j, rec_argv);
3041
+out_free:
3042
+ free(filter);
3043
+ free(rec_argv);
3044
+ return err;
22603045 }
22613046
22623047 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
22633048
2264
-static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
3049
+static bool evlist__add_vfs_getname(struct evlist *evlist)
22653050 {
22663051 bool found = false;
2267
- struct perf_evsel *evsel, *tmp;
2268
- struct parse_events_error err = { .idx = 0, };
2269
- int ret = parse_events(evlist, "probe:vfs_getname*", &err);
3052
+ struct evsel *evsel, *tmp;
3053
+ struct parse_events_error err;
3054
+ int ret;
22703055
2271
- if (ret)
3056
+ bzero(&err, sizeof(err));
3057
+ ret = parse_events(evlist, "probe:vfs_getname*", &err);
3058
+ if (ret) {
3059
+ free(err.str);
3060
+ free(err.help);
3061
+ free(err.first_str);
3062
+ free(err.first_help);
22723063 return false;
3064
+ }
22733065
22743066 evlist__for_each_entry_safe(evlist, evsel, tmp) {
2275
- if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
3067
+ if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
22763068 continue;
22773069
2278
- if (perf_evsel__field(evsel, "pathname")) {
3070
+ if (evsel__field(evsel, "pathname")) {
22793071 evsel->handler = trace__vfs_getname;
22803072 found = true;
22813073 continue;
22823074 }
22833075
2284
- list_del_init(&evsel->node);
3076
+ list_del_init(&evsel->core.node);
22853077 evsel->evlist = NULL;
2286
- perf_evsel__delete(evsel);
3078
+ evsel__delete(evsel);
22873079 }
22883080
22893081 return found;
22903082 }
22913083
2292
-static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
3084
+static struct evsel *evsel__new_pgfault(u64 config)
22933085 {
2294
- struct perf_evsel *evsel;
3086
+ struct evsel *evsel;
22953087 struct perf_event_attr attr = {
22963088 .type = PERF_TYPE_SOFTWARE,
22973089 .mmap_data = 1,
....@@ -2302,7 +3094,7 @@
23023094
23033095 event_attr_init(&attr);
23043096
2305
- evsel = perf_evsel__new(&attr);
3097
+ evsel = evsel__new(&attr);
23063098 if (evsel)
23073099 evsel->handler = trace__pgfault;
23083100
....@@ -2312,7 +3104,7 @@
23123104 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
23133105 {
23143106 const u32 type = event->header.type;
2315
- struct perf_evsel *evsel;
3107
+ struct evsel *evsel;
23163108
23173109 if (type != PERF_RECORD_SAMPLE) {
23183110 trace__process_event(trace, trace->host, event, sample);
....@@ -2325,24 +3117,30 @@
23253117 return;
23263118 }
23273119
3120
+ if (evswitch__discard(&trace->evswitch, evsel))
3121
+ return;
3122
+
23283123 trace__set_base_time(trace, evsel, sample);
23293124
2330
- if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
3125
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
23313126 sample->raw_data == NULL) {
23323127 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2333
- perf_evsel__name(evsel), sample->tid,
3128
+ evsel__name(evsel), sample->tid,
23343129 sample->cpu, sample->raw_size);
23353130 } else {
23363131 tracepoint_handler handler = evsel->handler;
23373132 handler(trace, evsel, event, sample);
23383133 }
3134
+
3135
+ if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
3136
+ interrupted = true;
23393137 }
23403138
23413139 static int trace__add_syscall_newtp(struct trace *trace)
23423140 {
23433141 int ret = -1;
2344
- struct perf_evlist *evlist = trace->evlist;
2345
- struct perf_evsel *sys_enter, *sys_exit;
3142
+ struct evlist *evlist = trace->evlist;
3143
+ struct evsel *sys_enter, *sys_exit;
23463144
23473145 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
23483146 if (sys_enter == NULL)
....@@ -2358,11 +3156,11 @@
23583156 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
23593157 goto out_delete_sys_exit;
23603158
2361
- perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2362
- perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
3159
+ evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
3160
+ evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
23633161
2364
- perf_evlist__add(evlist, sys_enter);
2365
- perf_evlist__add(evlist, sys_exit);
3162
+ evlist__add(evlist, sys_enter);
3163
+ evlist__add(evlist, sys_exit);
23663164
23673165 if (callchain_param.enabled && !trace->kernel_syscallchains) {
23683166 /*
....@@ -2370,7 +3168,7 @@
23703168 * leading to the syscall, allow overriding that for
23713169 * debugging reasons using --kernel_syscall_callchains
23723170 */
2373
- sys_exit->attr.exclude_callchain_kernel = 1;
3171
+ sys_exit->core.attr.exclude_callchain_kernel = 1;
23743172 }
23753173
23763174 trace->syscalls.events.sys_enter = sys_enter;
....@@ -2381,16 +3179,16 @@
23813179 return ret;
23823180
23833181 out_delete_sys_exit:
2384
- perf_evsel__delete_priv(sys_exit);
3182
+ evsel__delete_priv(sys_exit);
23853183 out_delete_sys_enter:
2386
- perf_evsel__delete_priv(sys_enter);
3184
+ evsel__delete_priv(sys_enter);
23873185 goto out;
23883186 }
23893187
2390
-static int trace__set_ev_qualifier_filter(struct trace *trace)
3188
+static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
23913189 {
23923190 int err = -1;
2393
- struct perf_evsel *sys_exit;
3191
+ struct evsel *sys_exit;
23943192 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
23953193 trace->ev_qualifier_ids.nr,
23963194 trace->ev_qualifier_ids.entries);
....@@ -2398,10 +3196,9 @@
23983196 if (filter == NULL)
23993197 goto out_enomem;
24003198
2401
- if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2402
- filter)) {
3199
+ if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
24033200 sys_exit = trace->syscalls.events.sys_exit;
2404
- err = perf_evsel__append_tp_filter(sys_exit, filter);
3201
+ err = evsel__append_tp_filter(sys_exit, filter);
24053202 }
24063203
24073204 free(filter);
....@@ -2412,9 +3209,450 @@
24123209 goto out;
24133210 }
24143211
3212
+#ifdef HAVE_LIBBPF_SUPPORT
3213
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
3214
+{
3215
+ if (trace->bpf_obj == NULL)
3216
+ return NULL;
3217
+
3218
+ return bpf_object__find_map_by_name(trace->bpf_obj, name);
3219
+}
3220
+
3221
+static void trace__set_bpf_map_filtered_pids(struct trace *trace)
3222
+{
3223
+ trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
3224
+}
3225
+
3226
+static void trace__set_bpf_map_syscalls(struct trace *trace)
3227
+{
3228
+ trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
3229
+ trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
3230
+ trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
3231
+}
3232
+
3233
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
3234
+{
3235
+ if (trace->bpf_obj == NULL)
3236
+ return NULL;
3237
+
3238
+ return bpf_object__find_program_by_title(trace->bpf_obj, name);
3239
+}
3240
+
3241
+static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
3242
+ const char *prog_name, const char *type)
3243
+{
3244
+ struct bpf_program *prog;
3245
+
3246
+ if (prog_name == NULL) {
3247
+ char default_prog_name[256];
3248
+ scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
3249
+ prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3250
+ if (prog != NULL)
3251
+ goto out_found;
3252
+ if (sc->fmt && sc->fmt->alias) {
3253
+ scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
3254
+ prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3255
+ if (prog != NULL)
3256
+ goto out_found;
3257
+ }
3258
+ goto out_unaugmented;
3259
+ }
3260
+
3261
+ prog = trace__find_bpf_program_by_title(trace, prog_name);
3262
+
3263
+ if (prog != NULL) {
3264
+out_found:
3265
+ return prog;
3266
+ }
3267
+
3268
+ pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
3269
+ prog_name, type, sc->name);
3270
+out_unaugmented:
3271
+ return trace->syscalls.unaugmented_prog;
3272
+}
3273
+
3274
+static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
3275
+{
3276
+ struct syscall *sc = trace__syscall_info(trace, NULL, id);
3277
+
3278
+ if (sc == NULL)
3279
+ return;
3280
+
3281
+ sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3282
+ sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
3283
+}
3284
+
3285
+static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
3286
+{
3287
+ struct syscall *sc = trace__syscall_info(trace, NULL, id);
3288
+ return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3289
+}
3290
+
3291
+static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
3292
+{
3293
+ struct syscall *sc = trace__syscall_info(trace, NULL, id);
3294
+ return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3295
+}
3296
+
3297
+static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
3298
+{
3299
+ struct syscall *sc = trace__syscall_info(trace, NULL, id);
3300
+ int arg = 0;
3301
+
3302
+ if (sc == NULL)
3303
+ goto out;
3304
+
3305
+ for (; arg < sc->nr_args; ++arg) {
3306
+ entry->string_args_len[arg] = 0;
3307
+ if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
3308
+ /* Should be set like strace -s strsize */
3309
+ entry->string_args_len[arg] = PATH_MAX;
3310
+ }
3311
+ }
3312
+out:
3313
+ for (; arg < 6; ++arg)
3314
+ entry->string_args_len[arg] = 0;
3315
+}
3316
+static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
3317
+{
3318
+ int fd = bpf_map__fd(trace->syscalls.map);
3319
+ struct bpf_map_syscall_entry value = {
3320
+ .enabled = !trace->not_ev_qualifier,
3321
+ };
3322
+ int err = 0;
3323
+ size_t i;
3324
+
3325
+ for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
3326
+ int key = trace->ev_qualifier_ids.entries[i];
3327
+
3328
+ if (value.enabled) {
3329
+ trace__init_bpf_map_syscall_args(trace, key, &value);
3330
+ trace__init_syscall_bpf_progs(trace, key);
3331
+ }
3332
+
3333
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
3334
+ if (err)
3335
+ break;
3336
+ }
3337
+
3338
+ return err;
3339
+}
3340
+
3341
+static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
3342
+{
3343
+ int fd = bpf_map__fd(trace->syscalls.map);
3344
+ struct bpf_map_syscall_entry value = {
3345
+ .enabled = enabled,
3346
+ };
3347
+ int err = 0, key;
3348
+
3349
+ for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3350
+ if (enabled)
3351
+ trace__init_bpf_map_syscall_args(trace, key, &value);
3352
+
3353
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
3354
+ if (err)
3355
+ break;
3356
+ }
3357
+
3358
+ return err;
3359
+}
3360
+
3361
+static int trace__init_syscalls_bpf_map(struct trace *trace)
3362
+{
3363
+ bool enabled = true;
3364
+
3365
+ if (trace->ev_qualifier_ids.nr)
3366
+ enabled = trace->not_ev_qualifier;
3367
+
3368
+ return __trace__init_syscalls_bpf_map(trace, enabled);
3369
+}
3370
+
3371
+static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
3372
+{
3373
+ struct tep_format_field *field, *candidate_field;
3374
+ int id;
3375
+
3376
+ /*
3377
+ * We're only interested in syscalls that have a pointer:
3378
+ */
3379
+ for (field = sc->args; field; field = field->next) {
3380
+ if (field->flags & TEP_FIELD_IS_POINTER)
3381
+ goto try_to_find_pair;
3382
+ }
3383
+
3384
+ return NULL;
3385
+
3386
+try_to_find_pair:
3387
+ for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
3388
+ struct syscall *pair = trace__syscall_info(trace, NULL, id);
3389
+ struct bpf_program *pair_prog;
3390
+ bool is_candidate = false;
3391
+
3392
+ if (pair == NULL || pair == sc ||
3393
+ pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
3394
+ continue;
3395
+
3396
+ for (field = sc->args, candidate_field = pair->args;
3397
+ field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
3398
+ bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
3399
+ candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
3400
+
3401
+ if (is_pointer) {
3402
+ if (!candidate_is_pointer) {
3403
+ // The candidate just doesn't copies our pointer arg, might copy other pointers we want.
3404
+ continue;
3405
+ }
3406
+ } else {
3407
+ if (candidate_is_pointer) {
3408
+ // The candidate might copy a pointer we don't have, skip it.
3409
+ goto next_candidate;
3410
+ }
3411
+ continue;
3412
+ }
3413
+
3414
+ if (strcmp(field->type, candidate_field->type))
3415
+ goto next_candidate;
3416
+
3417
+ is_candidate = true;
3418
+ }
3419
+
3420
+ if (!is_candidate)
3421
+ goto next_candidate;
3422
+
3423
+ /*
3424
+ * Check if the tentative pair syscall augmenter has more pointers, if it has,
3425
+ * then it may be collecting that and we then can't use it, as it would collect
3426
+ * more than what is common to the two syscalls.
3427
+ */
3428
+ if (candidate_field) {
3429
+ for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
3430
+ if (candidate_field->flags & TEP_FIELD_IS_POINTER)
3431
+ goto next_candidate;
3432
+ }
3433
+
3434
+ pair_prog = pair->bpf_prog.sys_enter;
3435
+ /*
3436
+ * If the pair isn't enabled, then its bpf_prog.sys_enter will not
3437
+ * have been searched for, so search it here and if it returns the
3438
+ * unaugmented one, then ignore it, otherwise we'll reuse that BPF
3439
+ * program for a filtered syscall on a non-filtered one.
3440
+ *
3441
+ * For instance, we have "!syscalls:sys_enter_renameat" and that is
3442
+ * useful for "renameat2".
3443
+ */
3444
+ if (pair_prog == NULL) {
3445
+ pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3446
+ if (pair_prog == trace->syscalls.unaugmented_prog)
3447
+ goto next_candidate;
3448
+ }
3449
+
3450
+ pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
3451
+ return pair_prog;
3452
+ next_candidate:
3453
+ continue;
3454
+ }
3455
+
3456
+ return NULL;
3457
+}
3458
+
3459
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3460
+{
3461
+ int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
3462
+ map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
3463
+ int err = 0, key;
3464
+
3465
+ for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3466
+ int prog_fd;
3467
+
3468
+ if (!trace__syscall_enabled(trace, key))
3469
+ continue;
3470
+
3471
+ trace__init_syscall_bpf_progs(trace, key);
3472
+
3473
+ // It'll get at least the "!raw_syscalls:unaugmented"
3474
+ prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3475
+ err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3476
+ if (err)
3477
+ break;
3478
+ prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3479
+ err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
3480
+ if (err)
3481
+ break;
3482
+ }
3483
+
3484
+ /*
3485
+ * Now lets do a second pass looking for enabled syscalls without
3486
+ * an augmenter that have a signature that is a superset of another
3487
+ * syscall with an augmenter so that we can auto-reuse it.
3488
+ *
3489
+ * I.e. if we have an augmenter for the "open" syscall that has
3490
+ * this signature:
3491
+ *
3492
+ * int open(const char *pathname, int flags, mode_t mode);
3493
+ *
3494
+ * I.e. that will collect just the first string argument, then we
3495
+ * can reuse it for the 'creat' syscall, that has this signature:
3496
+ *
3497
+ * int creat(const char *pathname, mode_t mode);
3498
+ *
3499
+ * and for:
3500
+ *
3501
+ * int stat(const char *pathname, struct stat *statbuf);
3502
+ * int lstat(const char *pathname, struct stat *statbuf);
3503
+ *
3504
+ * Because the 'open' augmenter will collect the first arg as a string,
3505
+ * and leave alone all the other args, which already helps with
3506
+ * beautifying 'stat' and 'lstat''s pathname arg.
3507
+ *
3508
+ * Then, in time, when 'stat' gets an augmenter that collects both
3509
+ * first and second arg (this one on the raw_syscalls:sys_exit prog
3510
+ * array tail call, then that one will be used.
3511
+ */
3512
+ for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3513
+ struct syscall *sc = trace__syscall_info(trace, NULL, key);
3514
+ struct bpf_program *pair_prog;
3515
+ int prog_fd;
3516
+
3517
+ if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
3518
+ continue;
3519
+
3520
+ /*
3521
+ * For now we're just reusing the sys_enter prog, and if it
3522
+ * already has an augmenter, we don't need to find one.
3523
+ */
3524
+ if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
3525
+ continue;
3526
+
3527
+ /*
3528
+ * Look at all the other syscalls for one that has a signature
3529
+ * that is close enough that we can share:
3530
+ */
3531
+ pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
3532
+ if (pair_prog == NULL)
3533
+ continue;
3534
+
3535
+ sc->bpf_prog.sys_enter = pair_prog;
3536
+
3537
+ /*
3538
+ * Update the BPF_MAP_TYPE_PROG_SHARED for raw_syscalls:sys_enter
3539
+ * with the fd for the program we're reusing:
3540
+ */
3541
+ prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
3542
+ err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3543
+ if (err)
3544
+ break;
3545
+ }
3546
+
3547
+
3548
+ return err;
3549
+}
3550
+
3551
+static void trace__delete_augmented_syscalls(struct trace *trace)
3552
+{
3553
+ struct evsel *evsel, *tmp;
3554
+
3555
+ evlist__remove(trace->evlist, trace->syscalls.events.augmented);
3556
+ evsel__delete(trace->syscalls.events.augmented);
3557
+ trace->syscalls.events.augmented = NULL;
3558
+
3559
+ evlist__for_each_entry_safe(trace->evlist, tmp, evsel) {
3560
+ if (evsel->bpf_obj == trace->bpf_obj) {
3561
+ evlist__remove(trace->evlist, evsel);
3562
+ evsel__delete(evsel);
3563
+ }
3564
+
3565
+ }
3566
+
3567
+ bpf_object__close(trace->bpf_obj);
3568
+ trace->bpf_obj = NULL;
3569
+}
3570
+#else // HAVE_LIBBPF_SUPPORT
3571
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
3572
+ const char *name __maybe_unused)
3573
+{
3574
+ return NULL;
3575
+}
3576
+
3577
+static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
3578
+{
3579
+}
3580
+
3581
+static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
3582
+{
3583
+}
3584
+
3585
+static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
3586
+{
3587
+ return 0;
3588
+}
3589
+
3590
+static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
3591
+{
3592
+ return 0;
3593
+}
3594
+
3595
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
3596
+ const char *name __maybe_unused)
3597
+{
3598
+ return NULL;
3599
+}
3600
+
3601
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
3602
+{
3603
+ return 0;
3604
+}
3605
+
3606
+static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused)
3607
+{
3608
+}
3609
+#endif // HAVE_LIBBPF_SUPPORT
3610
+
3611
+static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
3612
+{
3613
+ struct evsel *evsel;
3614
+
3615
+ evlist__for_each_entry(trace->evlist, evsel) {
3616
+ if (evsel == trace->syscalls.events.augmented ||
3617
+ evsel->bpf_obj == trace->bpf_obj)
3618
+ continue;
3619
+
3620
+ return false;
3621
+ }
3622
+
3623
+ return true;
3624
+}
3625
+
3626
+static int trace__set_ev_qualifier_filter(struct trace *trace)
3627
+{
3628
+ if (trace->syscalls.map)
3629
+ return trace__set_ev_qualifier_bpf_filter(trace);
3630
+ if (trace->syscalls.events.sys_enter)
3631
+ return trace__set_ev_qualifier_tp_filter(trace);
3632
+ return 0;
3633
+}
3634
+
3635
+static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
3636
+ size_t npids __maybe_unused, pid_t *pids __maybe_unused)
3637
+{
3638
+ int err = 0;
3639
+#ifdef HAVE_LIBBPF_SUPPORT
3640
+ bool value = true;
3641
+ int map_fd = bpf_map__fd(map);
3642
+ size_t i;
3643
+
3644
+ for (i = 0; i < npids; ++i) {
3645
+ err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
3646
+ if (err)
3647
+ break;
3648
+ }
3649
+#endif
3650
+ return err;
3651
+}
3652
+
24153653 static int trace__set_filter_loop_pids(struct trace *trace)
24163654 {
2417
- unsigned int nr = 1;
3655
+ unsigned int nr = 1, err;
24183656 pid_t pids[32] = {
24193657 getpid(),
24203658 };
....@@ -2426,20 +3664,237 @@
24263664 if (parent == NULL)
24273665 break;
24283666
2429
- if (!strcmp(thread__comm_str(parent), "sshd")) {
3667
+ if (!strcmp(thread__comm_str(parent), "sshd") ||
3668
+ strstarts(thread__comm_str(parent), "gnome-terminal")) {
24303669 pids[nr++] = parent->tid;
24313670 break;
24323671 }
24333672 thread = parent;
24343673 }
24353674
2436
- return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
3675
+ err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids);
3676
+ if (!err && trace->filter_pids.map)
3677
+ err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3678
+
3679
+ return err;
3680
+}
3681
+
3682
+static int trace__set_filter_pids(struct trace *trace)
3683
+{
3684
+ int err = 0;
3685
+ /*
3686
+ * Better not use !target__has_task() here because we need to cover the
3687
+ * case where no threads were specified in the command line, but a
3688
+ * workload was, and in that case we will fill in the thread_map when
3689
+ * we fork the workload in perf_evlist__prepare_workload.
3690
+ */
3691
+ if (trace->filter_pids.nr > 0) {
3692
+ err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
3693
+ trace->filter_pids.entries);
3694
+ if (!err && trace->filter_pids.map) {
3695
+ err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
3696
+ trace->filter_pids.entries);
3697
+ }
3698
+ } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
3699
+ err = trace__set_filter_loop_pids(trace);
3700
+ }
3701
+
3702
+ return err;
3703
+}
3704
+
3705
+static int __trace__deliver_event(struct trace *trace, union perf_event *event)
3706
+{
3707
+ struct evlist *evlist = trace->evlist;
3708
+ struct perf_sample sample;
3709
+ int err;
3710
+
3711
+ err = perf_evlist__parse_sample(evlist, event, &sample);
3712
+ if (err)
3713
+ fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
3714
+ else
3715
+ trace__handle_event(trace, event, &sample);
3716
+
3717
+ return 0;
3718
+}
3719
+
3720
+static int __trace__flush_events(struct trace *trace)
3721
+{
3722
+ u64 first = ordered_events__first_time(&trace->oe.data);
3723
+ u64 flush = trace->oe.last - NSEC_PER_SEC;
3724
+
3725
+ /* Is there some thing to flush.. */
3726
+ if (first && first < flush)
3727
+ return ordered_events__flush_time(&trace->oe.data, flush);
3728
+
3729
+ return 0;
3730
+}
3731
+
3732
+static int trace__flush_events(struct trace *trace)
3733
+{
3734
+ return !trace->sort_events ? 0 : __trace__flush_events(trace);
3735
+}
3736
+
3737
+static int trace__deliver_event(struct trace *trace, union perf_event *event)
3738
+{
3739
+ int err;
3740
+
3741
+ if (!trace->sort_events)
3742
+ return __trace__deliver_event(trace, event);
3743
+
3744
+ err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
3745
+ if (err && err != -1)
3746
+ return err;
3747
+
3748
+ err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0);
3749
+ if (err)
3750
+ return err;
3751
+
3752
+ return trace__flush_events(trace);
3753
+}
3754
+
3755
+static int ordered_events__deliver_event(struct ordered_events *oe,
3756
+ struct ordered_event *event)
3757
+{
3758
+ struct trace *trace = container_of(oe, struct trace, oe.data);
3759
+
3760
+ return __trace__deliver_event(trace, event->event);
3761
+}
3762
+
3763
+static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
3764
+{
3765
+ struct tep_format_field *field;
3766
+ struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
3767
+
3768
+ if (evsel->tp_format == NULL || fmt == NULL)
3769
+ return NULL;
3770
+
3771
+ for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
3772
+ if (strcmp(field->name, arg) == 0)
3773
+ return fmt;
3774
+
3775
+ return NULL;
3776
+}
3777
+
3778
+static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
3779
+{
3780
+ char *tok, *left = evsel->filter, *new_filter = evsel->filter;
3781
+
3782
+ while ((tok = strpbrk(left, "=<>!")) != NULL) {
3783
+ char *right = tok + 1, *right_end;
3784
+
3785
+ if (*right == '=')
3786
+ ++right;
3787
+
3788
+ while (isspace(*right))
3789
+ ++right;
3790
+
3791
+ if (*right == '\0')
3792
+ break;
3793
+
3794
+ while (!isalpha(*left))
3795
+ if (++left == tok) {
3796
+ /*
3797
+ * Bail out, can't find the name of the argument that is being
3798
+ * used in the filter, let it try to set this filter, will fail later.
3799
+ */
3800
+ return 0;
3801
+ }
3802
+
3803
+ right_end = right + 1;
3804
+ while (isalnum(*right_end) || *right_end == '_' || *right_end == '|')
3805
+ ++right_end;
3806
+
3807
+ if (isalpha(*right)) {
3808
+ struct syscall_arg_fmt *fmt;
3809
+ int left_size = tok - left,
3810
+ right_size = right_end - right;
3811
+ char arg[128];
3812
+
3813
+ while (isspace(left[left_size - 1]))
3814
+ --left_size;
3815
+
3816
+ scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
3817
+
3818
+ fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
3819
+ if (fmt == NULL) {
3820
+ pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
3821
+ arg, evsel->name, evsel->filter);
3822
+ return -1;
3823
+ }
3824
+
3825
+ pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ",
3826
+ arg, (int)(right - tok), tok, right_size, right);
3827
+
3828
+ if (fmt->strtoul) {
3829
+ u64 val;
3830
+ struct syscall_arg syscall_arg = {
3831
+ .parm = fmt->parm,
3832
+ };
3833
+
3834
+ if (fmt->strtoul(right, right_size, &syscall_arg, &val)) {
3835
+ char *n, expansion[19];
3836
+ int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val);
3837
+ int expansion_offset = right - new_filter;
3838
+
3839
+ pr_debug("%s", expansion);
3840
+
3841
+ if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) {
3842
+ pr_debug(" out of memory!\n");
3843
+ free(new_filter);
3844
+ return -1;
3845
+ }
3846
+ if (new_filter != evsel->filter)
3847
+ free(new_filter);
3848
+ left = n + expansion_offset + expansion_lenght;
3849
+ new_filter = n;
3850
+ } else {
3851
+ pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3852
+ right_size, right, arg, evsel->name, evsel->filter);
3853
+ return -1;
3854
+ }
3855
+ } else {
3856
+ pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3857
+ arg, evsel->name, evsel->filter);
3858
+ return -1;
3859
+ }
3860
+
3861
+ pr_debug("\n");
3862
+ } else {
3863
+ left = right_end;
3864
+ }
3865
+ }
3866
+
3867
+ if (new_filter != evsel->filter) {
3868
+ pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
3869
+ evsel__set_filter(evsel, new_filter);
3870
+ free(new_filter);
3871
+ }
3872
+
3873
+ return 0;
3874
+}
3875
+
3876
+static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel)
3877
+{
3878
+ struct evlist *evlist = trace->evlist;
3879
+ struct evsel *evsel;
3880
+
3881
+ evlist__for_each_entry(evlist, evsel) {
3882
+ if (evsel->filter == NULL)
3883
+ continue;
3884
+
3885
+ if (trace__expand_filter(trace, evsel)) {
3886
+ *err_evsel = evsel;
3887
+ return -1;
3888
+ }
3889
+ }
3890
+
3891
+ return 0;
24373892 }
24383893
24393894 static int trace__run(struct trace *trace, int argc, const char **argv)
24403895 {
2441
- struct perf_evlist *evlist = trace->evlist;
2442
- struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
3896
+ struct evlist *evlist = trace->evlist;
3897
+ struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
24433898 int err = -1, i;
24443899 unsigned long before;
24453900 const bool forks = argc > 0;
....@@ -2447,33 +3902,33 @@
24473902
24483903 trace->live = true;
24493904
2450
- if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2451
- goto out_error_raw_syscalls;
3905
+ if (!trace->raw_augmented_syscalls) {
3906
+ if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
3907
+ goto out_error_raw_syscalls;
24523908
2453
- if (trace->trace_syscalls)
2454
- trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
3909
+ if (trace->trace_syscalls)
3910
+ trace->vfs_getname = evlist__add_vfs_getname(evlist);
3911
+ }
24553912
24563913 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2457
- pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
3914
+ pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
24583915 if (pgfault_maj == NULL)
24593916 goto out_error_mem;
2460
- perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2461
- perf_evlist__add(evlist, pgfault_maj);
3917
+ evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
3918
+ evlist__add(evlist, pgfault_maj);
24623919 }
24633920
24643921 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2465
- pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
3922
+ pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
24663923 if (pgfault_min == NULL)
24673924 goto out_error_mem;
2468
- perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2469
- perf_evlist__add(evlist, pgfault_min);
3925
+ evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
3926
+ evlist__add(evlist, pgfault_min);
24703927 }
24713928
24723929 if (trace->sched &&
2473
- perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2474
- trace__sched_stat_runtime))
3930
+ evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
24753931 goto out_error_sched_stat_runtime;
2476
-
24773932 /*
24783933 * If a global cgroup was set, apply it to all the events without an
24793934 * explicit cgroup. I.e.:
....@@ -2528,7 +3983,7 @@
25283983 }
25293984 }
25303985
2531
- err = perf_evlist__open(evlist);
3986
+ err = evlist__open(evlist);
25323987 if (err < 0)
25333988 goto out_error_open;
25343989
....@@ -2542,108 +3997,122 @@
25423997 goto out_error_open;
25433998 }
25443999
2545
- /*
2546
- * Better not use !target__has_task() here because we need to cover the
2547
- * case where no threads were specified in the command line, but a
2548
- * workload was, and in that case we will fill in the thread_map when
2549
- * we fork the workload in perf_evlist__prepare_workload.
2550
- */
2551
- if (trace->filter_pids.nr > 0)
2552
- err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2553
- else if (thread_map__pid(evlist->threads, 0) == -1)
2554
- err = trace__set_filter_loop_pids(trace);
2555
-
4000
+ err = trace__set_filter_pids(trace);
25564001 if (err < 0)
25574002 goto out_error_mem;
4003
+
4004
+ if (trace->syscalls.map)
4005
+ trace__init_syscalls_bpf_map(trace);
4006
+
4007
+ if (trace->syscalls.prog_array.sys_enter)
4008
+ trace__init_syscalls_bpf_prog_array_maps(trace);
25584009
25594010 if (trace->ev_qualifier_ids.nr > 0) {
25604011 err = trace__set_ev_qualifier_filter(trace);
25614012 if (err < 0)
25624013 goto out_errno;
25634014
2564
- pr_debug("event qualifier tracepoint filter: %s\n",
2565
- trace->syscalls.events.sys_exit->filter);
4015
+ if (trace->syscalls.events.sys_exit) {
4016
+ pr_debug("event qualifier tracepoint filter: %s\n",
4017
+ trace->syscalls.events.sys_exit->filter);
4018
+ }
25664019 }
25674020
4021
+ /*
4022
+ * If the "close" syscall is not traced, then we will not have the
4023
+ * opportunity to, in syscall_arg__scnprintf_close_fd() invalidate the
4024
+ * fd->pathname table and were ending up showing the last value set by
4025
+ * syscalls opening a pathname and associating it with a descriptor or
4026
+ * reading it from /proc/pid/fd/ in cases where that doesn't make
4027
+ * sense.
4028
+ *
4029
+ * So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is
4030
+ * not in use.
4031
+ */
4032
+ trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));
4033
+
4034
+ err = trace__expand_filters(trace, &evsel);
4035
+ if (err)
4036
+ goto out_delete_evlist;
25684037 err = perf_evlist__apply_filters(evlist, &evsel);
25694038 if (err < 0)
25704039 goto out_error_apply_filters;
25714040
2572
- err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
4041
+ if (trace->dump.map)
4042
+ bpf_map__fprintf(trace->dump.map, trace->output);
4043
+
4044
+ err = evlist__mmap(evlist, trace->opts.mmap_pages);
25734045 if (err < 0)
25744046 goto out_error_mmap;
25754047
25764048 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2577
- perf_evlist__enable(evlist);
4049
+ evlist__enable(evlist);
25784050
25794051 if (forks)
25804052 perf_evlist__start_workload(evlist);
25814053
25824054 if (trace->opts.initial_delay) {
25834055 usleep(trace->opts.initial_delay * 1000);
2584
- perf_evlist__enable(evlist);
4056
+ evlist__enable(evlist);
25854057 }
25864058
2587
- trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2588
- evlist->threads->nr > 1 ||
2589
- perf_evlist__first(evlist)->attr.inherit;
4059
+ trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
4060
+ evlist->core.threads->nr > 1 ||
4061
+ evlist__first(evlist)->core.attr.inherit;
25904062
25914063 /*
2592
- * Now that we already used evsel->attr to ask the kernel to setup the
2593
- * events, lets reuse evsel->attr.sample_max_stack as the limit in
4064
+ * Now that we already used evsel->core.attr to ask the kernel to setup the
4065
+ * events, lets reuse evsel->core.attr.sample_max_stack as the limit in
25944066 * trace__resolve_callchain(), allowing per-event max-stack settings
2595
- * to override an explicitely set --max-stack global setting.
4067
+ * to override an explicitly set --max-stack global setting.
25964068 */
25974069 evlist__for_each_entry(evlist, evsel) {
25984070 if (evsel__has_callchain(evsel) &&
2599
- evsel->attr.sample_max_stack == 0)
2600
- evsel->attr.sample_max_stack = trace->max_stack;
4071
+ evsel->core.attr.sample_max_stack == 0)
4072
+ evsel->core.attr.sample_max_stack = trace->max_stack;
26014073 }
26024074 again:
26034075 before = trace->nr_events;
26044076
2605
- for (i = 0; i < evlist->nr_mmaps; i++) {
4077
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
26064078 union perf_event *event;
2607
- struct perf_mmap *md;
4079
+ struct mmap *md;
26084080
26094081 md = &evlist->mmap[i];
2610
- if (perf_mmap__read_init(md) < 0)
4082
+ if (perf_mmap__read_init(&md->core) < 0)
26114083 continue;
26124084
2613
- while ((event = perf_mmap__read_event(md)) != NULL) {
2614
- struct perf_sample sample;
2615
-
4085
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
26164086 ++trace->nr_events;
26174087
2618
- err = perf_evlist__parse_sample(evlist, event, &sample);
2619
- if (err) {
2620
- fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2621
- goto next_event;
2622
- }
4088
+ err = trace__deliver_event(trace, event);
4089
+ if (err)
4090
+ goto out_disable;
26234091
2624
- trace__handle_event(trace, event, &sample);
2625
-next_event:
2626
- perf_mmap__consume(md);
4092
+ perf_mmap__consume(&md->core);
26274093
26284094 if (interrupted)
26294095 goto out_disable;
26304096
26314097 if (done && !draining) {
2632
- perf_evlist__disable(evlist);
4098
+ evlist__disable(evlist);
26334099 draining = true;
26344100 }
26354101 }
2636
- perf_mmap__read_done(md);
4102
+ perf_mmap__read_done(&md->core);
26374103 }
26384104
26394105 if (trace->nr_events == before) {
26404106 int timeout = done ? 100 : -1;
26414107
2642
- if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2643
- if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
4108
+ if (!draining && evlist__poll(evlist, timeout) > 0) {
4109
+ if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
26444110 draining = true;
26454111
26464112 goto again;
4113
+ } else {
4114
+ if (trace__flush_events(trace))
4115
+ goto out_disable;
26474116 }
26484117 } else {
26494118 goto again;
....@@ -2652,7 +4121,10 @@
26524121 out_disable:
26534122 thread__zput(trace->current);
26544123
2655
- perf_evlist__disable(evlist);
4124
+ evlist__disable(evlist);
4125
+
4126
+ if (trace->sort_events)
4127
+ ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
26564128
26574129 if (!err) {
26584130 if (trace->summary)
....@@ -2670,7 +4142,7 @@
26704142 out_delete_evlist:
26714143 trace__symbols__exit(trace);
26724144
2673
- perf_evlist__delete(evlist);
4145
+ evlist__delete(evlist);
26744146 cgroup__put(trace->cgroup);
26754147 trace->evlist = NULL;
26764148 trace->live = false;
....@@ -2687,11 +4159,11 @@
26874159 goto out_error;
26884160
26894161 out_error_mmap:
2690
- perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
4162
+ evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
26914163 goto out_error;
26924164
26934165 out_error_open:
2694
- perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
4166
+ evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
26954167
26964168 out_error:
26974169 fprintf(trace->output, "%s\n", errbuf);
....@@ -2700,7 +4172,7 @@
27004172 out_error_apply_filters:
27014173 fprintf(trace->output,
27024174 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2703
- evsel->filter, perf_evsel__name(evsel), errno,
4175
+ evsel->filter, evsel__name(evsel), errno,
27044176 str_error_r(errno, errbuf, sizeof(errbuf)));
27054177 goto out_delete_evlist;
27064178 }
....@@ -2715,18 +4187,16 @@
27154187
27164188 static int trace__replay(struct trace *trace)
27174189 {
2718
- const struct perf_evsel_str_handler handlers[] = {
4190
+ const struct evsel_str_handler handlers[] = {
27194191 { "probe:vfs_getname", trace__vfs_getname, },
27204192 };
27214193 struct perf_data data = {
2722
- .file = {
2723
- .path = input_name,
2724
- },
2725
- .mode = PERF_DATA_MODE_READ,
2726
- .force = trace->force,
4194
+ .path = input_name,
4195
+ .mode = PERF_DATA_MODE_READ,
4196
+ .force = trace->force,
27274197 };
27284198 struct perf_session *session;
2729
- struct perf_evsel *evsel;
4199
+ struct evsel *evsel;
27304200 int err = -1;
27314201
27324202 trace->tool.sample = trace__process_sample;
....@@ -2747,8 +4217,8 @@
27474217 trace->multiple_threads = true;
27484218
27494219 session = perf_session__new(&data, false, &trace->tool);
2750
- if (session == NULL)
2751
- return -1;
4220
+ if (IS_ERR(session))
4221
+ return PTR_ERR(session);
27524222
27534223 if (trace->opts.target.pid)
27544224 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
....@@ -2773,7 +4243,7 @@
27734243 "syscalls:sys_enter");
27744244
27754245 if (evsel &&
2776
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
4246
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
27774247 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
27784248 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
27794249 goto out;
....@@ -2785,17 +4255,17 @@
27854255 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
27864256 "syscalls:sys_exit");
27874257 if (evsel &&
2788
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
4258
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
27894259 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
27904260 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
27914261 goto out;
27924262 }
27934263
27944264 evlist__for_each_entry(session->evlist, evsel) {
2795
- if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2796
- (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2797
- evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2798
- evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
4265
+ if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
4266
+ (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
4267
+ evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
4268
+ evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
27994269 evsel->handler = trace__pgfault;
28004270 }
28014271
....@@ -2824,17 +4294,17 @@
28244294 }
28254295
/*
 * Resort definition for the syscall stats tree, built with DEFINE_RESORT_RB
 * using the comparison `a->msecs > b->msecs`.
 *
 * Each resorted entry carries:
 *   stats   - the pointer stored in the source int_node's priv field,
 *   msecs   - sample count times (average / NSEC_PER_MSEC), or 0 when priv is NULL,
 *   syscall - the source int_node's integer key (source->i).
 *
 * The '+' side of this hunk switches the payload from a bare struct stats to
 * struct syscall_stats, whose embedded `stats` member holds the counters.
 */
28264296 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2827
- struct stats *stats;
2828
- double msecs;
2829
- int syscall;
4297
+ struct syscall_stats *stats;
4298
+ double msecs;
4299
+ int syscall;
28304300 )
28314301 {
28324302 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2833
- struct stats *stats = source->priv;
4303
+ struct syscall_stats *stats = source->priv;
28344304
28354305 entry->syscall = source->i;
28364306 entry->stats = stats;
/* NOTE(review): presumably the total time in msec, if avg_stats() yields a mean in nsec -- confirm. */
2837
- entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
4307
+ entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
28384308 }
28394309
28404310 static size_t thread__dump_stats(struct thread_trace *ttrace,
....@@ -2850,27 +4320,37 @@
28504320
28514321 printed += fprintf(fp, "\n");
28524322
2853
- printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2854
- printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2855
- printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
4323
+ printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
4324
+ printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
4325
+ printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
28564326
28574327 resort_rb__for_each_entry(nd, syscall_stats) {
2858
- struct stats *stats = syscall_stats_entry->stats;
4328
+ struct syscall_stats *stats = syscall_stats_entry->stats;
28594329 if (stats) {
2860
- double min = (double)(stats->min) / NSEC_PER_MSEC;
2861
- double max = (double)(stats->max) / NSEC_PER_MSEC;
2862
- double avg = avg_stats(stats);
4330
+ double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
4331
+ double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
4332
+ double avg = avg_stats(&stats->stats);
28634333 double pct;
2864
- u64 n = (u64) stats->n;
4334
+ u64 n = (u64)stats->stats.n;
28654335
2866
- pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
4336
+ pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
28674337 avg /= NSEC_PER_MSEC;
28684338
28694339 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
28704340 printed += fprintf(fp, " %-15s", sc->name);
2871
- printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2872
- n, syscall_stats_entry->msecs, min, avg);
4341
+ printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
4342
+ n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
28734343 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
4344
+
4345
+ if (trace->errno_summary && stats->nr_failures) {
4346
+ const char *arch_name = perf_env__arch(trace->host->env);
4347
+ int e;
4348
+
4349
+ for (e = 0; e < stats->max_errno; ++e) {
4350
+ if (stats->errnos[e] != 0)
4351
+ fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]);
4352
+ }
4353
+ }
28744354 }
28754355 }
28764356
....@@ -2951,8 +4431,8 @@
29514431 return 0;
29524432 }
29534433
2954
-static int trace__set_filter_pids(const struct option *opt, const char *str,
2955
- int unset __maybe_unused)
4434
+static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
4435
+ int unset __maybe_unused)
29564436 {
29574437 int ret = -1;
29584438 size_t i;
....@@ -3017,38 +4497,65 @@
30174497 return 0;
30184498 }
30194499
3020
-static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
4500
/*
 * Install `handler` on every evsel in `evlist` that does not have one yet
 * (evsel->handler == NULL); evsels that already carry a handler are left
 * untouched.
 *
 * The removed '-' lines show the previous behaviour, which overwrote the
 * handler of every evsel unconditionally.
 */
+static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler)
30214501 {
3022
- struct perf_evsel *evsel;
4502
+ struct evsel *evsel;
30234503
3024
- evlist__for_each_entry(evlist, evsel)
3025
- evsel->handler = handler;
4504
+ evlist__for_each_entry(evlist, evsel) {
/* Only fill in the default; keep any handler that was set earlier. */
4505
+ if (evsel->handler == NULL)
4506
+ evsel->handler = handler;
4507
+ }
30264508 }
30274509
3028
-static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
4510
/*
 * Seed an evsel's per-argument formatters from the syscall_fmt entry that
 * syscall_fmt__find() returns for `name`.
 *
 * Nothing is done when evsel__syscall_arg_fmt() returns NULL or when no
 * syscall_fmt entry is found for `name`.
 */
+static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
30294511 {
3030
- struct perf_evsel *evsel;
4512
+ struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
4513
+
4514
+ if (fmt) {
4515
+ struct syscall_fmt *scfmt = syscall_fmt__find(name);
4516
+
4517
+ if (scfmt) {
4518
+ int skip = 0;
4519
+
/*
 * When the field at the head of the tracepoint format is the syscall
 * number (`__syscall_nr` or `nr`), leave formatter slot 0 alone and copy
 * starting at slot 1.
 */
4520
+ if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
4521
+ strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
4522
+ ++skip;
4523
+
/*
 * NOTE(review): copies (nr_fields - skip) entries out of scfmt->arg; this
 * assumes that array holds at least that many elements -- confirm against
 * the struct syscall_fmt definition.
 */
4524
+ memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
4525
+ }
4526
+ }
4527
+}
4528
+
4529
+static int evlist__set_syscall_tp_fields(struct evlist *evlist)
4530
+{
4531
+ struct evsel *evsel;
30314532
30324533 evlist__for_each_entry(evlist, evsel) {
30334534 if (evsel->priv || !evsel->tp_format)
30344535 continue;
30354536
3036
- if (strcmp(evsel->tp_format->system, "syscalls"))
4537
+ if (strcmp(evsel->tp_format->system, "syscalls")) {
4538
+ evsel__init_tp_arg_scnprintf(evsel);
30374539 continue;
4540
+ }
30384541
3039
- if (perf_evsel__init_syscall_tp(evsel))
4542
+ if (evsel__init_syscall_tp(evsel))
30404543 return -1;
30414544
30424545 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
3043
- struct syscall_tp *sc = evsel->priv;
4546
+ struct syscall_tp *sc = __evsel__syscall_tp(evsel);
30444547
30454548 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
30464549 return -1;
4550
+
4551
+ evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
30474552 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
3048
- struct syscall_tp *sc = evsel->priv;
4553
+ struct syscall_tp *sc = __evsel__syscall_tp(evsel);
30494554
30504555 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
30514556 return -1;
4557
+
4558
+ evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
30524559 }
30534560 }
30544561
....@@ -3072,6 +4579,7 @@
30724579 int len = strlen(str) + 1, err = -1, list, idx;
30734580 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
30744581 char group_name[PATH_MAX];
4582
+ struct syscall_fmt *fmt;
30754583
30764584 if (strace_groups_dir == NULL)
30774585 return -1;
....@@ -3089,12 +4597,19 @@
30894597 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
30904598 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
30914599 list = 1;
4600
+ goto do_concat;
4601
+ }
4602
+
4603
+ fmt = syscall_fmt__find_by_alias(s);
4604
+ if (fmt != NULL) {
4605
+ list = 1;
4606
+ s = fmt->name;
30924607 } else {
30934608 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
30944609 if (access(group_name, R_OK) == 0)
30954610 list = 1;
30964611 }
3097
-
4612
+do_concat:
30984613 if (lists[list]) {
30994614 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
31004615 } else {
....@@ -3130,9 +4645,9 @@
31304645 err = 0;
31314646
31324647 if (lists[0]) {
3133
- struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
3134
- "event selector. use 'perf list' to list available events",
3135
- parse_events_option);
4648
+ struct option o = {
4649
+ .value = &trace->evlist,
4650
+ };
31364651 err = parse_events_option(&o, lists[0], 0);
31374652 }
31384653 out:
....@@ -3146,12 +4661,59 @@
31464661 {
31474662 struct trace *trace = opt->value;
31484663
3149
- if (!list_empty(&trace->evlist->entries))
3150
- return parse_cgroups(opt, str, unset);
3151
-
4664
+ if (!list_empty(&trace->evlist->core.entries)) {
4665
+ struct option o = {
4666
+ .value = &trace->evlist,
4667
+ };
4668
+ return parse_cgroups(&o, str, unset);
4669
+ }
31524670 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
31534671
31544672 return 0;
4673
+}
4674
+
4675
/*
 * Config callback handed to perf_config() by cmd_trace(); applies the
 * `trace.*` keys to the struct trace passed in `arg`.
 *
 * Keys handled: trace.add_events, trace.show_timestamp, trace.show_duration,
 * trace.show_arg_names, trace.show_zeros, trace.show_prefix,
 * trace.no_inherit, trace.args_alignment and trace.tracepoint_beautifiers.
 * Any other key is silently ignored.
 *
 * Returns 0, except -1 when duplicating the trace.add_events value fails.
 */
+static int trace__config(const char *var, const char *value, void *arg)
4676
+{
4677
+ struct trace *trace = arg;
4678
+ int err = 0;
4679
+
/*
 * The event string is only stashed here; cmd_trace() parses it later and
 * frees it on exit.
 * NOTE(review): if this key is delivered more than once the earlier copy is
 * overwritten without being freed -- confirm whether that can happen.
 */
4680
+ if (!strcmp(var, "trace.add_events")) {
4681
+ trace->perfconfig_events = strdup(value);
4682
+ if (trace->perfconfig_events == NULL) {
4683
+ pr_err("Not enough memory for %s\n", "trace.add_events");
4684
+ return -1;
4685
+ }
4686
+ } else if (!strcmp(var, "trace.show_timestamp")) {
4687
+ trace->show_tstamp = perf_config_bool(var, value);
4688
+ } else if (!strcmp(var, "trace.show_duration")) {
4689
+ trace->show_duration = perf_config_bool(var, value);
4690
+ } else if (!strcmp(var, "trace.show_arg_names")) {
4691
+ trace->show_arg_names = perf_config_bool(var, value);
/* Turning argument names off forces show_zeros on. */
4692
+ if (!trace->show_arg_names)
4693
+ trace->show_zeros = true;
4694
+ } else if (!strcmp(var, "trace.show_zeros")) {
4695
+ bool new_show_zeros = perf_config_bool(var, value);
/*
 * Refuse to clear show_zeros while argument names are off: warn and keep
 * the current value. This is not treated as an error (err stays 0).
 */
4696
+ if (!trace->show_arg_names && !new_show_zeros) {
4697
+ pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
4698
+ goto out;
4699
+ }
4700
+ trace->show_zeros = new_show_zeros;
4701
+ } else if (!strcmp(var, "trace.show_prefix")) {
4702
+ trace->show_string_prefix = perf_config_bool(var, value);
4703
+ } else if (!strcmp(var, "trace.no_inherit")) {
4704
+ trace->opts.no_inherit = perf_config_bool(var, value);
4705
+ } else if (!strcmp(var, "trace.args_alignment")) {
4706
+ int args_alignment = 0;
/* The value is applied only when perf_config_int() returns 0. */
4707
+ if (perf_config_int(&args_alignment, var, value) == 0)
4708
+ trace->args_alignment = args_alignment;
4709
+ } else if (!strcmp(var, "trace.tracepoint_beautifiers")) {
/* Case-insensitive match; any other value leaves libtraceevent_print unchanged. */
4710
+ if (strcasecmp(value, "libtraceevent") == 0)
4711
+ trace->libtraceevent_print = true;
4712
+ else if (strcasecmp(value, "libbeauty") == 0)
4713
+ trace->libtraceevent_print = false;
4714
+ }
4715
+out:
4716
+ return err;
31554717 }
31564718
31574719 int cmd_trace(int argc, const char **argv)
....@@ -3164,9 +4726,6 @@
31644726 NULL
31654727 };
31664728 struct trace trace = {
3167
- .syscalls = {
3168
- . max = -1,
3169
- },
31704729 .opts = {
31714730 .target = {
31724731 .uid = UINT_MAX,
....@@ -3176,19 +4735,26 @@
31764735 .user_interval = ULLONG_MAX,
31774736 .no_buffering = true,
31784737 .mmap_pages = UINT_MAX,
3179
- .proc_map_timeout = 500,
31804738 },
31814739 .output = stderr,
31824740 .show_comm = true,
4741
+ .show_tstamp = true,
4742
+ .show_duration = true,
4743
+ .show_arg_names = true,
4744
+ .args_alignment = 70,
31834745 .trace_syscalls = false,
31844746 .kernel_syscallchains = false,
31854747 .max_stack = UINT_MAX,
4748
+ .max_events = ULONG_MAX,
31864749 };
4750
+ const char *map_dump_str = NULL;
31874751 const char *output_name = NULL;
31884752 const struct option trace_options[] = {
31894753 OPT_CALLBACK('e', "event", &trace, "event",
31904754 "event/syscall selector. use 'perf list' to list available events",
31914755 trace__parse_events_option),
4756
+ OPT_CALLBACK(0, "filter", &trace.evlist, "filter",
4757
+ "event filter", parse_filter),
31924758 OPT_BOOLEAN(0, "comm", &trace.show_comm,
31934759 "show the thread COMM next to its id"),
31944760 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
....@@ -3201,7 +4767,7 @@
32014767 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
32024768 "trace events on existing thread id"),
32034769 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3204
- "pids to filter (by the kernel)", trace__set_filter_pids),
4770
+ "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
32054771 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
32064772 "system-wide collection from all CPUs"),
32074773 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
....@@ -3216,6 +4782,9 @@
32164782 OPT_CALLBACK(0, "duration", &trace, "float",
32174783 "show only events with duration > N.M ms",
32184784 trace__set_duration),
4785
+#ifdef HAVE_LIBBPF_SUPPORT
4786
+ OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
4787
+#endif
32194788 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
32204789 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
32214790 OPT_BOOLEAN('T', "time", &trace.full_time,
....@@ -3226,6 +4795,8 @@
32264795 "Show only syscall summary with statistics"),
32274796 OPT_BOOLEAN('S', "with-summary", &trace.summary,
32284797 "Show all syscalls and summary with statistics"),
4798
+ OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
4799
+ "Show errno stats per syscall, use with -s or -S"),
32294800 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
32304801 "Trace pagefaults", parse_pagefaults, "maj"),
32314802 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
....@@ -3233,8 +4804,12 @@
32334804 OPT_CALLBACK(0, "call-graph", &trace.opts,
32344805 "record_mode[,record_size]", record_callchain_help,
32354806 &record_parse_callchain_opt),
4807
+ OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
4808
+ "Use libtraceevent to print the tracepoint arguments."),
32364809 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
32374810 "Show the kernel callchains on the syscall exit path"),
4811
+ OPT_ULONG(0, "max-events", &trace.max_events,
4812
+ "Set the maximum number of events to print, exit after that is reached. "),
32384813 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
32394814 "Set the minimum stack depth when parsing the callchain, "
32404815 "anything below the specified depth will be ignored."),
....@@ -3242,20 +4817,23 @@
32424817 "Set the maximum stack depth when parsing the callchain, "
32434818 "anything beyond the specified depth will be ignored. "
32444819 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
4820
+ OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
4821
+ "Sort batch of events before processing, use if getting out of order events"),
32454822 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
32464823 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
3247
- OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
4824
+ OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
32484825 "per thread proc mmap processing timeout in ms"),
32494826 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
32504827 trace__parse_cgroups),
3251
- OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
4828
+ OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
32524829 "ms to wait before starting measurement after program "
32534830 "start"),
4831
+ OPTS_EVSWITCH(&trace.evswitch),
32544832 OPT_END()
32554833 };
32564834 bool __maybe_unused max_stack_user_set = true;
32574835 bool mmap_pages_user_set = true;
3258
- struct perf_evsel *evsel;
4836
+ struct evsel *evsel;
32594837 const char * const trace_subcommands[] = { "record", NULL };
32604838 int err = -1;
32614839 char bf[BUFSIZ];
....@@ -3263,7 +4841,7 @@
32634841 signal(SIGSEGV, sighandler_dump_stack);
32644842 signal(SIGFPE, sighandler_dump_stack);
32654843
3266
- trace.evlist = perf_evlist__new();
4844
+ trace.evlist = evlist__new();
32674845 trace.sctbl = syscalltbl__new();
32684846
32694847 if (trace.evlist == NULL || trace.sctbl == NULL) {
....@@ -3272,8 +4850,53 @@
32724850 goto out;
32734851 }
32744852
4853
+ /*
4854
+ * Parsing .perfconfig may entail creating a BPF event, that may need
4855
+ * to create BPF maps, so bump RLIM_MEMLOCK as the default 64K setting
4856
+ * is too small. This affects just this process, not touching the
4857
+ * global setting. If it fails we'll get something in 'perf trace -v'
4858
+ * to help diagnose the problem.
4859
+ */
4860
+ rlimit__bump_memlock();
4861
+
4862
+ err = perf_config(trace__config, &trace);
4863
+ if (err)
4864
+ goto out;
4865
+
32754866 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
32764867 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
4868
+
4869
+ /*
4870
+ * Here we already passed thru trace__parse_events_option() and it has
4871
+ * already figured out if -e syscall_name, if not but if --event
4872
+ * foo:bar was used, the user is interested _just_ in those, say,
4873
+ * tracepoint events, not in the strace-like syscall-name-based mode.
4874
+ *
4875
+ * This is important because we need to check if strace-like mode is
4876
+ * needed to decide if we should filter out the eBPF
4877
+ * __augmented_syscalls__ code, if it is in the mix, say, via
4878
+ * .perfconfig trace.add_events, and filter those out.
4879
+ */
4880
+ if (!trace.trace_syscalls && !trace.trace_pgfaults &&
4881
+ trace.evlist->core.nr_entries == 0 /* Was --events used? */) {
4882
+ trace.trace_syscalls = true;
4883
+ }
4884
+ /*
4885
+ * Now that we have --verbose figured out, lets see if we need to parse
4886
+ * events from .perfconfig, so that if those events fail parsing, say some
4887
+ * BPF program fails, then we'll be able to use --verbose to see what went
4888
+ * wrong in more detail.
4889
+ */
4890
+ if (trace.perfconfig_events != NULL) {
4891
+ struct parse_events_error parse_err;
4892
+
4893
+ bzero(&parse_err, sizeof(parse_err));
4894
+ err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
4895
+ if (err) {
4896
+ parse_events_print_error(&parse_err, trace.perfconfig_events);
4897
+ goto out;
4898
+ }
4899
+ }
32774900
32784901 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
32794902 usage_with_options_msg(trace_usage, trace_options,
....@@ -3288,10 +4911,60 @@
32884911 }
32894912
32904913 if (evsel) {
3291
- if (perf_evsel__init_augmented_syscall_tp(evsel) ||
3292
- perf_evsel__init_augmented_syscall_tp_args(evsel))
3293
- goto out;
32944914 trace.syscalls.events.augmented = evsel;
4915
+
4916
+ evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
4917
+ if (evsel == NULL) {
4918
+ pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
4919
+ goto out;
4920
+ }
4921
+
4922
+ if (evsel->bpf_obj == NULL) {
4923
+ pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
4924
+ goto out;
4925
+ }
4926
+
4927
+ trace.bpf_obj = evsel->bpf_obj;
4928
+
4929
+ /*
4930
+ * If we have _just_ the augmenter event but don't have an
4931
+ * explicit --syscalls, then assume we want all strace-like
4932
+ * syscalls:
4933
+ */
4934
+ if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace))
4935
+ trace.trace_syscalls = true;
4936
+ /*
4937
+ * So, if we have a syscall augmenter, but trace_syscalls, aka
4938
+ * strace-like syscall tracing is not set, then we need to throw
4939
+ * away the augmenter, i.e. all the events that were created
4940
+ * from that BPF object file.
4941
+ *
4942
+ * This is more to fix the current .perfconfig trace.add_events
4943
+ * style of setting up the strace-like eBPF based syscall point
4944
+ * payload augmenter.
4945
+ *
4946
+ * All this complexity will be avoided by adding an alternative
4947
+ * to trace.add_events in the form of
4948
+ * trace.bpf_augmented_syscalls, that will be only parsed if we
4949
+ * need it.
4950
+ *
4951
+ * .perfconfig trace.add_events is still useful if we want, for
4952
+ * instance, have msr_write.msr in some .perfconfig profile based
4953
+ * 'perf trace --config determinism.profile' mode, where for some
4954
+ * particular goal/workload type we want a set of events and
4955
+ * output mode (with timings, etc) instead of having to add
4956
+ * all via the command line.
4957
+ *
4958
+ * Also --config to specify an alternate .perfconfig file needs
4959
+ * to be implemented.
4960
+ */
4961
+ if (!trace.trace_syscalls) {
4962
+ trace__delete_augmented_syscalls(&trace);
4963
+ } else {
4964
+ trace__set_bpf_map_filtered_pids(&trace);
4965
+ trace__set_bpf_map_syscalls(&trace);
4966
+ trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
4967
+ }
32954968 }
32964969
32974970 err = bpf__setup_stdout(trace.evlist);
....@@ -3302,6 +4975,14 @@
33024975 }
33034976
33044977 err = -1;
4978
+
4979
+ if (map_dump_str) {
4980
+ trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
4981
+ if (trace.dump.map == NULL) {
4982
+ pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
4983
+ goto out;
4984
+ }
4985
+ }
33054986
33064987 if (trace.trace_pgfaults) {
33074988 trace.opts.sample_address = true;
....@@ -3329,25 +5010,106 @@
33295010 symbol_conf.use_callchain = true;
33305011 }
33315012
3332
- if (trace.evlist->nr_entries > 0) {
3333
- evlist__set_evsel_handler(trace.evlist, trace__event_handler);
5013
+ if (trace.evlist->core.nr_entries > 0) {
5014
+ evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
33345015 if (evlist__set_syscall_tp_fields(trace.evlist)) {
33355016 perror("failed to set syscalls:* tracepoint fields");
33365017 goto out;
33375018 }
33385019 }
33395020
5021
+ if (trace.sort_events) {
5022
+ ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
5023
+ ordered_events__set_copy_on_queue(&trace.oe.data, true);
5024
+ }
5025
+
5026
+ /*
5027
+ * If we are augmenting syscalls, then combine what we put in the
5028
+ * __augmented_syscalls__ BPF map with what is in the
5029
+ * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF,
5030
+ * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit.
5031
+ *
5032
+ * We'll switch to look at two BPF maps, one for sys_enter and the
5033
+ * other for sys_exit when we start augmenting the sys_exit paths with
5034
+ * buffers that are being copied from kernel to userspace, think 'read'
5035
+ * syscall.
5036
+ */
5037
+ if (trace.syscalls.events.augmented) {
5038
+ evlist__for_each_entry(trace.evlist, evsel) {
5039
+ bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
5040
+
5041
+ if (raw_syscalls_sys_exit) {
5042
+ trace.raw_augmented_syscalls = true;
5043
+ goto init_augmented_syscall_tp;
5044
+ }
5045
+
5046
+ if (trace.syscalls.events.augmented->priv == NULL &&
5047
+ strstr(evsel__name(evsel), "syscalls:sys_enter")) {
5048
+ struct evsel *augmented = trace.syscalls.events.augmented;
5049
+ if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
5050
+ evsel__init_augmented_syscall_tp_args(augmented))
5051
+ goto out;
5052
+ /*
5053
+ * Augmented is __augmented_syscalls__ BPF_OUTPUT event
5054
+ * Above we made sure we can get from the payload the tp fields
5055
+ * that we get from syscalls:sys_enter tracefs format file.
5056
+ */
5057
+ augmented->handler = trace__sys_enter;
5058
+ /*
5059
+ * Now we do the same for the *syscalls:sys_enter event so that
5060
+ * if we handle it directly, i.e. if the BPF prog returns 0 so
5061
+ * as not to filter it, then we'll handle it just like we would
5062
+ * for the BPF_OUTPUT one:
5063
+ */
5064
+ if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
5065
+ evsel__init_augmented_syscall_tp_args(evsel))
5066
+ goto out;
5067
+ evsel->handler = trace__sys_enter;
5068
+ }
5069
+
5070
+ if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
5071
+ struct syscall_tp *sc;
5072
+init_augmented_syscall_tp:
5073
+ if (evsel__init_augmented_syscall_tp(evsel, evsel))
5074
+ goto out;
5075
+ sc = __evsel__syscall_tp(evsel);
5076
+ /*
5077
+ * For now with BPF raw_augmented we hook into
5078
+ * raw_syscalls:sys_enter and there we get all
5079
+ * 6 syscall args plus the tracepoint common
5080
+ * fields and the syscall_nr (another long).
5081
+ * So we check if that is the case and if so
5082
+ * don't look after the sc->args_size but
5083
+ * always after the full raw_syscalls:sys_enter
5084
+ * payload, which is fixed.
5085
+ *
5086
+ * We'll revisit this later to pass
5087
+ * s->args_size to the BPF augmenter (now
5088
+ * tools/perf/examples/bpf/augmented_raw_syscalls.c),
5089
+ * so that it copies only what we need for each
5090
+ * syscall, like what happens when we use
5091
+ * syscalls:sys_enter_NAME, so that we reduce
5092
+ * the kernel/userspace traffic to just what is
5093
+ * needed for each syscall.
5094
+ */
5095
+ if (trace.raw_augmented_syscalls)
5096
+ trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
5097
+ evsel__init_augmented_syscall_tp_ret(evsel);
5098
+ evsel->handler = trace__sys_exit;
5099
+ }
5100
+ }
5101
+ }
5102
+
33405103 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
33415104 return trace__record(&trace, argc-1, &argv[1]);
5105
+
5106
+ /* Using just --errno-summary will trigger --summary */
5107
+ if (trace.errno_summary && !trace.summary && !trace.summary_only)
5108
+ trace.summary_only = true;
33425109
33435110 /* summary_only implies summary option, but don't overwrite summary if set */
33445111 if (trace.summary_only)
33455112 trace.summary = trace.summary_only;
3346
-
3347
- if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3348
- trace.evlist->nr_entries == 0 /* Was --events used? */) {
3349
- trace.trace_syscalls = true;
3350
- }
33515113
33525114 if (output_name != NULL) {
33535115 err = trace__open_output(&trace, output_name);
....@@ -3356,6 +5118,10 @@
33565118 goto out;
33575119 }
33585120 }
5121
+
5122
+ err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
5123
+ if (err)
5124
+ goto out_close;
33595125
33605126 err = target__validate(&trace.opts.target);
33615127 if (err) {
....@@ -3383,5 +5149,6 @@
33835149 if (output_name != NULL)
33845150 fclose(trace.output);
33855151 out:
5152
+ zfree(&trace.perfconfig_events);
33865153 return err;
33875154 }