hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/tools/perf/builtin-trace.c
....@@ -12,21 +12,33 @@
1212 * Initially based on the 'trace' prototype by Thomas Gleixner:
1313 *
1414 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15
- *
16
- * Released under the GPL v2. (and only v2, not any later version)
1715 */
1816
17
+#include "util/record.h"
1918 #include <traceevent/event-parse.h>
2019 #include <api/fs/tracing_path.h>
20
+#include <bpf/bpf.h>
21
+#include "util/bpf_map.h"
22
+#include "util/rlimit.h"
2123 #include "builtin.h"
2224 #include "util/cgroup.h"
2325 #include "util/color.h"
26
+#include "util/config.h"
2427 #include "util/debug.h"
28
+#include "util/dso.h"
2529 #include "util/env.h"
2630 #include "util/event.h"
31
+#include "util/evsel.h"
32
+#include "util/evsel_fprintf.h"
33
+#include "util/synthetic-events.h"
2734 #include "util/evlist.h"
35
+#include "util/evswitch.h"
36
+#include "util/mmap.h"
37
+#include <subcmd/pager.h>
2838 #include <subcmd/exec-cmd.h>
2939 #include "util/machine.h"
40
+#include "util/map.h"
41
+#include "util/symbol.h"
3042 #include "util/path.h"
3143 #include "util/session.h"
3244 #include "util/thread.h"
....@@ -35,6 +47,8 @@
3547 #include "util/intlist.h"
3648 #include "util/thread_map.h"
3749 #include "util/stat.h"
50
+#include "util/tool.h"
51
+#include "util/util.h"
3852 #include "trace/beauty/beauty.h"
3953 #include "trace-event.h"
4054 #include "util/parse-events.h"
....@@ -44,6 +58,7 @@
4458 #include "string2.h"
4559 #include "syscalltbl.h"
4660 #include "rb_resort.h"
61
+#include "../perf.h"
4762
4863 #include <errno.h>
4964 #include <inttypes.h>
....@@ -57,9 +72,12 @@
5772 #include <linux/random.h>
5873 #include <linux/stringify.h>
5974 #include <linux/time64.h>
75
+#include <linux/zalloc.h>
6076 #include <fcntl.h>
77
+#include <sys/sysmacros.h>
6178
62
-#include "sane_ctype.h"
79
+#include <linux/ctype.h>
80
+#include <perf/mmap.h>
6381
6482 #ifndef O_CLOEXEC
6583 # define O_CLOEXEC 02000000
....@@ -69,26 +87,65 @@
6987 # define F_LINUX_SPECIFIC_BASE 1024
7088 #endif
7189
90
/*
 * How one syscall argument is formatted/parsed:
 *
 * scnprintf: format the raw value into a human readable string
 * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
 * mask_val: transform the raw value before formatting — used via SCAMV_MOUNT_FLAGS
 *           in the syscall_fmts[] table for mount flags
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	bool	   (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
	void	   *parm;	// opaque callback parameter, e.g. a strarray/strarrays table
	const char *name;
	u16	   nr_entries; // for arrays
	bool	   show_zero;	// print this arg even when its value is 0
};
102
+
103
/*
 * Per-syscall formatting overrides, instantiated in the syscall_fmts[] table
 * and looked up by syscall name (or alias).
 */
struct syscall_fmt {
	const char *name;
	const char *alias;	// alternate name, e.g. "old_mmap" for "mmap" on s390x
	struct {		// names of the BPF augmenter programs for this syscall
		const char *sys_enter,
			   *sys_exit;
	} bpf_prog_name;
	struct syscall_arg_fmt arg[6];	// per-argument formatters; syscalls take at most 6 args
	u8	   nr_args;	// explicit arg count, e.g. clone sets 5
	bool	   errpid;	// NOTE(review): used by clone — return seems pid-or-errno, confirm
	bool	   timeout;	// set for poll/ppoll
	bool	   hexret;	// print the return value in hex, e.g. brk/mmap/mremap
};
116
+
72117 struct trace {
73118 struct perf_tool tool;
74119 struct syscalltbl *sctbl;
75120 struct {
76
- int max;
77121 struct syscall *table;
122
+ struct bpf_map *map;
123
+ struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
124
+ struct bpf_map *sys_enter,
125
+ *sys_exit;
126
+ } prog_array;
78127 struct {
79
- struct perf_evsel *sys_enter,
128
+ struct evsel *sys_enter,
80129 *sys_exit,
81130 *augmented;
82131 } events;
132
+ struct bpf_program *unaugmented_prog;
83133 } syscalls;
134
+ struct {
135
+ struct bpf_map *map;
136
+ } dump;
84137 struct record_opts opts;
85
- struct perf_evlist *evlist;
138
+ struct evlist *evlist;
86139 struct machine *host;
87140 struct thread *current;
141
+ struct bpf_object *bpf_obj;
88142 struct cgroup *cgroup;
89143 u64 base_time;
90144 FILE *output;
91145 unsigned long nr_events;
146
+ unsigned long nr_events_printed;
147
+ unsigned long max_events;
148
+ struct evswitch evswitch;
92149 struct strlist *ev_qualifier;
93150 struct {
94151 size_t nr;
....@@ -97,6 +154,7 @@
97154 struct {
98155 size_t nr;
99156 pid_t *entries;
157
+ struct bpf_map *map;
100158 } filter_pids;
101159 double duration_filter;
102160 double runtime_ms;
....@@ -106,6 +164,10 @@
106164 } stats;
107165 unsigned int max_stack;
108166 unsigned int min_stack;
167
+ int raw_augmented_syscalls_args_size;
168
+ bool raw_augmented_syscalls;
169
+ bool fd_path_disabled;
170
+ bool sort_events;
109171 bool not_ev_qualifier;
110172 bool live;
111173 bool full_time;
....@@ -113,15 +175,28 @@
113175 bool multiple_threads;
114176 bool summary;
115177 bool summary_only;
178
+ bool errno_summary;
116179 bool failure_only;
117180 bool show_comm;
118181 bool print_sample;
119182 bool show_tool_stats;
120183 bool trace_syscalls;
184
+ bool libtraceevent_print;
121185 bool kernel_syscallchains;
186
+ s16 args_alignment;
187
+ bool show_tstamp;
188
+ bool show_duration;
189
+ bool show_zeros;
190
+ bool show_arg_names;
191
+ bool show_string_prefix;
122192 bool force;
123193 bool vfs_getname;
124194 int trace_pgfaults;
195
+ char *perfconfig_events;
196
+ struct {
197
+ struct ordered_events data;
198
+ u64 last;
199
+ } oe;
125200 };
126201
127202 struct tp_field {
....@@ -181,7 +256,7 @@
181256 return 0;
182257 }
183258
184
/* Bind an unsigned integer tracepoint field, taking size/offset from libtraceevent. */
static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
{
	return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
}
....@@ -198,7 +273,7 @@
198273 return 0;
199274 }
200275
201
/* Bind a pointer tracepoint field at the libtraceevent-provided offset. */
static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
{
	return __tp_field__init_ptr(field, format_field->offset);
}
....@@ -210,11 +285,90 @@
210285 };
211286 };
212287
213
-static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
214
- struct tp_field *field,
215
- const char *name)
288
+/*
289
+ * The evsel->priv as used by 'perf trace'
290
+ * sc: for raw_syscalls:sys_{enter,exit} and syscalls:sys_{enter,exit}_SYSCALLNAME
291
+ * fmt: for all the other tracepoints
292
+ */
293
+struct evsel_trace {
294
+ struct syscall_tp sc;
295
+ struct syscall_arg_fmt *fmt;
296
+};
297
+
298
/* Allocate the zero-initialized per-evsel trace state hung off evsel->priv. */
static struct evsel_trace *evsel_trace__new(void)
{
	return zalloc(sizeof(struct evsel_trace));
}
302
+
303
/* Release the per-evsel state: the lazily allocated fmt array, then the container. */
static void evsel_trace__delete(struct evsel_trace *et)
{
	if (et == NULL)
		return;

	zfree(&et->fmt);
	free(et);
}
311
+
312
+/*
313
+ * Used with raw_syscalls:sys_{enter,exit} and with the
314
+ * syscalls:sys_{enter,exit}_SYSCALL tracepoints
315
+ */
316
/* Caller must have ensured evsel->priv is set, see evsel__syscall_tp(). */
static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel)
{
	struct evsel_trace *et = evsel->priv;

	return &et->sc;
}
322
+
323
+static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel)
324
+{
325
+ if (evsel->priv == NULL) {
326
+ evsel->priv = evsel_trace__new();
327
+ if (evsel->priv == NULL)
328
+ return NULL;
329
+ }
330
+
331
+ return __evsel__syscall_tp(evsel);
332
+}
333
+
334
+/*
335
+ * Used with all the other tracepoints.
336
+ */
337
/* Caller must have ensured et->fmt was allocated, see evsel__syscall_arg_fmt(). */
static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel)
{
	struct evsel_trace *et = evsel->priv;

	return et->fmt;
}
343
+
344
/*
 * Accessor for the per-field formatter array used with all the other
 * tracepoints: lazily allocates both the evsel_trace container and the fmt
 * array, one slot per tracepoint field.  Returns NULL on allocation failure.
 */
static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
{
	struct evsel_trace *et = evsel->priv;

	if (evsel->priv == NULL) {
		et = evsel->priv = evsel_trace__new();

		if (et == NULL)
			return NULL;
	}

	if (et->fmt == NULL) {
		et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
		if (et->fmt == NULL)
			goto out_delete;
	}

	return __evsel__syscall_arg_fmt(evsel);

out_delete:
	/*
	 * NOTE(review): when only the calloc above fails this also frees a
	 * pre-existing evsel->priv, dropping any sc state it held — confirm
	 * that is intended.
	 */
	evsel_trace__delete(evsel->priv);
	evsel->priv = NULL;
	return NULL;
}
368
+
369
+static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
370
+{
371
+ struct tep_format_field *format_field = evsel__field(evsel, name);
218372
219373 if (format_field == NULL)
220374 return -1;
....@@ -223,14 +377,12 @@
223377 }
224378
225379 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
226
- ({ struct syscall_tp *sc = evsel->priv;\
227
- perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
380
+ ({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
381
+ evsel__init_tp_uint_field(evsel, &sc->name, #name); })
228382
229
-static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
230
- struct tp_field *field,
231
- const char *name)
383
+static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
232384 {
233
- struct format_field *format_field = perf_evsel__field(evsel, name);
385
+ struct tep_format_field *format_field = evsel__field(evsel, name);
234386
235387 if (format_field == NULL)
236388 return -1;
....@@ -239,117 +391,136 @@
239391 }
240392
241393 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
242
- ({ struct syscall_tp *sc = evsel->priv;\
243
- perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
394
+ ({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
395
+ evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
244396
245
/*
 * Free the evsel->priv state, then the evsel itself.
 * NOTE(review): priv is released with a plain zfree(), not
 * evsel_trace__delete(), so a lazily allocated ->fmt array would leak —
 * presumably only used for evsels that never allocate one; confirm.
 */
static void evsel__delete_priv(struct evsel *evsel)
{
	zfree(&evsel->priv);
	evsel__delete(evsel);
}
250402
251
/*
 * Resolve the syscall id field of a raw_syscalls/syscalls tracepoint:
 * try "__syscall_nr" first, then fall back to "nr".
 * Returns 0 on success, -ENOENT if neither field exists, -ENOMEM if the
 * per-evsel state could not be allocated.
 */
static int evsel__init_syscall_tp(struct evsel *evsel)
{
	struct syscall_tp *sc = evsel__syscall_tp(evsel);

	if (sc != NULL) {
		if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
		    evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
			return -ENOENT;
		return 0;
	}

	return -ENOMEM;
}
266416
267
/*
 * Initialize the syscall id field of an augmented-payload evsel, taking its
 * size/offset from the "id" (or "__syscall_nr") field of the template
 * tracepoint 'tp'.  Returns 0 on success, -EINVAL if no id field can be
 * found or bound, -ENOMEM on allocation failure.
 */
static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
{
	struct syscall_tp *sc = evsel__syscall_tp(evsel);

	if (sc != NULL) {
		struct tep_format_field *syscall_id = evsel__field(tp, "id");
		if (syscall_id == NULL)
			syscall_id = evsel__field(tp, "__syscall_nr");
		if (syscall_id == NULL ||
		    __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
			return -EINVAL;

		return 0;
	}

	return -ENOMEM;
}
283434
284
/* The augmented args blob sits right after the u64 syscall id in the payload. */
static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
{
	struct syscall_tp *sc = __evsel__syscall_tp(evsel);

	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
}
290441
291
/* Bind the u64 return value, located right after the u64 syscall id. */
static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
{
	struct syscall_tp *sc = __evsel__syscall_tp(evsel);

	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
}
448
+
449
/*
 * Set up the syscall id field and the event handler for a
 * raw_syscalls:sys_{enter,exit} evsel.  Returns 0 on success, -ENOENT if
 * the id field is missing, -ENOMEM on allocation failure.
 */
static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
{
	if (evsel__syscall_tp(evsel) != NULL) {
		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
			return -ENOENT;

		evsel->handler = handler;
		return 0;
	}

	return -ENOMEM;
}
308461
309
/*
 * Create a raw_syscalls:<direction> tracepoint evsel with its syscall id
 * field and handler initialized.  Returns NULL on failure.
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *evsel = evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (evsel__init_raw_syscall_tp(evsel, handler))
		goto out_delete;

	return evsel;

out_delete:
	evsel__delete_priv(evsel);
	return NULL;
}
329482
330483 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
331
- ({ struct syscall_tp *fields = evsel->priv; \
484
+ ({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
332485 fields->name.integer(&fields->name, sample); })
333486
334487 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
335
- ({ struct syscall_tp *fields = evsel->priv; \
488
+ ({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
336489 fields->name.pointer(&fields->name, sample); })
337490
338
-size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
491
+size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val)
339492 {
340493 int idx = val - sa->offset;
341494
342
- if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
343
- return scnprintf(bf, size, intfmt, val);
495
+ if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
496
+ size_t printed = scnprintf(bf, size, intfmt, val);
497
+ if (show_suffix)
498
+ printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
499
+ return printed;
500
+ }
344501
345
- return scnprintf(bf, size, "%s", sa->entries[idx]);
502
+ return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : "");
503
+}
504
+
505
+size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
506
+{
507
+ int idx = val - sa->offset;
508
+
509
+ if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
510
+ size_t printed = scnprintf(bf, size, intfmt, val);
511
+ if (show_prefix)
512
+ printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
513
+ return printed;
514
+ }
515
+
516
+ return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
346517 }
347518
/* Adapter: format a syscall arg through the strarray stashed in arg->parm. */
static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
						const char *intfmt,
						struct syscall_arg *arg)
{
	return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
}
354525
355526 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
....@@ -360,34 +531,123 @@
360531
361532 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
362533
363
-struct strarrays {
364
- int nr_entries;
365
- struct strarray **entries;
366
-};
534
/* Parse a string back into a value via the strarray in arg->parm. */
bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	return strarray__strtoul(arg->parm, bf, size, ret);
}
367538
368
-#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
369
- .nr_entries = ARRAY_SIZE(array), \
370
- .entries = array, \
539
/* Parse a '|'-separated flags string back into a mask via the strarray in arg->parm. */
bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	return strarray__strtoul_flags(arg->parm, bf, size, ret);
}
543
+
544
/* Parse a string back into a value via the set of strarrays in arg->parm. */
bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
{
	return strarrays__strtoul(arg->parm, bf, size, ret);
}
548
+
549
/* Format a flags-style syscall arg through the strarray stashed in arg->parm. */
size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
{
	return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
}
553
+
554
+size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
555
+{
556
+ size_t printed;
557
+ int i;
558
+
559
+ for (i = 0; i < sas->nr_entries; ++i) {
560
+ struct strarray *sa = sas->entries[i];
561
+ int idx = val - sa->offset;
562
+
563
+ if (idx >= 0 && idx < sa->nr_entries) {
564
+ if (sa->entries[idx] == NULL)
565
+ break;
566
+ return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
567
+ }
568
+ }
569
+
570
+ printed = scnprintf(bf, size, intfmt, val);
571
+ if (show_prefix)
572
+ printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
573
+ return printed;
574
+}
575
+
576
+bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
577
+{
578
+ int i;
579
+
580
+ for (i = 0; i < sa->nr_entries; ++i) {
581
+ if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') {
582
+ *ret = sa->offset + i;
583
+ return true;
584
+ }
585
+ }
586
+
587
+ return false;
588
+}
589
+
590
+bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret)
591
+{
592
+ u64 val = 0;
593
+ char *tok = bf, *sep, *end;
594
+
595
+ *ret = 0;
596
+
597
+ while (size != 0) {
598
+ int toklen = size;
599
+
600
+ sep = memchr(tok, '|', size);
601
+ if (sep != NULL) {
602
+ size -= sep - tok + 1;
603
+
604
+ end = sep - 1;
605
+ while (end > tok && isspace(*end))
606
+ --end;
607
+
608
+ toklen = end - tok + 1;
609
+ }
610
+
611
+ while (isspace(*tok))
612
+ ++tok;
613
+
614
+ if (isalpha(*tok) || *tok == '_') {
615
+ if (!strarray__strtoul(sa, tok, toklen, &val))
616
+ return false;
617
+ } else {
618
+ bool is_hexa = tok[0] == 0 && (tok[1] = 'x' || tok[1] == 'X');
619
+
620
+ val = strtoul(tok, NULL, is_hexa ? 16 : 0);
621
+ }
622
+
623
+ *ret |= (1 << (val - 1));
624
+
625
+ if (sep == NULL)
626
+ break;
627
+ tok = sep + 1;
628
+ }
629
+
630
+ return true;
631
+}
632
+
633
+bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
634
+{
635
+ int i;
636
+
637
+ for (i = 0; i < sas->nr_entries; ++i) {
638
+ struct strarray *sa = sas->entries[i];
639
+
640
+ if (strarray__strtoul(sa, bf, size, ret))
641
+ return true;
642
+ }
643
+
644
+ return false;
371645 }
372646
/* Adapter: format a syscall arg through the set of strarrays in arg->parm. */
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
					struct syscall_arg *arg)
{
	return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
}
392652
393653 #ifndef AT_FDCWD
....@@ -398,9 +658,10 @@
398658 struct syscall_arg *arg)
399659 {
400660 int fd = arg->val;
661
+ const char *prefix = "AT_FD";
401662
402663 if (fd == AT_FDCWD)
403
- return scnprintf(bf, size, "CWD");
664
+ return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
404665
405666 return syscall_arg__scnprintf_fd(bf, size, arg);
406667 }
....@@ -417,6 +678,13 @@
417678 return scnprintf(bf, size, "%#lx", arg->val);
418679 }
419680
681
+size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
682
+{
683
+ if (arg->val == 0)
684
+ return scnprintf(bf, size, "NULL");
685
+ return syscall_arg__scnprintf_hex(bf, size, arg);
686
+}
687
+
420688 size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
421689 {
422690 return scnprintf(bf, size, "%d", arg->val);
....@@ -427,17 +695,36 @@
427695 return scnprintf(bf, size, "%ld", arg->val);
428696 }
429697
698
/*
 * Print a char-array argument as a quoted, length-bounded string; the bound
 * is fmt->nr_entries when the array size is known, else the runtime arg length.
 */
static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
{
	// XXX Hey, maybe for sched:sched_switch prev/next comm fields we can
	// fill missing comms using thread__set_comm()...
	// here or in a special syscall_arg__scnprintf_pid_sched_tp...
	return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val);
}
705
+
706
+#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array
707
+
430708 static const char *bpf_cmd[] = {
431709 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
432710 "MAP_GET_NEXT_KEY", "PROG_LOAD",
433711 };
434
-static DEFINE_STRARRAY(bpf_cmd);
712
+static DEFINE_STRARRAY(bpf_cmd, "BPF_");
713
+
714
+static const char *fsmount_flags[] = {
715
+ [1] = "CLOEXEC",
716
+};
717
+static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
718
+
719
+#include "trace/beauty/generated/fsconfig_arrays.c"
720
+
721
+static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
435722
436723 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
437
-static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
724
+static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
438725
439726 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
440
-static DEFINE_STRARRAY(itimers);
727
+static DEFINE_STRARRAY(itimers, "ITIMER_");
441728
442729 static const char *keyctl_options[] = {
443730 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
....@@ -446,7 +733,7 @@
446733 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
447734 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
448735 };
449
-static DEFINE_STRARRAY(keyctl_options);
736
+static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");
450737
451738 static const char *whences[] = { "SET", "CUR", "END",
452739 #ifdef SEEK_DATA
....@@ -456,7 +743,7 @@
456743 "HOLE",
457744 #endif
458745 };
459
-static DEFINE_STRARRAY(whences);
746
+static DEFINE_STRARRAY(whences, "SEEK_");
460747
461748 static const char *fcntl_cmds[] = {
462749 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
....@@ -464,7 +751,7 @@
464751 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
465752 "GETOWNER_UIDS",
466753 };
467
-static DEFINE_STRARRAY(fcntl_cmds);
754
+static DEFINE_STRARRAY(fcntl_cmds, "F_");
468755
469756 static const char *fcntl_linux_specific_cmds[] = {
470757 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
....@@ -472,7 +759,7 @@
472759 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
473760 };
474761
475
-static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
762
+static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);
476763
477764 static struct strarray *fcntl_cmds_arrays[] = {
478765 &strarray__fcntl_cmds,
....@@ -486,39 +773,31 @@
486773 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
487774 "RTTIME",
488775 };
489
-static DEFINE_STRARRAY(rlimit_resources);
776
+static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");
490777
491778 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
492
-static DEFINE_STRARRAY(sighow);
779
+static DEFINE_STRARRAY(sighow, "SIG_");
493780
494781 static const char *clockid[] = {
495782 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
496783 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
497784 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
498785 };
499
-static DEFINE_STRARRAY(clockid);
500
-
501
-static const char *socket_families[] = {
502
- "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
503
- "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
504
- "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
505
- "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
506
- "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
507
- "ALG", "NFC", "VSOCK",
508
-};
509
-static DEFINE_STRARRAY(socket_families);
786
+static DEFINE_STRARRAY(clockid, "CLOCK_");
510787
511788 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
512789 struct syscall_arg *arg)
513790 {
791
+ bool show_prefix = arg->show_string_prefix;
792
+ const char *suffix = "_OK";
514793 size_t printed = 0;
515794 int mode = arg->val;
516795
517796 if (mode == F_OK) /* 0 */
518
- return scnprintf(bf, size, "F");
797
+ return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
519798 #define P_MODE(n) \
520799 if (mode & n##_OK) { \
521
- printed += scnprintf(bf + printed, size - printed, "%s", #n); \
800
+ printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
522801 mode &= ~n##_OK; \
523802 }
524803
....@@ -543,11 +822,13 @@
543822 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
544823 struct syscall_arg *arg)
545824 {
825
+ bool show_prefix = arg->show_string_prefix;
826
+ const char *prefix = "O_";
546827 int printed = 0, flags = arg->val;
547828
548829 #define P_FLAG(n) \
549830 if (flags & O_##n) { \
550
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
831
+ printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
551832 flags &= ~O_##n; \
552833 }
553834
....@@ -573,11 +854,13 @@
573854 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
574855 struct syscall_arg *arg)
575856 {
857
+ bool show_prefix = arg->show_string_prefix;
858
+ const char *prefix = "GRND_";
576859 int printed = 0, flags = arg->val;
577860
578861 #define P_FLAG(n) \
579862 if (flags & GRND_##n) { \
580
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
863
+ printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
581864 flags &= ~GRND_##n; \
582865 }
583866
....@@ -595,6 +878,12 @@
595878
596879 #define STRARRAY(name, array) \
597880 { .scnprintf = SCA_STRARRAY, \
881
+ .strtoul = STUL_STRARRAY, \
882
+ .parm = &strarray__##array, }
883
+
884
+#define STRARRAY_FLAGS(name, array) \
885
+ { .scnprintf = SCA_STRARRAY_FLAGS, \
886
+ .strtoul = STUL_STRARRAY_FLAGS, \
598887 .parm = &strarray__##array, }
599888
600889 #include "trace/beauty/arch_errno_names.c"
....@@ -613,28 +902,20 @@
613902 #include "trace/beauty/socket_type.c"
614903 #include "trace/beauty/waitid_options.c"
615904
616
-struct syscall_arg_fmt {
617
- size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
618
- void *parm;
619
- const char *name;
620
- bool show_zero;
621
-};
622
-
623
-static struct syscall_fmt {
624
- const char *name;
625
- const char *alias;
626
- struct syscall_arg_fmt arg[6];
627
- u8 nr_args;
628
- bool errpid;
629
- bool timeout;
630
- bool hexret;
631
-} syscall_fmts[] = {
905
+static struct syscall_fmt syscall_fmts[] = {
632906 { .name = "access",
633907 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
908
+ { .name = "arch_prctl",
909
+ .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
910
+ [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
911
+ { .name = "bind",
912
+ .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
913
+ [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
914
+ [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
634915 { .name = "bpf",
635916 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
636917 { .name = "brk", .hexret = true,
637
- .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
918
+ .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
638919 { .name = "clock_gettime",
639920 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
640921 { .name = "clone", .errpid = true, .nr_args = 5,
....@@ -645,6 +926,10 @@
645926 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
646927 { .name = "close",
647928 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
929
+ { .name = "connect",
930
+ .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
931
+ [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
932
+ [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
648933 { .name = "epoll_ctl",
649934 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
650935 { .name = "eventfd2",
....@@ -654,12 +939,22 @@
654939 { .name = "fchownat",
655940 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
656941 { .name = "fcntl",
657
- .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
942
+ .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
943
+ .strtoul = STUL_STRARRAYS,
658944 .parm = &strarrays__fcntl_cmds_arrays,
659945 .show_zero = true, },
660946 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
661947 { .name = "flock",
662948 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
949
+ { .name = "fsconfig",
950
+ .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
951
+ { .name = "fsmount",
952
+ .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
953
+ [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
954
+ { .name = "fspick",
955
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
956
+ [1] = { .scnprintf = SCA_FILENAME, /* path */ },
957
+ [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
663958 { .name = "fstat", .alias = "newfstat", },
664959 { .name = "fstatat", .alias = "newfstatat", },
665960 { .name = "futex",
....@@ -710,31 +1005,33 @@
7101005 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
7111006 { .name = "mknodat",
7121007 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
713
- { .name = "mlock",
714
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
715
- { .name = "mlockall",
716
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
7171008 { .name = "mmap", .hexret = true,
7181009 /* The standard mmap maps to old_mmap on s390x */
7191010 #if defined(__s390x__)
7201011 .alias = "old_mmap",
7211012 #endif
722
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
723
- [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
724
- [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1013
+ .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
1014
+ [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */
1015
+ .strtoul = STUL_STRARRAY_FLAGS,
1016
+ .parm = &strarray__mmap_flags, },
1017
+ [5] = { .scnprintf = SCA_HEX, /* offset */ }, }, },
1018
+ { .name = "mount",
1019
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
1020
+ [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
1021
+ .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
1022
+ { .name = "move_mount",
1023
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ },
1024
+ [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ },
1025
+ [2] = { .scnprintf = SCA_FDAT, /* to_dfd */ },
1026
+ [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ },
1027
+ [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
7251028 { .name = "mprotect",
7261029 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
7271030 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
7281031 { .name = "mq_unlink",
7291032 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
7301033 { .name = "mremap", .hexret = true,
731
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
732
- [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
733
- [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
734
- { .name = "munlock",
735
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
736
- { .name = "munmap",
737
- .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1034
+ .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
7381035 { .name = "name_to_handle_at",
7391036 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
7401037 { .name = "newfstatat",
....@@ -763,8 +1060,10 @@
7631060 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
7641061 { .name = "poll", .timeout = true, },
7651062 { .name = "ppoll", .timeout = true, },
766
- { .name = "prctl", .alias = "arch_prctl",
767
- .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
1063
+ { .name = "prctl",
1064
+ .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */
1065
+ .strtoul = STUL_STRARRAY,
1066
+ .parm = &strarray__prctl_options, },
7681067 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
7691068 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
7701069 { .name = "pread", .alias = "pread64", },
....@@ -781,7 +1080,12 @@
7811080 { .name = "recvmsg",
7821081 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
7831082 { .name = "renameat",
784
- .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1083
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
1084
+ [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, }, },
1085
+ { .name = "renameat2",
1086
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
1087
+ [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
1088
+ [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
7851089 { .name = "rt_sigaction",
7861090 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
7871091 { .name = "rt_sigprocmask",
....@@ -796,12 +1100,14 @@
7961100 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
7971101 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
7981102 { .name = "select", .timeout = true, },
1103
+ { .name = "sendfile", .alias = "sendfile64", },
7991104 { .name = "sendmmsg",
8001105 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
8011106 { .name = "sendmsg",
8021107 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
8031108 { .name = "sendto",
804
- .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1109
+ .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
1110
+ [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
8051111 { .name = "set_tid_address", .errpid = true, },
8061112 { .name = "setitimer",
8071113 .arg = { [0] = STRARRAY(which, itimers), }, },
....@@ -826,10 +1132,14 @@
8261132 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
8271133 { .name = "symlinkat",
8281134 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1135
+ { .name = "sync_file_range",
1136
+ .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, },
8291137 { .name = "tgkill",
8301138 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
8311139 { .name = "tkill",
8321140 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1141
+ { .name = "umount2", .alias = "umount",
1142
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
8331143 { .name = "uname", .alias = "newuname", },
8341144 { .name = "unlinkat",
8351145 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
....@@ -847,25 +1157,66 @@
8471157 return strcmp(name, fmt->name);
8481158 }
8491159
1160
+static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
1161
+{
1162
+ return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1163
+}
1164
+
8501165 static struct syscall_fmt *syscall_fmt__find(const char *name)
8511166 {
8521167 const int nmemb = ARRAY_SIZE(syscall_fmts);
853
- return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1168
+ return __syscall_fmt__find(syscall_fmts, nmemb, name);
1169
+}
1170
+
1171
+static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
1172
+{
1173
+ int i;
1174
+
1175
+ for (i = 0; i < nmemb; ++i) {
1176
+ if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
1177
+ return &fmts[i];
1178
+ }
1179
+
1180
+ return NULL;
1181
+}
1182
+
1183
+static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
1184
+{
1185
+ const int nmemb = ARRAY_SIZE(syscall_fmts);
1186
+ return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
8541187 }
8551188
8561189 /*
8571190 * is_exit: is this "exit" or "exit_group"?
8581191 * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
1192
+ * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
1193
+ * nonexistent: Just a hole in the syscall table, syscall id not allocated
8591194 */
8601195 struct syscall {
861
- struct event_format *tp_format;
1196
+ struct tep_event *tp_format;
8621197 int nr_args;
1198
+ int args_size;
1199
+ struct {
1200
+ struct bpf_program *sys_enter,
1201
+ *sys_exit;
1202
+ } bpf_prog;
8631203 bool is_exit;
8641204 bool is_open;
865
- struct format_field *args;
1205
+ bool nonexistent;
1206
+ struct tep_format_field *args;
8661207 const char *name;
8671208 struct syscall_fmt *fmt;
8681209 struct syscall_arg_fmt *arg_fmt;
1210
+};
1211
+
1212
+/*
1213
+ * Must match what is in the BPF program:
1214
+ *
1215
+ * tools/perf/examples/bpf/augmented_raw_syscalls.c
1216
+ */
1217
+struct bpf_map_syscall_entry {
1218
+ bool enabled;
1219
+ u16 string_args_len[6];
8691220 };
8701221
8711222 /*
....@@ -914,9 +1265,9 @@
9141265 char *name;
9151266 } filename;
9161267 struct {
917
- int max;
918
- char **table;
919
- } paths;
1268
+ int max;
1269
+ struct file *table;
1270
+ } files;
9201271
9211272 struct intlist *syscall_stats;
9221273 };
....@@ -925,10 +1276,10 @@
9251276 {
9261277 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
9271278
928
- if (ttrace)
929
- ttrace->paths.max = -1;
930
-
931
- ttrace->syscall_stats = intlist__new(NULL);
1279
+ if (ttrace) {
1280
+ ttrace->files.max = -1;
1281
+ ttrace->syscall_stats = intlist__new(NULL);
1282
+ }
9321283
9331284 return ttrace;
9341285 }
....@@ -970,30 +1321,51 @@
9701321
9711322 static const size_t trace__entry_str_size = 2048;
9721323
1324
+static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1325
+{
1326
+ if (fd < 0)
1327
+ return NULL;
1328
+
1329
+ if (fd > ttrace->files.max) {
1330
+ struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1331
+
1332
+ if (nfiles == NULL)
1333
+ return NULL;
1334
+
1335
+ if (ttrace->files.max != -1) {
1336
+ memset(nfiles + ttrace->files.max + 1, 0,
1337
+ (fd - ttrace->files.max) * sizeof(struct file));
1338
+ } else {
1339
+ memset(nfiles, 0, (fd + 1) * sizeof(struct file));
1340
+ }
1341
+
1342
+ ttrace->files.table = nfiles;
1343
+ ttrace->files.max = fd;
1344
+ }
1345
+
1346
+ return ttrace->files.table + fd;
1347
+}
1348
+
1349
+struct file *thread__files_entry(struct thread *thread, int fd)
1350
+{
1351
+ return thread_trace__files_entry(thread__priv(thread), fd);
1352
+}
1353
+
9731354 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
9741355 {
9751356 struct thread_trace *ttrace = thread__priv(thread);
1357
+ struct file *file = thread_trace__files_entry(ttrace, fd);
9761358
977
- if (fd > ttrace->paths.max) {
978
- char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
979
-
980
- if (npath == NULL)
981
- return -1;
982
-
983
- if (ttrace->paths.max != -1) {
984
- memset(npath + ttrace->paths.max + 1, 0,
985
- (fd - ttrace->paths.max) * sizeof(char *));
986
- } else {
987
- memset(npath, 0, (fd + 1) * sizeof(char *));
988
- }
989
-
990
- ttrace->paths.table = npath;
991
- ttrace->paths.max = fd;
1359
+ if (file != NULL) {
1360
+ struct stat st;
1361
+ if (stat(pathname, &st) == 0)
1362
+ file->dev_maj = major(st.st_rdev);
1363
+ file->pathname = strdup(pathname);
1364
+ if (file->pathname)
1365
+ return 0;
9921366 }
9931367
994
- ttrace->paths.table[fd] = strdup(pathname);
995
-
996
- return ttrace->paths.table[fd] != NULL ? 0 : -1;
1368
+ return -1;
9971369 }
9981370
9991371 static int thread__read_fd_path(struct thread *thread, int fd)
....@@ -1027,13 +1399,13 @@
10271399 {
10281400 struct thread_trace *ttrace = thread__priv(thread);
10291401
1030
- if (ttrace == NULL)
1402
+ if (ttrace == NULL || trace->fd_path_disabled)
10311403 return NULL;
10321404
10331405 if (fd < 0)
10341406 return NULL;
10351407
1036
- if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1408
+ if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
10371409 if (!trace->live)
10381410 return NULL;
10391411 ++trace->stats.proc_getname;
....@@ -1041,7 +1413,7 @@
10411413 return NULL;
10421414 }
10431415
1044
- return ttrace->paths.table[fd];
1416
+ return ttrace->files.table[fd].pathname;
10451417 }
10461418
10471419 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
....@@ -1080,8 +1452,8 @@
10801452 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
10811453 struct thread_trace *ttrace = thread__priv(arg->thread);
10821454
1083
- if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1084
- zfree(&ttrace->paths.table[fd]);
1455
+ if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1456
+ zfree(&ttrace->files.table[fd].pathname);
10851457
10861458 return printed;
10871459 }
....@@ -1095,10 +1467,29 @@
10951467 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
10961468 }
10971469
1470
+static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1471
+{
1472
+ struct augmented_arg *augmented_arg = arg->augmented.args;
1473
+ size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1474
+ /*
1475
+ * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls
1476
+ * we would have two strings, each prefixed by its size.
1477
+ */
1478
+ int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1479
+
1480
+ arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1481
+ arg->augmented.size -= consumed;
1482
+
1483
+ return printed;
1484
+}
1485
+
10981486 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
10991487 struct syscall_arg *arg)
11001488 {
11011489 unsigned long ptr = arg->val;
1490
+
1491
+ if (arg->augmented.args)
1492
+ return syscall_arg__scnprintf_augmented_string(arg, bf, size);
11021493
11031494 if (!arg->trace->vfs_getname)
11041495 return scnprintf(bf, size, "%#x", ptr);
....@@ -1142,11 +1533,9 @@
11421533 interrupted = sig == SIGINT;
11431534 }
11441535
1145
-static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1146
- u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1536
+static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
11471537 {
1148
- size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1149
- printed += fprintf_duration(duration, duration_calculated, fp);
1538
+ size_t printed = 0;
11501539
11511540 if (trace->multiple_threads) {
11521541 if (trace->show_comm)
....@@ -1155,6 +1544,18 @@
11551544 }
11561545
11571546 return printed;
1547
+}
1548
+
1549
+static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1550
+ u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1551
+{
1552
+ size_t printed = 0;
1553
+
1554
+ if (trace->show_tstamp)
1555
+ printed = trace__fprintf_tstamp(trace, tstamp, fp);
1556
+ if (trace->show_duration)
1557
+ printed += fprintf_duration(duration, duration_calculated, fp);
1558
+ return printed + trace__fprintf_comm_tid(trace, thread, fp);
11581559 }
11591560
11601561 static int trace__process_event(struct trace *trace, struct machine *machine,
....@@ -1194,7 +1595,7 @@
11941595
11951596 if (symbol_conf.kptr_restrict) {
11961597 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1197
- "Check /proc/sys/kernel/kptr_restrict.\n\n"
1598
+ "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
11981599 "Kernel samples will not be resolved.\n");
11991600 machine->kptr_restrict_warned = true;
12001601 return NULL;
....@@ -1203,7 +1604,7 @@
12031604 return machine__resolve_kernel_addr(vmachine, addrp, modp);
12041605 }
12051606
1206
-static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1607
+static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
12071608 {
12081609 int err = symbol__init(NULL);
12091610
....@@ -1219,8 +1620,8 @@
12191620 goto out;
12201621
12211622 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1222
- evlist->threads, trace__tool_process, false,
1223
- trace->opts.proc_map_timeout, 1);
1623
+ evlist->core.threads, trace__tool_process, false,
1624
+ 1);
12241625 out:
12251626 if (err)
12261627 symbol__exit();
....@@ -1256,31 +1657,60 @@
12561657 return 0;
12571658 }
12581659
1259
-static int syscall__set_arg_fmts(struct syscall *sc)
1260
-{
1261
- struct format_field *field;
1262
- int idx = 0, len;
1660
+static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
1661
+ { .name = "msr", .scnprintf = SCA_X86_MSR, .strtoul = STUL_X86_MSR, },
1662
+ { .name = "vector", .scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
1663
+};
12631664
1264
- for (field = sc->args; field; field = field->next, ++idx) {
1265
- if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1665
+static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
1666
+{
1667
+ const struct syscall_arg_fmt *fmt = fmtp;
1668
+ return strcmp(name, fmt->name);
1669
+}
1670
+
1671
+static struct syscall_arg_fmt *
1672
+__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name)
1673
+{
1674
+ return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
1675
+}
1676
+
1677
+static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
1678
+{
1679
+ const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
1680
+ return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
1681
+}
1682
+
1683
+static struct tep_format_field *
1684
+syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
1685
+{
1686
+ struct tep_format_field *last_field = NULL;
1687
+ int len;
1688
+
1689
+ for (; field; field = field->next, ++arg) {
1690
+ last_field = field;
1691
+
1692
+ if (arg->scnprintf)
12661693 continue;
12671694
1695
+ len = strlen(field->name);
1696
+
12681697 if (strcmp(field->type, "const char *") == 0 &&
1269
- (strcmp(field->name, "filename") == 0 ||
1270
- strcmp(field->name, "path") == 0 ||
1271
- strcmp(field->name, "pathname") == 0))
1272
- sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
1273
- else if (field->flags & FIELD_IS_POINTER)
1274
- sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
1698
+ ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
1699
+ strstr(field->name, "path") != NULL))
1700
+ arg->scnprintf = SCA_FILENAME;
1701
+ else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
1702
+ arg->scnprintf = SCA_PTR;
12751703 else if (strcmp(field->type, "pid_t") == 0)
1276
- sc->arg_fmt[idx].scnprintf = SCA_PID;
1704
+ arg->scnprintf = SCA_PID;
12771705 else if (strcmp(field->type, "umode_t") == 0)
1278
- sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
1279
- else if ((strcmp(field->type, "int") == 0 ||
1706
+ arg->scnprintf = SCA_MODE_T;
1707
+ else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char")) {
1708
+ arg->scnprintf = SCA_CHAR_ARRAY;
1709
+ arg->nr_entries = field->arraylen;
1710
+ } else if ((strcmp(field->type, "int") == 0 ||
12801711 strcmp(field->type, "unsigned int") == 0 ||
12811712 strcmp(field->type, "long") == 0) &&
1282
- (len = strlen(field->name)) >= 2 &&
1283
- strcmp(field->name + len - 2, "fd") == 0) {
1713
+ len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
12841714 /*
12851715 * /sys/kernel/tracing/events/syscalls/sys_enter*
12861716 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
....@@ -1288,9 +1718,26 @@
12881718 * 23 unsigned int
12891719 * 7 unsigned long
12901720 */
1291
- sc->arg_fmt[idx].scnprintf = SCA_FD;
1721
+ arg->scnprintf = SCA_FD;
1722
+ } else {
1723
+ struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name);
1724
+
1725
+ if (fmt) {
1726
+ arg->scnprintf = fmt->scnprintf;
1727
+ arg->strtoul = fmt->strtoul;
1728
+ }
12921729 }
12931730 }
1731
+
1732
+ return last_field;
1733
+}
1734
+
1735
+static int syscall__set_arg_fmts(struct syscall *sc)
1736
+{
1737
+ struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
1738
+
1739
+ if (last_field)
1740
+ sc->args_size = last_field->offset + last_field->size;
12941741
12951742 return 0;
12961743 }
....@@ -1301,29 +1748,40 @@
13011748 struct syscall *sc;
13021749 const char *name = syscalltbl__name(trace->sctbl, id);
13031750
1304
- if (name == NULL)
1305
- return -1;
1751
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
1752
+ if (trace->syscalls.table == NULL) {
1753
+ trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
1754
+ if (trace->syscalls.table == NULL)
1755
+ return -ENOMEM;
1756
+ }
1757
+#else
1758
+ if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
1759
+ // When using libaudit we don't know beforehand what is the max syscall id
1760
+ struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
13061761
1307
- if (id > trace->syscalls.max) {
1308
- struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1762
+ if (table == NULL)
1763
+ return -ENOMEM;
13091764
1310
- if (nsyscalls == NULL)
1311
- return -1;
1765
+ // Need to memset from offset 0 and +1 members if brand new
1766
+ if (trace->syscalls.table == NULL)
1767
+ memset(table, 0, (id + 1) * sizeof(*sc));
1768
+ else
1769
+ memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
13121770
1313
- if (trace->syscalls.max != -1) {
1314
- memset(nsyscalls + trace->syscalls.max + 1, 0,
1315
- (id - trace->syscalls.max) * sizeof(*sc));
1316
- } else {
1317
- memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1318
- }
1771
+ trace->syscalls.table = table;
1772
+ trace->sctbl->syscalls.max_id = id;
1773
+ }
1774
+#endif
1775
+ sc = trace->syscalls.table + id;
1776
+ if (sc->nonexistent)
1777
+ return 0;
13191778
1320
- trace->syscalls.table = nsyscalls;
1321
- trace->syscalls.max = id;
1779
+ if (name == NULL) {
1780
+ sc->nonexistent = true;
1781
+ return 0;
13221782 }
13231783
1324
- sc = trace->syscalls.table + id;
13251784 sc->name = name;
1326
-
13271785 sc->fmt = syscall_fmt__find(sc->name);
13281786
13291787 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
....@@ -1335,10 +1793,10 @@
13351793 }
13361794
13371795 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1338
- return -1;
1796
+ return -ENOMEM;
13391797
13401798 if (IS_ERR(sc->tp_format))
1341
- return -1;
1799
+ return PTR_ERR(sc->tp_format);
13421800
13431801 sc->args = sc->tp_format->format.fields;
13441802 /*
....@@ -1357,14 +1815,33 @@
13571815 return syscall__set_arg_fmts(sc);
13581816 }
13591817
1818
+static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
1819
+{
1820
+ struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
1821
+
1822
+ if (fmt != NULL) {
1823
+ syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields);
1824
+ return 0;
1825
+ }
1826
+
1827
+ return -ENOMEM;
1828
+}
1829
+
1830
+static int intcmp(const void *a, const void *b)
1831
+{
1832
+ const int *one = a, *another = b;
1833
+
1834
+ return *one - *another;
1835
+}
1836
+
13601837 static int trace__validate_ev_qualifier(struct trace *trace)
13611838 {
1362
- int err = 0, i;
1363
- size_t nr_allocated;
1839
+ int err = 0;
1840
+ bool printed_invalid_prefix = false;
13641841 struct str_node *pos;
1842
+ size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
13651843
1366
- trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1367
- trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1844
+ trace->ev_qualifier_ids.entries = malloc(nr_allocated *
13681845 sizeof(trace->ev_qualifier_ids.entries[0]));
13691846
13701847 if (trace->ev_qualifier_ids.entries == NULL) {
....@@ -1373,9 +1850,6 @@
13731850 err = -EINVAL;
13741851 goto out;
13751852 }
1376
-
1377
- nr_allocated = trace->ev_qualifier_ids.nr;
1378
- i = 0;
13791853
13801854 strlist__for_each_entry(pos, trace->ev_qualifier) {
13811855 const char *sc = pos->s;
....@@ -1386,17 +1860,18 @@
13861860 if (id >= 0)
13871861 goto matches;
13881862
1389
- if (err == 0) {
1390
- fputs("Error:\tInvalid syscall ", trace->output);
1391
- err = -EINVAL;
1863
+ if (!printed_invalid_prefix) {
1864
+ pr_debug("Skipping unknown syscalls: ");
1865
+ printed_invalid_prefix = true;
13921866 } else {
1393
- fputs(", ", trace->output);
1867
+ pr_debug(", ");
13941868 }
13951869
1396
- fputs(sc, trace->output);
1870
+ pr_debug("%s", sc);
1871
+ continue;
13971872 }
13981873 matches:
1399
- trace->ev_qualifier_ids.entries[i++] = id;
1874
+ trace->ev_qualifier_ids.entries[nr_used++] = id;
14001875 if (match_next == -1)
14011876 continue;
14021877
....@@ -1404,7 +1879,7 @@
14041879 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
14051880 if (id < 0)
14061881 break;
1407
- if (nr_allocated == trace->ev_qualifier_ids.nr) {
1882
+ if (nr_allocated == nr_used) {
14081883 void *entries;
14091884
14101885 nr_allocated += 8;
....@@ -1417,20 +1892,36 @@
14171892 }
14181893 trace->ev_qualifier_ids.entries = entries;
14191894 }
1420
- trace->ev_qualifier_ids.nr++;
1421
- trace->ev_qualifier_ids.entries[i++] = id;
1895
+ trace->ev_qualifier_ids.entries[nr_used++] = id;
14221896 }
14231897 }
14241898
1425
- if (err < 0) {
1426
- fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1427
- "\nHint:\tand: 'man syscalls'\n", trace->output);
1428
-out_free:
1429
- zfree(&trace->ev_qualifier_ids.entries);
1430
- trace->ev_qualifier_ids.nr = 0;
1431
- }
1899
+ trace->ev_qualifier_ids.nr = nr_used;
1900
+ qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
14321901 out:
1902
+ if (printed_invalid_prefix)
1903
+ pr_debug("\n");
14331904 return err;
1905
+out_free:
1906
+ zfree(&trace->ev_qualifier_ids.entries);
1907
+ trace->ev_qualifier_ids.nr = 0;
1908
+ goto out;
1909
+}
1910
+
1911
+static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
1912
+{
1913
+ bool in_ev_qualifier;
1914
+
1915
+ if (trace->ev_qualifier_ids.nr == 0)
1916
+ return true;
1917
+
1918
+ in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
1919
+ trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
1920
+
1921
+ if (in_ev_qualifier)
1922
+ return !trace->not_ev_qualifier;
1923
+
1924
+ return trace->not_ev_qualifier;
14341925 }
14351926
14361927 /*
....@@ -1459,31 +1950,49 @@
14591950 return scnprintf(bf, size, "arg%d: ", arg->idx);
14601951 }
14611952
1462
-static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1463
- struct syscall_arg *arg, unsigned long val)
1953
+/*
1954
+ * Check if the value is in fact zero, i.e. mask whatever needs masking, such
1955
+ * as mount 'flags' argument that needs ignoring some magic flag, see comment
1956
+ * in tools/perf/trace/beauty/mount_flags.c
1957
+ */
1958
+static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val)
14641959 {
1465
- if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1960
+ if (fmt && fmt->mask_val)
1961
+ return fmt->mask_val(arg, val);
1962
+
1963
+ return val;
1964
+}
1965
+
1966
+static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size,
1967
+ struct syscall_arg *arg, unsigned long val)
1968
+{
1969
+ if (fmt && fmt->scnprintf) {
14661970 arg->val = val;
1467
- if (sc->arg_fmt[arg->idx].parm)
1468
- arg->parm = sc->arg_fmt[arg->idx].parm;
1469
- return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1971
+ if (fmt->parm)
1972
+ arg->parm = fmt->parm;
1973
+ return fmt->scnprintf(bf, size, arg);
14701974 }
14711975 return scnprintf(bf, size, "%ld", val);
14721976 }
14731977
14741978 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1475
- unsigned char *args, struct trace *trace,
1476
- struct thread *thread)
1979
+ unsigned char *args, void *augmented_args, int augmented_args_size,
1980
+ struct trace *trace, struct thread *thread)
14771981 {
14781982 size_t printed = 0;
14791983 unsigned long val;
14801984 u8 bit = 1;
14811985 struct syscall_arg arg = {
14821986 .args = args,
1987
+ .augmented = {
1988
+ .size = augmented_args_size,
1989
+ .args = augmented_args,
1990
+ },
14831991 .idx = 0,
14841992 .mask = 0,
14851993 .trace = trace,
14861994 .thread = thread,
1995
+ .show_string_prefix = trace->show_string_prefix,
14871996 };
14881997 struct thread_trace *ttrace = thread__priv(thread);
14891998
....@@ -1495,14 +2004,20 @@
14952004 ttrace->ret_scnprintf = NULL;
14962005
14972006 if (sc->args != NULL) {
1498
- struct format_field *field;
2007
+ struct tep_format_field *field;
14992008
15002009 for (field = sc->args; field;
15012010 field = field->next, ++arg.idx, bit <<= 1) {
15022011 if (arg.mask & bit)
15032012 continue;
15042013
2014
+ arg.fmt = &sc->arg_fmt[arg.idx];
15052015 val = syscall_arg__val(&arg, arg.idx);
2016
+ /*
2017
+ * Some syscall args need some mask, most don't and
2018
+ * return val untouched.
2019
+ */
2020
+ val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);
15062021
15072022 /*
15082023 * Suppress this argument if its value is zero and
....@@ -1510,6 +2025,7 @@
15102025 * strarray for it.
15112026 */
15122027 if (val == 0 &&
2028
+ !trace->show_zeros &&
15132029 !(sc->arg_fmt &&
15142030 (sc->arg_fmt[arg.idx].show_zero ||
15152031 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
....@@ -1517,9 +2033,13 @@
15172033 sc->arg_fmt[arg.idx].parm))
15182034 continue;
15192035
1520
- printed += scnprintf(bf + printed, size - printed,
1521
- "%s%s: ", printed ? ", " : "", field->name);
1522
- printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
2036
+ printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2037
+
2038
+ if (trace->show_arg_names)
2039
+ printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2040
+
2041
+ printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
2042
+ bf + printed, size - printed, &arg, val);
15232043 }
15242044 } else if (IS_ERR(sc->tp_format)) {
15252045 /*
....@@ -1534,7 +2054,7 @@
15342054 if (printed)
15352055 printed += scnprintf(bf + printed, size - printed, ", ");
15362056 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
1537
- printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
2057
+ printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val);
15382058 next_arg:
15392059 ++arg.idx;
15402060 bit <<= 1;
....@@ -1544,13 +2064,14 @@
15442064 return printed;
15452065 }
15462066
1547
-typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
2067
+typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
15482068 union perf_event *event,
15492069 struct perf_sample *sample);
15502070
15512071 static struct syscall *trace__syscall_info(struct trace *trace,
1552
- struct perf_evsel *evsel, int id)
2072
+ struct evsel *evsel, int id)
15532073 {
2074
+ int err = 0;
15542075
15552076 if (id < 0) {
15562077
....@@ -1567,35 +2088,63 @@
15672088 if (verbose > 1) {
15682089 static u64 n;
15692090 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1570
- id, perf_evsel__name(evsel), ++n);
2091
+ id, evsel__name(evsel), ++n);
15712092 }
15722093 return NULL;
15732094 }
15742095
1575
- if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1576
- trace__read_syscall_info(trace, id))
2096
+ err = -EINVAL;
2097
+
2098
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
2099
+ if (id > trace->sctbl->syscalls.max_id) {
2100
+#else
2101
+ if (id >= trace->sctbl->syscalls.max_id) {
2102
+ /*
2103
+ * With libaudit we don't know beforehand what is the max_id,
2104
+ * so we let trace__read_syscall_info() figure that out as we
2105
+ * go on reading syscalls.
2106
+ */
2107
+ err = trace__read_syscall_info(trace, id);
2108
+ if (err)
2109
+#endif
2110
+ goto out_cant_read;
2111
+ }
2112
+
2113
+ if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
2114
+ (err = trace__read_syscall_info(trace, id)) != 0)
15772115 goto out_cant_read;
15782116
1579
- if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
2117
+ if (trace->syscalls.table[id].name == NULL) {
2118
+ if (trace->syscalls.table[id].nonexistent)
2119
+ return NULL;
15802120 goto out_cant_read;
2121
+ }
15812122
15822123 return &trace->syscalls.table[id];
15832124
15842125 out_cant_read:
15852126 if (verbose > 0) {
1586
- fprintf(trace->output, "Problems reading syscall %d", id);
1587
- if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
2127
+ char sbuf[STRERR_BUFSIZE];
2128
+ fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
2129
+ if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
15882130 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
15892131 fputs(" information\n", trace->output);
15902132 }
15912133 return NULL;
15922134 }
15932135
1594
-static void thread__update_stats(struct thread_trace *ttrace,
1595
- int id, struct perf_sample *sample)
2136
+struct syscall_stats {
2137
+ struct stats stats;
2138
+ u64 nr_failures;
2139
+ int max_errno;
2140
+ u32 *errnos;
2141
+};
2142
+
2143
+static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
2144
+ int id, struct perf_sample *sample, long err, bool errno_summary)
15962145 {
15972146 struct int_node *inode;
1598
- struct stats *stats;
2147
+ struct syscall_stats *stats;
15992148 u64 duration = 0;
16002149
16012150 inode = intlist__findnew(ttrace->syscall_stats, id);
....@@ -1604,23 +2153,53 @@
16042153
16052154 stats = inode->priv;
16062155 if (stats == NULL) {
1607
- stats = malloc(sizeof(struct stats));
2156
+ stats = malloc(sizeof(*stats));
16082157 if (stats == NULL)
16092158 return;
1610
- init_stats(stats);
2159
+
2160
+ stats->nr_failures = 0;
2161
+ stats->max_errno = 0;
2162
+ stats->errnos = NULL;
2163
+ init_stats(&stats->stats);
16112164 inode->priv = stats;
16122165 }
16132166
16142167 if (ttrace->entry_time && sample->time > ttrace->entry_time)
16152168 duration = sample->time - ttrace->entry_time;
16162169
1617
- update_stats(stats, duration);
2170
+ update_stats(&stats->stats, duration);
2171
+
2172
+ if (err < 0) {
2173
+ ++stats->nr_failures;
2174
+
2175
+ if (!errno_summary)
2176
+ return;
2177
+
2178
+ err = -err;
2179
+ if (err > stats->max_errno) {
2180
+ u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32));
2181
+
2182
+ if (new_errnos) {
2183
+ memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32));
2184
+ } else {
2185
+ pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n",
2186
+ thread__comm_str(thread), thread->pid_, thread->tid);
2187
+ return;
2188
+ }
2189
+
2190
+ stats->errnos = new_errnos;
2191
+ stats->max_errno = err;
2192
+ }
2193
+
2194
+ ++stats->errnos[err - 1];
2195
+ }
16182196 }
16192197
16202198 static int trace__printf_interrupted_entry(struct trace *trace)
16212199 {
16222200 struct thread_trace *ttrace;
16232201 size_t printed;
2202
+ int len;
16242203
16252204 if (trace->failure_only || trace->current == NULL)
16262205 return 0;
....@@ -1631,13 +2210,20 @@
16312210 return 0;
16322211
16332212 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
1634
- printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
2213
+ printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
2214
+
2215
+ if (len < trace->args_alignment - 4)
2216
+ printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
2217
+
2218
+ printed += fprintf(trace->output, " ...\n");
2219
+
16352220 ttrace->entry_pending = false;
2221
+ ++trace->nr_events_printed;
16362222
16372223 return printed;
16382224 }
16392225
1640
-static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
2226
+static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
16412227 struct perf_sample *sample, struct thread *thread)
16422228 {
16432229 int printed = 0;
....@@ -1646,7 +2232,7 @@
16462232 double ts = (double)sample->time / NSEC_PER_MSEC;
16472233
16482234 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1649
- perf_evsel__name(evsel), ts,
2235
+ evsel__name(evsel), ts,
16502236 thread__comm_str(thread),
16512237 sample->pid, sample->tid, sample->cpu);
16522238 }
....@@ -1654,15 +2240,43 @@
16542240 return printed;
16552241 }
16562242
1657
-static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
2243
+static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
2244
+{
2245
+ void *augmented_args = NULL;
2246
+ /*
2247
+ * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
2248
+ * and there we get all 6 syscall args plus the tracepoint common fields
2249
+ * that gets calculated at the start and the syscall_nr (another long).
2250
+ * So we check if that is the case and if so don't look after the
2251
+ * sc->args_size but always after the full raw_syscalls:sys_enter payload,
2252
+ * which is fixed.
2253
+ *
2254
+ * We'll revisit this later to pass s->args_size to the BPF augmenter
2255
+ * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
2256
+ * copies only what we need for each syscall, like what happens when we
2257
+ * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
2258
+ * traffic to just what is needed for each syscall.
2259
+ */
2260
+ int args_size = raw_augmented_args_size ?: sc->args_size;
2261
+
2262
+ *augmented_args_size = sample->raw_size - args_size;
2263
+ if (*augmented_args_size > 0)
2264
+ augmented_args = sample->raw_data + args_size;
2265
+
2266
+ return augmented_args;
2267
+}
2268
+
2269
+static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
16582270 union perf_event *event __maybe_unused,
16592271 struct perf_sample *sample)
16602272 {
16612273 char *msg;
16622274 void *args;
1663
- size_t printed = 0;
2275
+ int printed = 0;
16642276 struct thread *thread;
16652277 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2278
+ int augmented_args_size = 0;
2279
+ void *augmented_args = NULL;
16662280 struct syscall *sc = trace__syscall_info(trace, evsel, id);
16672281 struct thread_trace *ttrace;
16682282
....@@ -1686,18 +2300,34 @@
16862300
16872301 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
16882302 trace__printf_interrupted_entry(trace);
1689
-
2303
+ /*
2304
+ * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible
2305
+ * arguments, even if the syscall being handled, say "openat", uses only 4 arguments
2306
+ * this breaks syscall__augmented_args() check for augmented args, as we calculate
2307
+ * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file,
2308
+ * so when handling, say the openat syscall, we end up getting 6 args for the
2309
+ * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly
2310
+ * thinking that the extra 2 u64 args are the augmented filename, so just check
2311
+ * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
2312
+ */
2313
+ if (evsel != trace->syscalls.events.sys_enter)
2314
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
16902315 ttrace->entry_time = sample->time;
16912316 msg = ttrace->entry_str;
16922317 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
16932318
16942319 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1695
- args, trace, thread);
2320
+ args, augmented_args, augmented_args_size, trace, thread);
16962321
16972322 if (sc->is_exit) {
16982323 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
2324
+ int alignment = 0;
2325
+
16992326 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1700
- fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
2327
+ printed = fprintf(trace->output, "%s)", ttrace->entry_str);
2328
+ if (trace->args_alignment > printed)
2329
+ alignment = trace->args_alignment - printed;
2330
+ fprintf(trace->output, "%*s= ?\n", alignment, " ");
17012331 }
17022332 } else {
17032333 ttrace->entry_pending = true;
....@@ -1715,7 +2345,7 @@
17152345 return err;
17162346 }
17172347
1718
-static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
2348
+static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
17192349 struct perf_sample *sample)
17202350 {
17212351 struct thread_trace *ttrace;
....@@ -1723,7 +2353,8 @@
17232353 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
17242354 struct syscall *sc = trace__syscall_info(trace, evsel, id);
17252355 char msg[1024];
1726
- void *args;
2356
+ void *args, *augmented_args = NULL;
2357
+ int augmented_args_size;
17272358
17282359 if (sc == NULL)
17292360 return -1;
....@@ -1738,7 +2369,8 @@
17382369 goto out_put;
17392370
17402371 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1741
- syscall__scnprintf_args(sc, msg, sizeof(msg), args, trace, thread);
2372
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2373
+ syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
17422374 fprintf(trace->output, "%s", msg);
17432375 err = 0;
17442376 out_put:
....@@ -1746,20 +2378,22 @@
17462378 return err;
17472379 }
17482380
1749
-static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
2381
+static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
17502382 struct perf_sample *sample,
17512383 struct callchain_cursor *cursor)
17522384 {
17532385 struct addr_location al;
1754
- int max_stack = evsel->attr.sample_max_stack ?
1755
- evsel->attr.sample_max_stack :
2386
+ int max_stack = evsel->core.attr.sample_max_stack ?
2387
+ evsel->core.attr.sample_max_stack :
17562388 trace->max_stack;
2389
+ int err;
17572390
1758
- if (machine__resolve(trace->host, &al, sample) < 0 ||
1759
- thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
2391
+ if (machine__resolve(trace->host, &al, sample) < 0)
17602392 return -1;
17612393
1762
- return 0;
2394
+ err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2395
+ addr_location__put(&al);
2396
+ return err;
17632397 }
17642398
17652399 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
....@@ -1769,18 +2403,18 @@
17692403 EVSEL__PRINT_DSO |
17702404 EVSEL__PRINT_UNKNOWN_AS_ADDR;
17712405
1772
- return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
2406
+ return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
17732407 }
17742408
1775
-static const char *errno_to_name(struct perf_evsel *evsel, int err)
2409
+static const char *errno_to_name(struct evsel *evsel, int err)
17762410 {
1777
- struct perf_env *env = perf_evsel__env(evsel);
2411
+ struct perf_env *env = evsel__env(evsel);
17782412 const char *arch_name = perf_env__arch(env);
17792413
17802414 return arch_syscalls__strerrno(arch_name, err);
17812415 }
17822416
1783
-static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2417
+static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
17842418 union perf_event *event __maybe_unused,
17852419 struct perf_sample *sample)
17862420 {
....@@ -1788,7 +2422,8 @@
17882422 u64 duration = 0;
17892423 bool duration_calculated = false;
17902424 struct thread *thread;
1791
- int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
2425
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
2426
+ int alignment = trace->args_alignment;
17922427 struct syscall *sc = trace__syscall_info(trace, evsel, id);
17932428 struct thread_trace *ttrace;
17942429
....@@ -1802,12 +2437,12 @@
18022437
18032438 trace__fprintf_sample(trace, evsel, sample, thread);
18042439
1805
- if (trace->summary)
1806
- thread__update_stats(ttrace, id, sample);
1807
-
18082440 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
18092441
1810
- if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
2442
+ if (trace->summary)
2443
+ thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);
2444
+
2445
+ if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
18112446 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
18122447 ttrace->filename.pending_open = false;
18132448 ++trace->stats.vfs_getname;
....@@ -1836,28 +2471,38 @@
18362471 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
18372472
18382473 if (ttrace->entry_pending) {
1839
- fprintf(trace->output, "%-70s", ttrace->entry_str);
2474
+ printed = fprintf(trace->output, "%s", ttrace->entry_str);
18402475 } else {
1841
- fprintf(trace->output, " ... [");
2476
+ printed += fprintf(trace->output, " ... [");
18422477 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1843
- fprintf(trace->output, "]: %s()", sc->name);
2478
+ printed += 9;
2479
+ printed += fprintf(trace->output, "]: %s()", sc->name);
18442480 }
2481
+
2482
+ printed++; /* the closing ')' */
2483
+
2484
+ if (alignment > printed)
2485
+ alignment -= printed;
2486
+ else
2487
+ alignment = 0;
2488
+
2489
+ fprintf(trace->output, ")%*s= ", alignment, " ");
18452490
18462491 if (sc->fmt == NULL) {
18472492 if (ret < 0)
18482493 goto errno_print;
18492494 signed_print:
1850
- fprintf(trace->output, ") = %ld", ret);
2495
+ fprintf(trace->output, "%ld", ret);
18512496 } else if (ret < 0) {
18522497 errno_print: {
18532498 char bf[STRERR_BUFSIZE];
18542499 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
18552500 *e = errno_to_name(evsel, -ret);
18562501
1857
- fprintf(trace->output, ") = -1 %s %s", e, emsg);
2502
+ fprintf(trace->output, "-1 %s (%s)", e, emsg);
18582503 }
18592504 } else if (ret == 0 && sc->fmt->timeout)
1860
- fprintf(trace->output, ") = 0 Timeout");
2505
+ fprintf(trace->output, "0 (Timeout)");
18612506 else if (ttrace->ret_scnprintf) {
18622507 char bf[1024];
18632508 struct syscall_arg arg = {
....@@ -1867,14 +2512,14 @@
18672512 };
18682513 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
18692514 ttrace->ret_scnprintf = NULL;
1870
- fprintf(trace->output, ") = %s", bf);
2515
+ fprintf(trace->output, "%s", bf);
18712516 } else if (sc->fmt->hexret)
1872
- fprintf(trace->output, ") = %#lx", ret);
2517
+ fprintf(trace->output, "%#lx", ret);
18732518 else if (sc->fmt->errpid) {
18742519 struct thread *child = machine__find_thread(trace->host, ret, ret);
18752520
18762521 if (child != NULL) {
1877
- fprintf(trace->output, ") = %ld", ret);
2522
+ fprintf(trace->output, "%ld", ret);
18782523 if (child->comm_set)
18792524 fprintf(trace->output, " (%s)", thread__comm_str(child));
18802525 thread__put(child);
....@@ -1884,10 +2529,17 @@
18842529
18852530 fputc('\n', trace->output);
18862531
2532
+ /*
2533
+ * We only consider an 'event' for the sake of --max-events a non-filtered
2534
+ * sys_enter + sys_exit and other tracepoint events.
2535
+ */
2536
+ if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
2537
+ interrupted = true;
2538
+
18872539 if (callchain_ret > 0)
18882540 trace__fprintf_callchain(trace, sample);
18892541 else if (callchain_ret < 0)
1890
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2542
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
18912543 out:
18922544 ttrace->entry_pending = false;
18932545 err = 0;
....@@ -1896,7 +2548,7 @@
18962548 return err;
18972549 }
18982550
1899
-static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2551
+static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
19002552 union perf_event *event __maybe_unused,
19012553 struct perf_sample *sample)
19022554 {
....@@ -1905,7 +2557,7 @@
19052557 size_t filename_len, entry_str_len, to_move;
19062558 ssize_t remaining_space;
19072559 char *pos;
1908
- const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2560
+ const char *filename = evsel__rawptr(evsel, sample, "pathname");
19092561
19102562 if (!thread)
19112563 goto out;
....@@ -1957,11 +2609,11 @@
19572609 return 0;
19582610 }
19592611
1960
-static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2612
+static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
19612613 union perf_event *event __maybe_unused,
19622614 struct perf_sample *sample)
19632615 {
1964
- u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2616
+ u64 runtime = evsel__intval(evsel, sample, "runtime");
19652617 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
19662618 struct thread *thread = machine__findnew_thread(trace->host,
19672619 sample->pid,
....@@ -1980,10 +2632,10 @@
19802632 out_dump:
19812633 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
19822634 evsel->name,
1983
- perf_evsel__strval(evsel, sample, "comm"),
1984
- (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2635
+ evsel__strval(evsel, sample, "comm"),
2636
+ (pid_t)evsel__intval(evsel, sample, "pid"),
19852637 runtime,
1986
- perf_evsel__intval(evsel, sample, "vruntime"));
2638
+ evsel__intval(evsel, sample, "vruntime"));
19872639 goto out_put;
19882640 }
19892641
....@@ -2016,13 +2668,99 @@
20162668 {
20172669 binary__fprintf(sample->raw_data, sample->raw_size, 8,
20182670 bpf_output__printer, NULL, trace->output);
2671
+ ++trace->nr_events_printed;
20192672 }
20202673
2021
-static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2674
+static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample,
2675
+ struct thread *thread, void *augmented_args, int augmented_args_size)
2676
+{
2677
+ char bf[2048];
2678
+ size_t size = sizeof(bf);
2679
+ struct tep_format_field *field = evsel->tp_format->format.fields;
2680
+ struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
2681
+ size_t printed = 0;
2682
+ unsigned long val;
2683
+ u8 bit = 1;
2684
+ struct syscall_arg syscall_arg = {
2685
+ .augmented = {
2686
+ .size = augmented_args_size,
2687
+ .args = augmented_args,
2688
+ },
2689
+ .idx = 0,
2690
+ .mask = 0,
2691
+ .trace = trace,
2692
+ .thread = thread,
2693
+ .show_string_prefix = trace->show_string_prefix,
2694
+ };
2695
+
2696
+ for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) {
2697
+ if (syscall_arg.mask & bit)
2698
+ continue;
2699
+
2700
+ syscall_arg.len = 0;
2701
+ syscall_arg.fmt = arg;
2702
+ if (field->flags & TEP_FIELD_IS_ARRAY) {
2703
+ int offset = field->offset;
2704
+
2705
+ if (field->flags & TEP_FIELD_IS_DYNAMIC) {
2706
+ offset = format_field__intval(field, sample, evsel->needs_swap);
2707
+ syscall_arg.len = offset >> 16;
2708
+ offset &= 0xffff;
2709
+ }
2710
+
2711
+ val = (uintptr_t)(sample->raw_data + offset);
2712
+ } else
2713
+ val = format_field__intval(field, sample, evsel->needs_swap);
2714
+ /*
2715
+ * Some syscall args need some mask, most don't and
2716
+ * return val untouched.
2717
+ */
2718
+ val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
2719
+
2720
+ /*
2721
+ * Suppress this argument if its value is zero and
2722
+ * and we don't have a string associated in an
2723
+ * strarray for it.
2724
+ */
2725
+ if (val == 0 &&
2726
+ !trace->show_zeros &&
2727
+ !((arg->show_zero ||
2728
+ arg->scnprintf == SCA_STRARRAY ||
2729
+ arg->scnprintf == SCA_STRARRAYS) &&
2730
+ arg->parm))
2731
+ continue;
2732
+
2733
+ printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2734
+
2735
+ /*
2736
+ * XXX Perhaps we should have a show_tp_arg_names,
2737
+ * leaving show_arg_names just for syscalls?
2738
+ */
2739
+ if (1 || trace->show_arg_names)
2740
+ printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2741
+
2742
+ printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
2743
+ }
2744
+
2745
+ return printed + fprintf(trace->output, "%s", bf);
2746
+}
2747
+
2748
+static int trace__event_handler(struct trace *trace, struct evsel *evsel,
20222749 union perf_event *event __maybe_unused,
20232750 struct perf_sample *sample)
20242751 {
2752
+ struct thread *thread;
20252753 int callchain_ret = 0;
2754
+ /*
2755
+ * Check if we called perf_evsel__disable(evsel) due to, for instance,
2756
+ * this event's max_events having been hit and this is an entry coming
2757
+ * from the ring buffer that we should discard, since the max events
2758
+ * have already been considered/printed.
2759
+ */
2760
+ if (evsel->disabled)
2761
+ return 0;
2762
+
2763
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
20262764
20272765 if (sample->callchain) {
20282766 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
....@@ -2036,32 +2774,63 @@
20362774 trace__printf_interrupted_entry(trace);
20372775 trace__fprintf_tstamp(trace, sample->time, trace->output);
20382776
2039
- if (trace->trace_syscalls)
2777
+ if (trace->trace_syscalls && trace->show_duration)
20402778 fprintf(trace->output, "( ): ");
20412779
2042
- fprintf(trace->output, "%s:", evsel->name);
2780
+ if (thread)
2781
+ trace__fprintf_comm_tid(trace, thread, trace->output);
20432782
2044
- if (perf_evsel__is_bpf_output(evsel)) {
2045
- if (evsel == trace->syscalls.events.augmented)
2783
+ if (evsel == trace->syscalls.events.augmented) {
2784
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample);
2785
+ struct syscall *sc = trace__syscall_info(trace, evsel, id);
2786
+
2787
+ if (sc) {
2788
+ fprintf(trace->output, "%s(", sc->name);
20462789 trace__fprintf_sys_enter(trace, evsel, sample);
2047
- else
2048
- bpf_output__fprintf(trace, sample);
2790
+ fputc(')', trace->output);
2791
+ goto newline;
2792
+ }
2793
+
2794
+ /*
2795
+ * XXX: Not having the associated syscall info or not finding/adding
2796
+ * the thread should never happen, but if it does...
2797
+ * fall thru and print it as a bpf_output event.
2798
+ */
2799
+ }
2800
+
2801
+ fprintf(trace->output, "%s(", evsel->name);
2802
+
2803
+ if (evsel__is_bpf_output(evsel)) {
2804
+ bpf_output__fprintf(trace, sample);
20492805 } else if (evsel->tp_format) {
20502806 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
20512807 trace__fprintf_sys_enter(trace, evsel, sample)) {
2052
- event_format__fprintf(evsel->tp_format, sample->cpu,
2053
- sample->raw_data, sample->raw_size,
2054
- trace->output);
2808
+ if (trace->libtraceevent_print) {
2809
+ event_format__fprintf(evsel->tp_format, sample->cpu,
2810
+ sample->raw_data, sample->raw_size,
2811
+ trace->output);
2812
+ } else {
2813
+ trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0);
2814
+ }
20552815 }
20562816 }
20572817
2058
- fprintf(trace->output, "\n");
2818
+newline:
2819
+ fprintf(trace->output, ")\n");
20592820
20602821 if (callchain_ret > 0)
20612822 trace__fprintf_callchain(trace, sample);
20622823 else if (callchain_ret < 0)
2063
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2824
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2825
+
2826
+ ++trace->nr_events_printed;
2827
+
2828
+ if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
2829
+ evsel__disable(evsel);
2830
+ evsel__close(evsel);
2831
+ }
20642832 out:
2833
+ thread__put(thread);
20652834 return 0;
20662835 }
20672836
....@@ -2083,7 +2852,7 @@
20832852 }
20842853
20852854 static int trace__pgfault(struct trace *trace,
2086
- struct perf_evsel *evsel,
2855
+ struct evsel *evsel,
20872856 union perf_event *event __maybe_unused,
20882857 struct perf_sample *sample)
20892858 {
....@@ -2109,7 +2878,7 @@
21092878 if (ttrace == NULL)
21102879 goto out_put;
21112880
2112
- if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2881
+ if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
21132882 ttrace->pfmaj++;
21142883 else
21152884 ttrace->pfmin++;
....@@ -2122,7 +2891,7 @@
21222891 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
21232892
21242893 fprintf(trace->output, "%sfault [",
2125
- evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2894
+ evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
21262895 "maj" : "min");
21272896
21282897 print_location(trace->output, sample, &al, false, true);
....@@ -2147,7 +2916,9 @@
21472916 if (callchain_ret > 0)
21482917 trace__fprintf_callchain(trace, sample);
21492918 else if (callchain_ret < 0)
2150
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2919
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2920
+
2921
+ ++trace->nr_events_printed;
21512922 out:
21522923 err = 0;
21532924 out_put:
....@@ -2156,7 +2927,7 @@
21562927 }
21572928
21582929 static void trace__set_base_time(struct trace *trace,
2159
- struct perf_evsel *evsel,
2930
+ struct evsel *evsel,
21602931 struct perf_sample *sample)
21612932 {
21622933 /*
....@@ -2168,14 +2939,14 @@
21682939 * appears in our event stream (vfs_getname comes to mind).
21692940 */
21702941 if (trace->base_time == 0 && !trace->full_time &&
2171
- (evsel->attr.sample_type & PERF_SAMPLE_TIME))
2942
+ (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
21722943 trace->base_time = sample->time;
21732944 }
21742945
21752946 static int trace__process_sample(struct perf_tool *tool,
21762947 union perf_event *event,
21772948 struct perf_sample *sample,
2178
- struct perf_evsel *evsel,
2949
+ struct evsel *evsel,
21792950 struct machine *machine __maybe_unused)
21802951 {
21812952 struct trace *trace = container_of(tool, struct trace, tool);
....@@ -2209,21 +2980,23 @@
22092980 "-m", "1024",
22102981 "-c", "1",
22112982 };
2212
-
2983
+ pid_t pid = getpid();
2984
+ char *filter = asprintf__tp_filter_pids(1, &pid);
22132985 const char * const sc_args[] = { "-e", };
22142986 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
22152987 const char * const majpf_args[] = { "-e", "major-faults" };
22162988 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
22172989 const char * const minpf_args[] = { "-e", "minor-faults" };
22182990 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2991
+ int err = -1;
22192992
2220
- /* +1 is for the event string below */
2221
- rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2993
+ /* +3 is for the event string below and the pid filter */
2994
+ rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 +
22222995 majpf_args_nr + minpf_args_nr + argc;
22232996 rec_argv = calloc(rec_argc + 1, sizeof(char *));
22242997
2225
- if (rec_argv == NULL)
2226
- return -ENOMEM;
2998
+ if (rec_argv == NULL || filter == NULL)
2999
+ goto out_free;
22273000
22283001 j = 0;
22293002 for (i = 0; i < ARRAY_SIZE(record_args); i++)
....@@ -2240,10 +3013,12 @@
22403013 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
22413014 else {
22423015 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2243
- free(rec_argv);
2244
- return -1;
3016
+ goto out_free;
22453017 }
22463018 }
3019
+
3020
+ rec_argv[j++] = "--filter";
3021
+ rec_argv[j++] = filter;
22473022
22483023 if (trace->trace_pgfaults & TRACE_PFMAJ)
22493024 for (i = 0; i < majpf_args_nr; i++)
....@@ -2256,42 +3031,53 @@
22563031 for (i = 0; i < (unsigned int)argc; i++)
22573032 rec_argv[j++] = argv[i];
22583033
2259
- return cmd_record(j, rec_argv);
3034
+ err = cmd_record(j, rec_argv);
3035
+out_free:
3036
+ free(filter);
3037
+ free(rec_argv);
3038
+ return err;
22603039 }
22613040
22623041 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
22633042
2264
-static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
3043
+static bool evlist__add_vfs_getname(struct evlist *evlist)
22653044 {
22663045 bool found = false;
2267
- struct perf_evsel *evsel, *tmp;
2268
- struct parse_events_error err = { .idx = 0, };
2269
- int ret = parse_events(evlist, "probe:vfs_getname*", &err);
3046
+ struct evsel *evsel, *tmp;
3047
+ struct parse_events_error err;
3048
+ int ret;
22703049
2271
- if (ret)
3050
+ bzero(&err, sizeof(err));
3051
+ ret = parse_events(evlist, "probe:vfs_getname*", &err);
3052
+ if (ret) {
3053
+ free(err.str);
3054
+ free(err.help);
3055
+ free(err.first_str);
3056
+ free(err.first_help);
22723057 return false;
3058
+ }
22733059
22743060 evlist__for_each_entry_safe(evlist, evsel, tmp) {
2275
- if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
3061
+ if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
22763062 continue;
22773063
2278
- if (perf_evsel__field(evsel, "pathname")) {
3064
+ if (evsel__field(evsel, "pathname")) {
22793065 evsel->handler = trace__vfs_getname;
22803066 found = true;
22813067 continue;
22823068 }
22833069
2284
- list_del_init(&evsel->node);
3070
+ list_del_init(&evsel->core.node);
22853071 evsel->evlist = NULL;
2286
- perf_evsel__delete(evsel);
3072
+ evsel__delete(evsel);
22873073 }
22883074
22893075 return found;
22903076 }
22913077
2292
-static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
3078
+static struct evsel *evsel__new_pgfault(u64 config)
22933079 {
2294
- struct perf_evsel *evsel;
3080
+ struct evsel *evsel;
22953081 struct perf_event_attr attr = {
22963082 .type = PERF_TYPE_SOFTWARE,
22973083 .mmap_data = 1,
....@@ -2302,7 +3088,7 @@
23023088
23033089 event_attr_init(&attr);
23043090
2305
- evsel = perf_evsel__new(&attr);
3091
+ evsel = evsel__new(&attr);
23063092 if (evsel)
23073093 evsel->handler = trace__pgfault;
23083094
....@@ -2312,7 +3098,7 @@
23123098 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
23133099 {
23143100 const u32 type = event->header.type;
2315
- struct perf_evsel *evsel;
3101
+ struct evsel *evsel;
23163102
23173103 if (type != PERF_RECORD_SAMPLE) {
23183104 trace__process_event(trace, trace->host, event, sample);
....@@ -2325,24 +3111,30 @@
23253111 return;
23263112 }
23273113
3114
+ if (evswitch__discard(&trace->evswitch, evsel))
3115
+ return;
3116
+
23283117 trace__set_base_time(trace, evsel, sample);
23293118
2330
- if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
3119
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
23313120 sample->raw_data == NULL) {
23323121 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2333
- perf_evsel__name(evsel), sample->tid,
3122
+ evsel__name(evsel), sample->tid,
23343123 sample->cpu, sample->raw_size);
23353124 } else {
23363125 tracepoint_handler handler = evsel->handler;
23373126 handler(trace, evsel, event, sample);
23383127 }
3128
+
3129
+ if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
3130
+ interrupted = true;
23393131 }
23403132
23413133 static int trace__add_syscall_newtp(struct trace *trace)
23423134 {
23433135 int ret = -1;
2344
- struct perf_evlist *evlist = trace->evlist;
2345
- struct perf_evsel *sys_enter, *sys_exit;
3136
+ struct evlist *evlist = trace->evlist;
3137
+ struct evsel *sys_enter, *sys_exit;
23463138
23473139 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
23483140 if (sys_enter == NULL)
....@@ -2358,11 +3150,11 @@
23583150 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
23593151 goto out_delete_sys_exit;
23603152
2361
- perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2362
- perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
3153
+ evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
3154
+ evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
23633155
2364
- perf_evlist__add(evlist, sys_enter);
2365
- perf_evlist__add(evlist, sys_exit);
3156
+ evlist__add(evlist, sys_enter);
3157
+ evlist__add(evlist, sys_exit);
23663158
23673159 if (callchain_param.enabled && !trace->kernel_syscallchains) {
23683160 /*
....@@ -2370,7 +3162,7 @@
23703162 * leading to the syscall, allow overriding that for
23713163 * debugging reasons using --kernel_syscall_callchains
23723164 */
2373
- sys_exit->attr.exclude_callchain_kernel = 1;
3165
+ sys_exit->core.attr.exclude_callchain_kernel = 1;
23743166 }
23753167
23763168 trace->syscalls.events.sys_enter = sys_enter;
....@@ -2381,16 +3173,16 @@
23813173 return ret;
23823174
23833175 out_delete_sys_exit:
2384
- perf_evsel__delete_priv(sys_exit);
3176
+ evsel__delete_priv(sys_exit);
23853177 out_delete_sys_enter:
2386
- perf_evsel__delete_priv(sys_enter);
3178
+ evsel__delete_priv(sys_enter);
23873179 goto out;
23883180 }
23893181
2390
-static int trace__set_ev_qualifier_filter(struct trace *trace)
3182
+static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
23913183 {
23923184 int err = -1;
2393
- struct perf_evsel *sys_exit;
3185
+ struct evsel *sys_exit;
23943186 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
23953187 trace->ev_qualifier_ids.nr,
23963188 trace->ev_qualifier_ids.entries);
....@@ -2398,10 +3190,9 @@
23983190 if (filter == NULL)
23993191 goto out_enomem;
24003192
2401
- if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2402
- filter)) {
3193
+ if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
24033194 sys_exit = trace->syscalls.events.sys_exit;
2404
- err = perf_evsel__append_tp_filter(sys_exit, filter);
3195
+ err = evsel__append_tp_filter(sys_exit, filter);
24053196 }
24063197
24073198 free(filter);
....@@ -2412,9 +3203,450 @@
24123203 goto out;
24133204 }
24143205
3206
+#ifdef HAVE_LIBBPF_SUPPORT
3207
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
3208
+{
3209
+ if (trace->bpf_obj == NULL)
3210
+ return NULL;
3211
+
3212
+ return bpf_object__find_map_by_name(trace->bpf_obj, name);
3213
+}
3214
+
3215
+static void trace__set_bpf_map_filtered_pids(struct trace *trace)
3216
+{
3217
+ trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
3218
+}
3219
+
3220
+static void trace__set_bpf_map_syscalls(struct trace *trace)
3221
+{
3222
+ trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
3223
+ trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
3224
+ trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
3225
+}
3226
+
3227
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
3228
+{
3229
+ if (trace->bpf_obj == NULL)
3230
+ return NULL;
3231
+
3232
+ return bpf_object__find_program_by_title(trace->bpf_obj, name);
3233
+}
3234
+
3235
+static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
3236
+ const char *prog_name, const char *type)
3237
+{
3238
+ struct bpf_program *prog;
3239
+
3240
+ if (prog_name == NULL) {
3241
+ char default_prog_name[256];
3242
+ scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
3243
+ prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3244
+ if (prog != NULL)
3245
+ goto out_found;
3246
+ if (sc->fmt && sc->fmt->alias) {
3247
+ scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
3248
+ prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3249
+ if (prog != NULL)
3250
+ goto out_found;
3251
+ }
3252
+ goto out_unaugmented;
3253
+ }
3254
+
3255
+ prog = trace__find_bpf_program_by_title(trace, prog_name);
3256
+
3257
+ if (prog != NULL) {
3258
+out_found:
3259
+ return prog;
3260
+ }
3261
+
3262
+ pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
3263
+ prog_name, type, sc->name);
3264
+out_unaugmented:
3265
+ return trace->syscalls.unaugmented_prog;
3266
+}
3267
+
3268
+static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
3269
+{
3270
+ struct syscall *sc = trace__syscall_info(trace, NULL, id);
3271
+
3272
+ if (sc == NULL)
3273
+ return;
3274
+
3275
+ sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3276
+ sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
3277
+}
3278
+
3279
+static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
3280
+{
3281
+ struct syscall *sc = trace__syscall_info(trace, NULL, id);
3282
+ return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3283
+}
3284
+
3285
+static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
3286
+{
3287
+ struct syscall *sc = trace__syscall_info(trace, NULL, id);
3288
+ return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3289
+}
3290
+
3291
+static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
3292
+{
3293
+ struct syscall *sc = trace__syscall_info(trace, NULL, id);
3294
+ int arg = 0;
3295
+
3296
+ if (sc == NULL)
3297
+ goto out;
3298
+
3299
+ for (; arg < sc->nr_args; ++arg) {
3300
+ entry->string_args_len[arg] = 0;
3301
+ if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
3302
+ /* Should be set like strace -s strsize */
3303
+ entry->string_args_len[arg] = PATH_MAX;
3304
+ }
3305
+ }
3306
+out:
3307
+ for (; arg < 6; ++arg)
3308
+ entry->string_args_len[arg] = 0;
3309
+}
3310
+static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
3311
+{
3312
+ int fd = bpf_map__fd(trace->syscalls.map);
3313
+ struct bpf_map_syscall_entry value = {
3314
+ .enabled = !trace->not_ev_qualifier,
3315
+ };
3316
+ int err = 0;
3317
+ size_t i;
3318
+
3319
+ for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
3320
+ int key = trace->ev_qualifier_ids.entries[i];
3321
+
3322
+ if (value.enabled) {
3323
+ trace__init_bpf_map_syscall_args(trace, key, &value);
3324
+ trace__init_syscall_bpf_progs(trace, key);
3325
+ }
3326
+
3327
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
3328
+ if (err)
3329
+ break;
3330
+ }
3331
+
3332
+ return err;
3333
+}
3334
+
3335
+static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
3336
+{
3337
+ int fd = bpf_map__fd(trace->syscalls.map);
3338
+ struct bpf_map_syscall_entry value = {
3339
+ .enabled = enabled,
3340
+ };
3341
+ int err = 0, key;
3342
+
3343
+ for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3344
+ if (enabled)
3345
+ trace__init_bpf_map_syscall_args(trace, key, &value);
3346
+
3347
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
3348
+ if (err)
3349
+ break;
3350
+ }
3351
+
3352
+ return err;
3353
+}
3354
+
3355
+static int trace__init_syscalls_bpf_map(struct trace *trace)
3356
+{
3357
+ bool enabled = true;
3358
+
3359
+ if (trace->ev_qualifier_ids.nr)
3360
+ enabled = trace->not_ev_qualifier;
3361
+
3362
+ return __trace__init_syscalls_bpf_map(trace, enabled);
3363
+}
3364
+
3365
+static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
3366
+{
3367
+ struct tep_format_field *field, *candidate_field;
3368
+ int id;
3369
+
3370
+ /*
3371
+ * We're only interested in syscalls that have a pointer:
3372
+ */
3373
+ for (field = sc->args; field; field = field->next) {
3374
+ if (field->flags & TEP_FIELD_IS_POINTER)
3375
+ goto try_to_find_pair;
3376
+ }
3377
+
3378
+ return NULL;
3379
+
3380
+try_to_find_pair:
3381
+ for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
3382
+ struct syscall *pair = trace__syscall_info(trace, NULL, id);
3383
+ struct bpf_program *pair_prog;
3384
+ bool is_candidate = false;
3385
+
3386
+ if (pair == NULL || pair == sc ||
3387
+ pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
3388
+ continue;
3389
+
3390
+ for (field = sc->args, candidate_field = pair->args;
3391
+ field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
3392
+ bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
3393
+ candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
3394
+
3395
+ if (is_pointer) {
3396
+ if (!candidate_is_pointer) {
3397
+ // The candidate just doesn't copies our pointer arg, might copy other pointers we want.
3398
+ continue;
3399
+ }
3400
+ } else {
3401
+ if (candidate_is_pointer) {
3402
+ // The candidate might copy a pointer we don't have, skip it.
3403
+ goto next_candidate;
3404
+ }
3405
+ continue;
3406
+ }
3407
+
3408
+ if (strcmp(field->type, candidate_field->type))
3409
+ goto next_candidate;
3410
+
3411
+ is_candidate = true;
3412
+ }
3413
+
3414
+ if (!is_candidate)
3415
+ goto next_candidate;
3416
+
3417
+ /*
3418
+ * Check if the tentative pair syscall augmenter has more pointers, if it has,
3419
+ * then it may be collecting that and we then can't use it, as it would collect
3420
+ * more than what is common to the two syscalls.
3421
+ */
3422
+ if (candidate_field) {
3423
+ for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
3424
+ if (candidate_field->flags & TEP_FIELD_IS_POINTER)
3425
+ goto next_candidate;
3426
+ }
3427
+
3428
+ pair_prog = pair->bpf_prog.sys_enter;
3429
+ /*
3430
+ * If the pair isn't enabled, then its bpf_prog.sys_enter will not
3431
+ * have been searched for, so search it here and if it returns the
3432
+ * unaugmented one, then ignore it, otherwise we'll reuse that BPF
3433
+ * program for a filtered syscall on a non-filtered one.
3434
+ *
3435
+ * For instance, we have "!syscalls:sys_enter_renameat" and that is
3436
+ * useful for "renameat2".
3437
+ */
3438
+ if (pair_prog == NULL) {
3439
+ pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3440
+ if (pair_prog == trace->syscalls.unaugmented_prog)
3441
+ goto next_candidate;
3442
+ }
3443
+
3444
+ pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
3445
+ return pair_prog;
3446
+ next_candidate:
3447
+ continue;
3448
+ }
3449
+
3450
+ return NULL;
3451
+}
3452
+
3453
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3454
+{
3455
+ int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
3456
+ map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
3457
+ int err = 0, key;
3458
+
3459
+ for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3460
+ int prog_fd;
3461
+
3462
+ if (!trace__syscall_enabled(trace, key))
3463
+ continue;
3464
+
3465
+ trace__init_syscall_bpf_progs(trace, key);
3466
+
3467
+ // It'll get at least the "!raw_syscalls:unaugmented"
3468
+ prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3469
+ err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3470
+ if (err)
3471
+ break;
3472
+ prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3473
+ err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
3474
+ if (err)
3475
+ break;
3476
+ }
3477
+
3478
+ /*
3479
+ * Now lets do a second pass looking for enabled syscalls without
3480
+ * an augmenter that have a signature that is a superset of another
3481
+ * syscall with an augmenter so that we can auto-reuse it.
3482
+ *
3483
+ * I.e. if we have an augmenter for the "open" syscall that has
3484
+ * this signature:
3485
+ *
3486
+ * int open(const char *pathname, int flags, mode_t mode);
3487
+ *
3488
+ * I.e. that will collect just the first string argument, then we
3489
+ * can reuse it for the 'creat' syscall, that has this signature:
3490
+ *
3491
+ * int creat(const char *pathname, mode_t mode);
3492
+ *
3493
+ * and for:
3494
+ *
3495
+ * int stat(const char *pathname, struct stat *statbuf);
3496
+ * int lstat(const char *pathname, struct stat *statbuf);
3497
+ *
3498
+ * Because the 'open' augmenter will collect the first arg as a string,
3499
+ * and leave alone all the other args, which already helps with
3500
+ * beautifying 'stat' and 'lstat''s pathname arg.
3501
+ *
3502
+ * Then, in time, when 'stat' gets an augmenter that collects both
3503
+ * first and second arg (this one on the raw_syscalls:sys_exit prog
3504
+ * array tail call, then that one will be used.
3505
+ */
3506
+ for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3507
+ struct syscall *sc = trace__syscall_info(trace, NULL, key);
3508
+ struct bpf_program *pair_prog;
3509
+ int prog_fd;
3510
+
3511
+ if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
3512
+ continue;
3513
+
3514
+ /*
3515
+ * For now we're just reusing the sys_enter prog, and if it
3516
+ * already has an augmenter, we don't need to find one.
3517
+ */
3518
+ if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
3519
+ continue;
3520
+
3521
+ /*
3522
+ * Look at all the other syscalls for one that has a signature
3523
+ * that is close enough that we can share:
3524
+ */
3525
+ pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
3526
+ if (pair_prog == NULL)
3527
+ continue;
3528
+
3529
+ sc->bpf_prog.sys_enter = pair_prog;
3530
+
3531
+ /*
3532
+ * Update the BPF_MAP_TYPE_PROG_SHARED for raw_syscalls:sys_enter
3533
+ * with the fd for the program we're reusing:
3534
+ */
3535
+ prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
3536
+ err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3537
+ if (err)
3538
+ break;
3539
+ }
3540
+
3541
+
3542
+ return err;
3543
+}
3544
+
3545
+static void trace__delete_augmented_syscalls(struct trace *trace)
3546
+{
3547
+ struct evsel *evsel, *tmp;
3548
+
3549
+ evlist__remove(trace->evlist, trace->syscalls.events.augmented);
3550
+ evsel__delete(trace->syscalls.events.augmented);
3551
+ trace->syscalls.events.augmented = NULL;
3552
+
3553
+ evlist__for_each_entry_safe(trace->evlist, tmp, evsel) {
3554
+ if (evsel->bpf_obj == trace->bpf_obj) {
3555
+ evlist__remove(trace->evlist, evsel);
3556
+ evsel__delete(evsel);
3557
+ }
3558
+
3559
+ }
3560
+
3561
+ bpf_object__close(trace->bpf_obj);
3562
+ trace->bpf_obj = NULL;
3563
+}
3564
+#else // HAVE_LIBBPF_SUPPORT
3565
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
3566
+ const char *name __maybe_unused)
3567
+{
3568
+ return NULL;
3569
+}
3570
+
3571
+static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
3572
+{
3573
+}
3574
+
3575
+static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
3576
+{
3577
+}
3578
+
3579
+static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
3580
+{
3581
+ return 0;
3582
+}
3583
+
3584
+static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
3585
+{
3586
+ return 0;
3587
+}
3588
+
3589
+static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
3590
+ const char *name __maybe_unused)
3591
+{
3592
+ return NULL;
3593
+}
3594
+
3595
+static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
3596
+{
3597
+ return 0;
3598
+}
3599
+
3600
+static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused)
3601
+{
3602
+}
3603
+#endif // HAVE_LIBBPF_SUPPORT
3604
+
3605
+static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
3606
+{
3607
+ struct evsel *evsel;
3608
+
3609
+ evlist__for_each_entry(trace->evlist, evsel) {
3610
+ if (evsel == trace->syscalls.events.augmented ||
3611
+ evsel->bpf_obj == trace->bpf_obj)
3612
+ continue;
3613
+
3614
+ return false;
3615
+ }
3616
+
3617
+ return true;
3618
+}
3619
+
3620
+static int trace__set_ev_qualifier_filter(struct trace *trace)
3621
+{
3622
+ if (trace->syscalls.map)
3623
+ return trace__set_ev_qualifier_bpf_filter(trace);
3624
+ if (trace->syscalls.events.sys_enter)
3625
+ return trace__set_ev_qualifier_tp_filter(trace);
3626
+ return 0;
3627
+}
3628
+
3629
+static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
3630
+ size_t npids __maybe_unused, pid_t *pids __maybe_unused)
3631
+{
3632
+ int err = 0;
3633
+#ifdef HAVE_LIBBPF_SUPPORT
3634
+ bool value = true;
3635
+ int map_fd = bpf_map__fd(map);
3636
+ size_t i;
3637
+
3638
+ for (i = 0; i < npids; ++i) {
3639
+ err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
3640
+ if (err)
3641
+ break;
3642
+ }
3643
+#endif
3644
+ return err;
3645
+}
3646
+
24153647 static int trace__set_filter_loop_pids(struct trace *trace)
24163648 {
2417
- unsigned int nr = 1;
3649
+ unsigned int nr = 1, err;
24183650 pid_t pids[32] = {
24193651 getpid(),
24203652 };
....@@ -2426,20 +3658,237 @@
24263658 if (parent == NULL)
24273659 break;
24283660
2429
- if (!strcmp(thread__comm_str(parent), "sshd")) {
3661
+ if (!strcmp(thread__comm_str(parent), "sshd") ||
3662
+ strstarts(thread__comm_str(parent), "gnome-terminal")) {
24303663 pids[nr++] = parent->tid;
24313664 break;
24323665 }
24333666 thread = parent;
24343667 }
24353668
2436
- return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
3669
+ err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids);
3670
+ if (!err && trace->filter_pids.map)
3671
+ err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3672
+
3673
+ return err;
3674
+}
3675
+
3676
+static int trace__set_filter_pids(struct trace *trace)
3677
+{
3678
+ int err = 0;
3679
+ /*
3680
+ * Better not use !target__has_task() here because we need to cover the
3681
+ * case where no threads were specified in the command line, but a
3682
+ * workload was, and in that case we will fill in the thread_map when
3683
+ * we fork the workload in perf_evlist__prepare_workload.
3684
+ */
3685
+ if (trace->filter_pids.nr > 0) {
3686
+ err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
3687
+ trace->filter_pids.entries);
3688
+ if (!err && trace->filter_pids.map) {
3689
+ err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
3690
+ trace->filter_pids.entries);
3691
+ }
3692
+ } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
3693
+ err = trace__set_filter_loop_pids(trace);
3694
+ }
3695
+
3696
+ return err;
3697
+}
3698
+
3699
+static int __trace__deliver_event(struct trace *trace, union perf_event *event)
3700
+{
3701
+ struct evlist *evlist = trace->evlist;
3702
+ struct perf_sample sample;
3703
+ int err;
3704
+
3705
+ err = perf_evlist__parse_sample(evlist, event, &sample);
3706
+ if (err)
3707
+ fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
3708
+ else
3709
+ trace__handle_event(trace, event, &sample);
3710
+
3711
+ return 0;
3712
+}
3713
+
3714
+static int __trace__flush_events(struct trace *trace)
3715
+{
3716
+ u64 first = ordered_events__first_time(&trace->oe.data);
3717
+ u64 flush = trace->oe.last - NSEC_PER_SEC;
3718
+
3719
+ /* Is there some thing to flush.. */
3720
+ if (first && first < flush)
3721
+ return ordered_events__flush_time(&trace->oe.data, flush);
3722
+
3723
+ return 0;
3724
+}
3725
+
3726
+static int trace__flush_events(struct trace *trace)
3727
+{
3728
+ return !trace->sort_events ? 0 : __trace__flush_events(trace);
3729
+}
3730
+
3731
+static int trace__deliver_event(struct trace *trace, union perf_event *event)
3732
+{
3733
+ int err;
3734
+
3735
+ if (!trace->sort_events)
3736
+ return __trace__deliver_event(trace, event);
3737
+
3738
+ err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
3739
+ if (err && err != -1)
3740
+ return err;
3741
+
3742
+ err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0);
3743
+ if (err)
3744
+ return err;
3745
+
3746
+ return trace__flush_events(trace);
3747
+}
3748
+
3749
+static int ordered_events__deliver_event(struct ordered_events *oe,
3750
+ struct ordered_event *event)
3751
+{
3752
+ struct trace *trace = container_of(oe, struct trace, oe.data);
3753
+
3754
+ return __trace__deliver_event(trace, event->event);
3755
+}
3756
+
3757
+static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
3758
+{
3759
+ struct tep_format_field *field;
3760
+ struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
3761
+
3762
+ if (evsel->tp_format == NULL || fmt == NULL)
3763
+ return NULL;
3764
+
3765
+ for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
3766
+ if (strcmp(field->name, arg) == 0)
3767
+ return fmt;
3768
+
3769
+ return NULL;
3770
+}
3771
+
3772
+static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
3773
+{
3774
+ char *tok, *left = evsel->filter, *new_filter = evsel->filter;
3775
+
3776
+ while ((tok = strpbrk(left, "=<>!")) != NULL) {
3777
+ char *right = tok + 1, *right_end;
3778
+
3779
+ if (*right == '=')
3780
+ ++right;
3781
+
3782
+ while (isspace(*right))
3783
+ ++right;
3784
+
3785
+ if (*right == '\0')
3786
+ break;
3787
+
3788
+ while (!isalpha(*left))
3789
+ if (++left == tok) {
3790
+ /*
3791
+ * Bail out, can't find the name of the argument that is being
3792
+ * used in the filter, let it try to set this filter, will fail later.
3793
+ */
3794
+ return 0;
3795
+ }
3796
+
3797
+ right_end = right + 1;
3798
+ while (isalnum(*right_end) || *right_end == '_' || *right_end == '|')
3799
+ ++right_end;
3800
+
3801
+ if (isalpha(*right)) {
3802
+ struct syscall_arg_fmt *fmt;
3803
+ int left_size = tok - left,
3804
+ right_size = right_end - right;
3805
+ char arg[128];
3806
+
3807
+ while (isspace(left[left_size - 1]))
3808
+ --left_size;
3809
+
3810
+ scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
3811
+
3812
+ fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
3813
+ if (fmt == NULL) {
3814
+ pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
3815
+ arg, evsel->name, evsel->filter);
3816
+ return -1;
3817
+ }
3818
+
3819
+ pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ",
3820
+ arg, (int)(right - tok), tok, right_size, right);
3821
+
3822
+ if (fmt->strtoul) {
3823
+ u64 val;
3824
+ struct syscall_arg syscall_arg = {
3825
+ .parm = fmt->parm,
3826
+ };
3827
+
3828
+ if (fmt->strtoul(right, right_size, &syscall_arg, &val)) {
3829
+ char *n, expansion[19];
3830
+ int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val);
3831
+ int expansion_offset = right - new_filter;
3832
+
3833
+ pr_debug("%s", expansion);
3834
+
3835
+ if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) {
3836
+ pr_debug(" out of memory!\n");
3837
+ free(new_filter);
3838
+ return -1;
3839
+ }
3840
+ if (new_filter != evsel->filter)
3841
+ free(new_filter);
3842
+ left = n + expansion_offset + expansion_lenght;
3843
+ new_filter = n;
3844
+ } else {
3845
+ pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3846
+ right_size, right, arg, evsel->name, evsel->filter);
3847
+ return -1;
3848
+ }
3849
+ } else {
3850
+ pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3851
+ arg, evsel->name, evsel->filter);
3852
+ return -1;
3853
+ }
3854
+
3855
+ pr_debug("\n");
3856
+ } else {
3857
+ left = right_end;
3858
+ }
3859
+ }
3860
+
3861
+ if (new_filter != evsel->filter) {
3862
+ pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
3863
+ evsel__set_filter(evsel, new_filter);
3864
+ free(new_filter);
3865
+ }
3866
+
3867
+ return 0;
3868
+}
3869
+
3870
+static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel)
3871
+{
3872
+ struct evlist *evlist = trace->evlist;
3873
+ struct evsel *evsel;
3874
+
3875
+ evlist__for_each_entry(evlist, evsel) {
3876
+ if (evsel->filter == NULL)
3877
+ continue;
3878
+
3879
+ if (trace__expand_filter(trace, evsel)) {
3880
+ *err_evsel = evsel;
3881
+ return -1;
3882
+ }
3883
+ }
3884
+
3885
+ return 0;
24373886 }
24383887
24393888 static int trace__run(struct trace *trace, int argc, const char **argv)
24403889 {
2441
- struct perf_evlist *evlist = trace->evlist;
2442
- struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
3890
+ struct evlist *evlist = trace->evlist;
3891
+ struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
24433892 int err = -1, i;
24443893 unsigned long before;
24453894 const bool forks = argc > 0;
....@@ -2447,33 +3896,33 @@
24473896
24483897 trace->live = true;
24493898
2450
- if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2451
- goto out_error_raw_syscalls;
3899
+ if (!trace->raw_augmented_syscalls) {
3900
+ if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
3901
+ goto out_error_raw_syscalls;
24523902
2453
- if (trace->trace_syscalls)
2454
- trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
3903
+ if (trace->trace_syscalls)
3904
+ trace->vfs_getname = evlist__add_vfs_getname(evlist);
3905
+ }
24553906
24563907 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2457
- pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
3908
+ pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
24583909 if (pgfault_maj == NULL)
24593910 goto out_error_mem;
2460
- perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2461
- perf_evlist__add(evlist, pgfault_maj);
3911
+ evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
3912
+ evlist__add(evlist, pgfault_maj);
24623913 }
24633914
24643915 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2465
- pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
3916
+ pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
24663917 if (pgfault_min == NULL)
24673918 goto out_error_mem;
2468
- perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2469
- perf_evlist__add(evlist, pgfault_min);
3919
+ evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
3920
+ evlist__add(evlist, pgfault_min);
24703921 }
24713922
24723923 if (trace->sched &&
2473
- perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2474
- trace__sched_stat_runtime))
3924
+ evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
24753925 goto out_error_sched_stat_runtime;
2476
-
24773926 /*
24783927 * If a global cgroup was set, apply it to all the events without an
24793928 * explicit cgroup. I.e.:
....@@ -2528,7 +3977,7 @@
25283977 }
25293978 }
25303979
2531
- err = perf_evlist__open(evlist);
3980
+ err = evlist__open(evlist);
25323981 if (err < 0)
25333982 goto out_error_open;
25343983
....@@ -2542,108 +3991,122 @@
25423991 goto out_error_open;
25433992 }
25443993
2545
- /*
2546
- * Better not use !target__has_task() here because we need to cover the
2547
- * case where no threads were specified in the command line, but a
2548
- * workload was, and in that case we will fill in the thread_map when
2549
- * we fork the workload in perf_evlist__prepare_workload.
2550
- */
2551
- if (trace->filter_pids.nr > 0)
2552
- err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2553
- else if (thread_map__pid(evlist->threads, 0) == -1)
2554
- err = trace__set_filter_loop_pids(trace);
2555
-
3994
+ err = trace__set_filter_pids(trace);
25563995 if (err < 0)
25573996 goto out_error_mem;
3997
+
3998
+ if (trace->syscalls.map)
3999
+ trace__init_syscalls_bpf_map(trace);
4000
+
4001
+ if (trace->syscalls.prog_array.sys_enter)
4002
+ trace__init_syscalls_bpf_prog_array_maps(trace);
25584003
25594004 if (trace->ev_qualifier_ids.nr > 0) {
25604005 err = trace__set_ev_qualifier_filter(trace);
25614006 if (err < 0)
25624007 goto out_errno;
25634008
2564
- pr_debug("event qualifier tracepoint filter: %s\n",
2565
- trace->syscalls.events.sys_exit->filter);
4009
+ if (trace->syscalls.events.sys_exit) {
4010
+ pr_debug("event qualifier tracepoint filter: %s\n",
4011
+ trace->syscalls.events.sys_exit->filter);
4012
+ }
25664013 }
25674014
4015
+ /*
4016
+ * If the "close" syscall is not traced, then we will not have the
4017
+ * opportunity to, in syscall_arg__scnprintf_close_fd() invalidate the
4018
+ * fd->pathname table and were ending up showing the last value set by
4019
+ * syscalls opening a pathname and associating it with a descriptor or
4020
+ * reading it from /proc/pid/fd/ in cases where that doesn't make
4021
+ * sense.
4022
+ *
4023
+ * So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is
4024
+ * not in use.
4025
+ */
4026
+ trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));
4027
+
4028
+ err = trace__expand_filters(trace, &evsel);
4029
+ if (err)
4030
+ goto out_delete_evlist;
25684031 err = perf_evlist__apply_filters(evlist, &evsel);
25694032 if (err < 0)
25704033 goto out_error_apply_filters;
25714034
2572
- err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
4035
+ if (trace->dump.map)
4036
+ bpf_map__fprintf(trace->dump.map, trace->output);
4037
+
4038
+ err = evlist__mmap(evlist, trace->opts.mmap_pages);
25734039 if (err < 0)
25744040 goto out_error_mmap;
25754041
25764042 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2577
- perf_evlist__enable(evlist);
4043
+ evlist__enable(evlist);
25784044
25794045 if (forks)
25804046 perf_evlist__start_workload(evlist);
25814047
25824048 if (trace->opts.initial_delay) {
25834049 usleep(trace->opts.initial_delay * 1000);
2584
- perf_evlist__enable(evlist);
4050
+ evlist__enable(evlist);
25854051 }
25864052
2587
- trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2588
- evlist->threads->nr > 1 ||
2589
- perf_evlist__first(evlist)->attr.inherit;
4053
+ trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
4054
+ evlist->core.threads->nr > 1 ||
4055
+ evlist__first(evlist)->core.attr.inherit;
25904056
25914057 /*
2592
- * Now that we already used evsel->attr to ask the kernel to setup the
2593
- * events, lets reuse evsel->attr.sample_max_stack as the limit in
4058
+ * Now that we already used evsel->core.attr to ask the kernel to setup the
4059
+ * events, lets reuse evsel->core.attr.sample_max_stack as the limit in
25944060 * trace__resolve_callchain(), allowing per-event max-stack settings
2595
- * to override an explicitely set --max-stack global setting.
4061
+ * to override an explicitly set --max-stack global setting.
25964062 */
25974063 evlist__for_each_entry(evlist, evsel) {
25984064 if (evsel__has_callchain(evsel) &&
2599
- evsel->attr.sample_max_stack == 0)
2600
- evsel->attr.sample_max_stack = trace->max_stack;
4065
+ evsel->core.attr.sample_max_stack == 0)
4066
+ evsel->core.attr.sample_max_stack = trace->max_stack;
26014067 }
26024068 again:
26034069 before = trace->nr_events;
26044070
2605
- for (i = 0; i < evlist->nr_mmaps; i++) {
4071
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
26064072 union perf_event *event;
2607
- struct perf_mmap *md;
4073
+ struct mmap *md;
26084074
26094075 md = &evlist->mmap[i];
2610
- if (perf_mmap__read_init(md) < 0)
4076
+ if (perf_mmap__read_init(&md->core) < 0)
26114077 continue;
26124078
2613
- while ((event = perf_mmap__read_event(md)) != NULL) {
2614
- struct perf_sample sample;
2615
-
4079
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
26164080 ++trace->nr_events;
26174081
2618
- err = perf_evlist__parse_sample(evlist, event, &sample);
2619
- if (err) {
2620
- fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2621
- goto next_event;
2622
- }
4082
+ err = trace__deliver_event(trace, event);
4083
+ if (err)
4084
+ goto out_disable;
26234085
2624
- trace__handle_event(trace, event, &sample);
2625
-next_event:
2626
- perf_mmap__consume(md);
4086
+ perf_mmap__consume(&md->core);
26274087
26284088 if (interrupted)
26294089 goto out_disable;
26304090
26314091 if (done && !draining) {
2632
- perf_evlist__disable(evlist);
4092
+ evlist__disable(evlist);
26334093 draining = true;
26344094 }
26354095 }
2636
- perf_mmap__read_done(md);
4096
+ perf_mmap__read_done(&md->core);
26374097 }
26384098
26394099 if (trace->nr_events == before) {
26404100 int timeout = done ? 100 : -1;
26414101
2642
- if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2643
- if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
4102
+ if (!draining && evlist__poll(evlist, timeout) > 0) {
4103
+ if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
26444104 draining = true;
26454105
26464106 goto again;
4107
+ } else {
4108
+ if (trace__flush_events(trace))
4109
+ goto out_disable;
26474110 }
26484111 } else {
26494112 goto again;
....@@ -2652,7 +4115,10 @@
26524115 out_disable:
26534116 thread__zput(trace->current);
26544117
2655
- perf_evlist__disable(evlist);
4118
+ evlist__disable(evlist);
4119
+
4120
+ if (trace->sort_events)
4121
+ ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
26564122
26574123 if (!err) {
26584124 if (trace->summary)
....@@ -2670,7 +4136,7 @@
26704136 out_delete_evlist:
26714137 trace__symbols__exit(trace);
26724138
2673
- perf_evlist__delete(evlist);
4139
+ evlist__delete(evlist);
26744140 cgroup__put(trace->cgroup);
26754141 trace->evlist = NULL;
26764142 trace->live = false;
....@@ -2687,11 +4153,11 @@
26874153 goto out_error;
26884154
26894155 out_error_mmap:
2690
- perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
4156
+ evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
26914157 goto out_error;
26924158
26934159 out_error_open:
2694
- perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
4160
+ evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
26954161
26964162 out_error:
26974163 fprintf(trace->output, "%s\n", errbuf);
....@@ -2700,7 +4166,7 @@
27004166 out_error_apply_filters:
27014167 fprintf(trace->output,
27024168 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2703
- evsel->filter, perf_evsel__name(evsel), errno,
4169
+ evsel->filter, evsel__name(evsel), errno,
27044170 str_error_r(errno, errbuf, sizeof(errbuf)));
27054171 goto out_delete_evlist;
27064172 }
....@@ -2715,18 +4181,16 @@
27154181
27164182 static int trace__replay(struct trace *trace)
27174183 {
2718
- const struct perf_evsel_str_handler handlers[] = {
4184
+ const struct evsel_str_handler handlers[] = {
27194185 { "probe:vfs_getname", trace__vfs_getname, },
27204186 };
27214187 struct perf_data data = {
2722
- .file = {
2723
- .path = input_name,
2724
- },
2725
- .mode = PERF_DATA_MODE_READ,
2726
- .force = trace->force,
4188
+ .path = input_name,
4189
+ .mode = PERF_DATA_MODE_READ,
4190
+ .force = trace->force,
27274191 };
27284192 struct perf_session *session;
2729
- struct perf_evsel *evsel;
4193
+ struct evsel *evsel;
27304194 int err = -1;
27314195
27324196 trace->tool.sample = trace__process_sample;
....@@ -2747,8 +4211,8 @@
27474211 trace->multiple_threads = true;
27484212
27494213 session = perf_session__new(&data, false, &trace->tool);
2750
- if (session == NULL)
2751
- return -1;
4214
+ if (IS_ERR(session))
4215
+ return PTR_ERR(session);
27524216
27534217 if (trace->opts.target.pid)
27544218 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
....@@ -2773,7 +4237,7 @@
27734237 "syscalls:sys_enter");
27744238
27754239 if (evsel &&
2776
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
4240
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
27774241 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
27784242 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
27794243 goto out;
....@@ -2785,17 +4249,17 @@
27854249 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
27864250 "syscalls:sys_exit");
27874251 if (evsel &&
2788
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
4252
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
27894253 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
27904254 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
27914255 goto out;
27924256 }
27934257
27944258 evlist__for_each_entry(session->evlist, evsel) {
2795
- if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2796
- (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2797
- evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2798
- evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
4259
+ if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
4260
+ (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
4261
+ evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
4262
+ evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
27994263 evsel->handler = trace__pgfault;
28004264 }
28014265
....@@ -2824,17 +4288,17 @@
28244288 }
28254289
28264290 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2827
- struct stats *stats;
2828
- double msecs;
2829
- int syscall;
4291
+ struct syscall_stats *stats;
4292
+ double msecs;
4293
+ int syscall;
28304294 )
28314295 {
28324296 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2833
- struct stats *stats = source->priv;
4297
+ struct syscall_stats *stats = source->priv;
28344298
28354299 entry->syscall = source->i;
28364300 entry->stats = stats;
2837
- entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
4301
+ entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
28384302 }
28394303
28404304 static size_t thread__dump_stats(struct thread_trace *ttrace,
....@@ -2850,27 +4314,37 @@
28504314
28514315 printed += fprintf(fp, "\n");
28524316
2853
- printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2854
- printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2855
- printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
4317
+ printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
4318
+ printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
4319
+ printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
28564320
28574321 resort_rb__for_each_entry(nd, syscall_stats) {
2858
- struct stats *stats = syscall_stats_entry->stats;
4322
+ struct syscall_stats *stats = syscall_stats_entry->stats;
28594323 if (stats) {
2860
- double min = (double)(stats->min) / NSEC_PER_MSEC;
2861
- double max = (double)(stats->max) / NSEC_PER_MSEC;
2862
- double avg = avg_stats(stats);
4324
+ double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
4325
+ double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
4326
+ double avg = avg_stats(&stats->stats);
28634327 double pct;
2864
- u64 n = (u64) stats->n;
4328
+ u64 n = (u64)stats->stats.n;
28654329
2866
- pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
4330
+ pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
28674331 avg /= NSEC_PER_MSEC;
28684332
28694333 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
28704334 printed += fprintf(fp, " %-15s", sc->name);
2871
- printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2872
- n, syscall_stats_entry->msecs, min, avg);
4335
+ printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
4336
+ n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
28734337 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
4338
+
4339
+ if (trace->errno_summary && stats->nr_failures) {
4340
+ const char *arch_name = perf_env__arch(trace->host->env);
4341
+ int e;
4342
+
4343
+ for (e = 0; e < stats->max_errno; ++e) {
4344
+ if (stats->errnos[e] != 0)
4345
+ fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]);
4346
+ }
4347
+ }
28744348 }
28754349 }
28764350
....@@ -2951,8 +4425,8 @@
29514425 return 0;
29524426 }
29534427
2954
-static int trace__set_filter_pids(const struct option *opt, const char *str,
2955
- int unset __maybe_unused)
4428
+static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
4429
+ int unset __maybe_unused)
29564430 {
29574431 int ret = -1;
29584432 size_t i;
....@@ -3017,38 +4491,65 @@
30174491 return 0;
30184492 }
30194493
3020
-static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
4494
+static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler)
30214495 {
3022
- struct perf_evsel *evsel;
4496
+ struct evsel *evsel;
30234497
3024
- evlist__for_each_entry(evlist, evsel)
3025
- evsel->handler = handler;
4498
+ evlist__for_each_entry(evlist, evsel) {
4499
+ if (evsel->handler == NULL)
4500
+ evsel->handler = handler;
4501
+ }
30264502 }
30274503
3028
-static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
4504
+static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
30294505 {
3030
- struct perf_evsel *evsel;
4506
+ struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
4507
+
4508
+ if (fmt) {
4509
+ struct syscall_fmt *scfmt = syscall_fmt__find(name);
4510
+
4511
+ if (scfmt) {
4512
+ int skip = 0;
4513
+
4514
+ if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
4515
+ strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
4516
+ ++skip;
4517
+
4518
+ memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
4519
+ }
4520
+ }
4521
+}
4522
+
4523
+static int evlist__set_syscall_tp_fields(struct evlist *evlist)
4524
+{
4525
+ struct evsel *evsel;
30314526
30324527 evlist__for_each_entry(evlist, evsel) {
30334528 if (evsel->priv || !evsel->tp_format)
30344529 continue;
30354530
3036
- if (strcmp(evsel->tp_format->system, "syscalls"))
4531
+ if (strcmp(evsel->tp_format->system, "syscalls")) {
4532
+ evsel__init_tp_arg_scnprintf(evsel);
30374533 continue;
4534
+ }
30384535
3039
- if (perf_evsel__init_syscall_tp(evsel))
4536
+ if (evsel__init_syscall_tp(evsel))
30404537 return -1;
30414538
30424539 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
3043
- struct syscall_tp *sc = evsel->priv;
4540
+ struct syscall_tp *sc = __evsel__syscall_tp(evsel);
30444541
30454542 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
30464543 return -1;
4544
+
4545
+ evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
30474546 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
3048
- struct syscall_tp *sc = evsel->priv;
4547
+ struct syscall_tp *sc = __evsel__syscall_tp(evsel);
30494548
30504549 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
30514550 return -1;
4551
+
4552
+ evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
30524553 }
30534554 }
30544555
....@@ -3072,6 +4573,7 @@
30724573 int len = strlen(str) + 1, err = -1, list, idx;
30734574 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
30744575 char group_name[PATH_MAX];
4576
+ struct syscall_fmt *fmt;
30754577
30764578 if (strace_groups_dir == NULL)
30774579 return -1;
....@@ -3089,12 +4591,19 @@
30894591 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
30904592 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
30914593 list = 1;
4594
+ goto do_concat;
4595
+ }
4596
+
4597
+ fmt = syscall_fmt__find_by_alias(s);
4598
+ if (fmt != NULL) {
4599
+ list = 1;
4600
+ s = fmt->name;
30924601 } else {
30934602 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
30944603 if (access(group_name, R_OK) == 0)
30954604 list = 1;
30964605 }
3097
-
4606
+do_concat:
30984607 if (lists[list]) {
30994608 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
31004609 } else {
....@@ -3130,9 +4639,9 @@
31304639 err = 0;
31314640
31324641 if (lists[0]) {
3133
- struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
3134
- "event selector. use 'perf list' to list available events",
3135
- parse_events_option);
4642
+ struct option o = {
4643
+ .value = &trace->evlist,
4644
+ };
31364645 err = parse_events_option(&o, lists[0], 0);
31374646 }
31384647 out:
....@@ -3146,12 +4655,59 @@
31464655 {
31474656 struct trace *trace = opt->value;
31484657
3149
- if (!list_empty(&trace->evlist->entries))
3150
- return parse_cgroups(opt, str, unset);
3151
-
4658
+ if (!list_empty(&trace->evlist->core.entries)) {
4659
+ struct option o = {
4660
+ .value = &trace->evlist,
4661
+ };
4662
+ return parse_cgroups(&o, str, unset);
4663
+ }
31524664 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
31534665
31544666 return 0;
4667
+}
4668
+
4669
+static int trace__config(const char *var, const char *value, void *arg)
4670
+{
4671
+ struct trace *trace = arg;
4672
+ int err = 0;
4673
+
4674
+ if (!strcmp(var, "trace.add_events")) {
4675
+ trace->perfconfig_events = strdup(value);
4676
+ if (trace->perfconfig_events == NULL) {
4677
+ pr_err("Not enough memory for %s\n", "trace.add_events");
4678
+ return -1;
4679
+ }
4680
+ } else if (!strcmp(var, "trace.show_timestamp")) {
4681
+ trace->show_tstamp = perf_config_bool(var, value);
4682
+ } else if (!strcmp(var, "trace.show_duration")) {
4683
+ trace->show_duration = perf_config_bool(var, value);
4684
+ } else if (!strcmp(var, "trace.show_arg_names")) {
4685
+ trace->show_arg_names = perf_config_bool(var, value);
4686
+ if (!trace->show_arg_names)
4687
+ trace->show_zeros = true;
4688
+ } else if (!strcmp(var, "trace.show_zeros")) {
4689
+ bool new_show_zeros = perf_config_bool(var, value);
4690
+ if (!trace->show_arg_names && !new_show_zeros) {
4691
+ pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
4692
+ goto out;
4693
+ }
4694
+ trace->show_zeros = new_show_zeros;
4695
+ } else if (!strcmp(var, "trace.show_prefix")) {
4696
+ trace->show_string_prefix = perf_config_bool(var, value);
4697
+ } else if (!strcmp(var, "trace.no_inherit")) {
4698
+ trace->opts.no_inherit = perf_config_bool(var, value);
4699
+ } else if (!strcmp(var, "trace.args_alignment")) {
4700
+ int args_alignment = 0;
4701
+ if (perf_config_int(&args_alignment, var, value) == 0)
4702
+ trace->args_alignment = args_alignment;
4703
+ } else if (!strcmp(var, "trace.tracepoint_beautifiers")) {
4704
+ if (strcasecmp(value, "libtraceevent") == 0)
4705
+ trace->libtraceevent_print = true;
4706
+ else if (strcasecmp(value, "libbeauty") == 0)
4707
+ trace->libtraceevent_print = false;
4708
+ }
4709
+out:
4710
+ return err;
31554711 }
31564712
31574713 int cmd_trace(int argc, const char **argv)
....@@ -3164,9 +4720,6 @@
31644720 NULL
31654721 };
31664722 struct trace trace = {
3167
- .syscalls = {
3168
- . max = -1,
3169
- },
31704723 .opts = {
31714724 .target = {
31724725 .uid = UINT_MAX,
....@@ -3176,19 +4729,26 @@
31764729 .user_interval = ULLONG_MAX,
31774730 .no_buffering = true,
31784731 .mmap_pages = UINT_MAX,
3179
- .proc_map_timeout = 500,
31804732 },
31814733 .output = stderr,
31824734 .show_comm = true,
4735
+ .show_tstamp = true,
4736
+ .show_duration = true,
4737
+ .show_arg_names = true,
4738
+ .args_alignment = 70,
31834739 .trace_syscalls = false,
31844740 .kernel_syscallchains = false,
31854741 .max_stack = UINT_MAX,
4742
+ .max_events = ULONG_MAX,
31864743 };
4744
+ const char *map_dump_str = NULL;
31874745 const char *output_name = NULL;
31884746 const struct option trace_options[] = {
31894747 OPT_CALLBACK('e', "event", &trace, "event",
31904748 "event/syscall selector. use 'perf list' to list available events",
31914749 trace__parse_events_option),
4750
+ OPT_CALLBACK(0, "filter", &trace.evlist, "filter",
4751
+ "event filter", parse_filter),
31924752 OPT_BOOLEAN(0, "comm", &trace.show_comm,
31934753 "show the thread COMM next to its id"),
31944754 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
....@@ -3201,7 +4761,7 @@
32014761 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
32024762 "trace events on existing thread id"),
32034763 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3204
- "pids to filter (by the kernel)", trace__set_filter_pids),
4764
+ "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
32054765 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
32064766 "system-wide collection from all CPUs"),
32074767 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
....@@ -3216,6 +4776,9 @@
32164776 OPT_CALLBACK(0, "duration", &trace, "float",
32174777 "show only events with duration > N.M ms",
32184778 trace__set_duration),
4779
+#ifdef HAVE_LIBBPF_SUPPORT
4780
+ OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
4781
+#endif
32194782 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
32204783 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
32214784 OPT_BOOLEAN('T', "time", &trace.full_time,
....@@ -3226,6 +4789,8 @@
32264789 "Show only syscall summary with statistics"),
32274790 OPT_BOOLEAN('S', "with-summary", &trace.summary,
32284791 "Show all syscalls and summary with statistics"),
4792
+ OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
4793
+ "Show errno stats per syscall, use with -s or -S"),
32294794 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
32304795 "Trace pagefaults", parse_pagefaults, "maj"),
32314796 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
....@@ -3233,8 +4798,12 @@
32334798 OPT_CALLBACK(0, "call-graph", &trace.opts,
32344799 "record_mode[,record_size]", record_callchain_help,
32354800 &record_parse_callchain_opt),
4801
+ OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
4802
+ "Use libtraceevent to print the tracepoint arguments."),
32364803 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
32374804 "Show the kernel callchains on the syscall exit path"),
4805
+ OPT_ULONG(0, "max-events", &trace.max_events,
4806
+ "Set the maximum number of events to print, exit after that is reached. "),
32384807 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
32394808 "Set the minimum stack depth when parsing the callchain, "
32404809 "anything below the specified depth will be ignored."),
....@@ -3242,20 +4811,23 @@
32424811 "Set the maximum stack depth when parsing the callchain, "
32434812 "anything beyond the specified depth will be ignored. "
32444813 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
4814
+ OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
4815
+ "Sort batch of events before processing, use if getting out of order events"),
32454816 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
32464817 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
3247
- OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
4818
+ OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
32484819 "per thread proc mmap processing timeout in ms"),
32494820 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
32504821 trace__parse_cgroups),
3251
- OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
4822
+ OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
32524823 "ms to wait before starting measurement after program "
32534824 "start"),
4825
+ OPTS_EVSWITCH(&trace.evswitch),
32544826 OPT_END()
32554827 };
32564828 bool __maybe_unused max_stack_user_set = true;
32574829 bool mmap_pages_user_set = true;
3258
- struct perf_evsel *evsel;
4830
+ struct evsel *evsel;
32594831 const char * const trace_subcommands[] = { "record", NULL };
32604832 int err = -1;
32614833 char bf[BUFSIZ];
....@@ -3263,7 +4835,7 @@
32634835 signal(SIGSEGV, sighandler_dump_stack);
32644836 signal(SIGFPE, sighandler_dump_stack);
32654837
3266
- trace.evlist = perf_evlist__new();
4838
+ trace.evlist = evlist__new();
32674839 trace.sctbl = syscalltbl__new();
32684840
32694841 if (trace.evlist == NULL || trace.sctbl == NULL) {
....@@ -3272,8 +4844,53 @@
32724844 goto out;
32734845 }
32744846
4847
+ /*
4848
+ * Parsing .perfconfig may entail creating a BPF event, that may need
4849
+ * to create BPF maps, so bump RLIM_MEMLOCK as the default 64K setting
4850
+ * is too small. This affects just this process, not touching the
4851
+ * global setting. If it fails we'll get something in 'perf trace -v'
4852
+ * to help diagnose the problem.
4853
+ */
4854
+ rlimit__bump_memlock();
4855
+
4856
+ err = perf_config(trace__config, &trace);
4857
+ if (err)
4858
+ goto out;
4859
+
32754860 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
32764861 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
4862
+
4863
+ /*
4864
+ * Here we already passed thru trace__parse_events_option() and it has
4865
+ * already figured out if -e syscall_name, if not but if --event
4866
+ * foo:bar was used, the user is interested _just_ in those, say,
4867
+ * tracepoint events, not in the strace-like syscall-name-based mode.
4868
+ *
4869
+ * This is important because we need to check if strace-like mode is
4870
+ * needed to decided if we should filter out the eBPF
4871
+ * __augmented_syscalls__ code, if it is in the mix, say, via
4872
+ * .perfconfig trace.add_events, and filter those out.
4873
+ */
4874
+ if (!trace.trace_syscalls && !trace.trace_pgfaults &&
4875
+ trace.evlist->core.nr_entries == 0 /* Was --events used? */) {
4876
+ trace.trace_syscalls = true;
4877
+ }
4878
+ /*
4879
+ * Now that we have --verbose figured out, lets see if we need to parse
4880
+ * events from .perfconfig, so that if those events fail parsing, say some
4881
+ * BPF program fails, then we'll be able to use --verbose to see what went
4882
+ * wrong in more detail.
4883
+ */
4884
+ if (trace.perfconfig_events != NULL) {
4885
+ struct parse_events_error parse_err;
4886
+
4887
+ bzero(&parse_err, sizeof(parse_err));
4888
+ err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
4889
+ if (err) {
4890
+ parse_events_print_error(&parse_err, trace.perfconfig_events);
4891
+ goto out;
4892
+ }
4893
+ }
32774894
32784895 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
32794896 usage_with_options_msg(trace_usage, trace_options,
....@@ -3288,10 +4905,60 @@
32884905 }
32894906
32904907 if (evsel) {
3291
- if (perf_evsel__init_augmented_syscall_tp(evsel) ||
3292
- perf_evsel__init_augmented_syscall_tp_args(evsel))
3293
- goto out;
32944908 trace.syscalls.events.augmented = evsel;
4909
+
4910
+ evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
4911
+ if (evsel == NULL) {
4912
+ pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
4913
+ goto out;
4914
+ }
4915
+
4916
+ if (evsel->bpf_obj == NULL) {
4917
+ pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
4918
+ goto out;
4919
+ }
4920
+
4921
+ trace.bpf_obj = evsel->bpf_obj;
4922
+
4923
+ /*
4924
+ * If we have _just_ the augmenter event but don't have a
4925
+ * explicit --syscalls, then assume we want all strace-like
4926
+ * syscalls:
4927
+ */
4928
+ if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace))
4929
+ trace.trace_syscalls = true;
4930
+ /*
4931
+ * So, if we have a syscall augmenter, but trace_syscalls, aka
4932
+ * strace-like syscall tracing is not set, then we need to trow
4933
+ * away the augmenter, i.e. all the events that were created
4934
+ * from that BPF object file.
4935
+ *
4936
+ * This is more to fix the current .perfconfig trace.add_events
4937
+ * style of setting up the strace-like eBPF based syscall point
4938
+ * payload augmenter.
4939
+ *
4940
+ * All this complexity will be avoided by adding an alternative
4941
+ * to trace.add_events in the form of
4942
+ * trace.bpf_augmented_syscalls, that will be only parsed if we
4943
+ * need it.
4944
+ *
4945
+ * .perfconfig trace.add_events is still useful if we want, for
4946
+ * instance, have msr_write.msr in some .perfconfig profile based
4947
+ * 'perf trace --config determinism.profile' mode, where for some
4948
+ * particular goal/workload type we want a set of events and
4949
+ * output mode (with timings, etc) instead of having to add
4950
+ * all via the command line.
4951
+ *
4952
+ * Also --config to specify an alternate .perfconfig file needs
4953
+ * to be implemented.
4954
+ */
4955
+ if (!trace.trace_syscalls) {
4956
+ trace__delete_augmented_syscalls(&trace);
4957
+ } else {
4958
+ trace__set_bpf_map_filtered_pids(&trace);
4959
+ trace__set_bpf_map_syscalls(&trace);
4960
+ trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
4961
+ }
32954962 }
32964963
32974964 err = bpf__setup_stdout(trace.evlist);
....@@ -3302,6 +4969,14 @@
33024969 }
33034970
33044971 err = -1;
4972
+
4973
+ if (map_dump_str) {
4974
+ trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
4975
+ if (trace.dump.map == NULL) {
4976
+ pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
4977
+ goto out;
4978
+ }
4979
+ }
33054980
33064981 if (trace.trace_pgfaults) {
33074982 trace.opts.sample_address = true;
....@@ -3329,25 +5004,106 @@
33295004 symbol_conf.use_callchain = true;
33305005 }
33315006
3332
- if (trace.evlist->nr_entries > 0) {
3333
- evlist__set_evsel_handler(trace.evlist, trace__event_handler);
5007
+ if (trace.evlist->core.nr_entries > 0) {
5008
+ evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
33345009 if (evlist__set_syscall_tp_fields(trace.evlist)) {
33355010 perror("failed to set syscalls:* tracepoint fields");
33365011 goto out;
33375012 }
33385013 }
33395014
5015
+ if (trace.sort_events) {
5016
+ ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
5017
+ ordered_events__set_copy_on_queue(&trace.oe.data, true);
5018
+ }
5019
+
5020
+ /*
5021
+ * If we are augmenting syscalls, then combine what we put in the
5022
+ * __augmented_syscalls__ BPF map with what is in the
5023
+ * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF,
5024
+ * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit.
5025
+ *
5026
+ * We'll switch to look at two BPF maps, one for sys_enter and the
5027
+ * other for sys_exit when we start augmenting the sys_exit paths with
5028
+ * buffers that are being copied from kernel to userspace, think 'read'
5029
+ * syscall.
5030
+ */
5031
+ if (trace.syscalls.events.augmented) {
5032
+ evlist__for_each_entry(trace.evlist, evsel) {
5033
+ bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
5034
+
5035
+ if (raw_syscalls_sys_exit) {
5036
+ trace.raw_augmented_syscalls = true;
5037
+ goto init_augmented_syscall_tp;
5038
+ }
5039
+
5040
+ if (trace.syscalls.events.augmented->priv == NULL &&
5041
+ strstr(evsel__name(evsel), "syscalls:sys_enter")) {
5042
+ struct evsel *augmented = trace.syscalls.events.augmented;
5043
+ if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
5044
+ evsel__init_augmented_syscall_tp_args(augmented))
5045
+ goto out;
5046
+ /*
5047
+ * Augmented is __augmented_syscalls__ BPF_OUTPUT event
5048
+ * Above we made sure we can get from the payload the tp fields
5049
+ * that we get from syscalls:sys_enter tracefs format file.
5050
+ */
5051
+ augmented->handler = trace__sys_enter;
5052
+ /*
5053
+ * Now we do the same for the *syscalls:sys_enter event so that
5054
+ * if we handle it directly, i.e. if the BPF prog returns 0 so
5055
+ * as not to filter it, then we'll handle it just like we would
5056
+ * for the BPF_OUTPUT one:
5057
+ */
5058
+ if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
5059
+ evsel__init_augmented_syscall_tp_args(evsel))
5060
+ goto out;
5061
+ evsel->handler = trace__sys_enter;
5062
+ }
5063
+
5064
+ if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
5065
+ struct syscall_tp *sc;
5066
+init_augmented_syscall_tp:
5067
+ if (evsel__init_augmented_syscall_tp(evsel, evsel))
5068
+ goto out;
5069
+ sc = __evsel__syscall_tp(evsel);
5070
+ /*
5071
+ * For now with BPF raw_augmented we hook into
5072
+ * raw_syscalls:sys_enter and there we get all
5073
+ * 6 syscall args plus the tracepoint common
5074
+ * fields and the syscall_nr (another long).
5075
+ * So we check if that is the case and if so
5076
+ * don't look after the sc->args_size but
5077
+ * always after the full raw_syscalls:sys_enter
5078
+ * payload, which is fixed.
5079
+ *
5080
+ * We'll revisit this later to pass
5081
+ * s->args_size to the BPF augmenter (now
5082
+ * tools/perf/examples/bpf/augmented_raw_syscalls.c,
5083
+ * so that it copies only what we need for each
5084
+ * syscall, like what happens when we use
5085
+ * syscalls:sys_enter_NAME, so that we reduce
5086
+ * the kernel/userspace traffic to just what is
5087
+ * needed for each syscall.
5088
+ */
5089
+ if (trace.raw_augmented_syscalls)
5090
+ trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
5091
+ evsel__init_augmented_syscall_tp_ret(evsel);
5092
+ evsel->handler = trace__sys_exit;
5093
+ }
5094
+ }
5095
+ }
5096
+
33405097 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
33415098 return trace__record(&trace, argc-1, &argv[1]);
5099
+
5100
+ /* Using just --errno-summary will trigger --summary */
5101
+ if (trace.errno_summary && !trace.summary && !trace.summary_only)
5102
+ trace.summary_only = true;
33425103
33435104 /* summary_only implies summary option, but don't overwrite summary if set */
33445105 if (trace.summary_only)
33455106 trace.summary = trace.summary_only;
3346
-
3347
- if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3348
- trace.evlist->nr_entries == 0 /* Was --events used? */) {
3349
- trace.trace_syscalls = true;
3350
- }
33515107
33525108 if (output_name != NULL) {
33535109 err = trace__open_output(&trace, output_name);
....@@ -3356,6 +5112,10 @@
33565112 goto out;
33575113 }
33585114 }
5115
+
5116
+ err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
5117
+ if (err)
5118
+ goto out_close;
33595119
33605120 err = target__validate(&trace.opts.target);
33615121 if (err) {
....@@ -3383,5 +5143,6 @@
33835143 if (output_name != NULL)
33845144 fclose(trace.output);
33855145 out:
5146
+ zfree(&trace.perfconfig_events);
33865147 return err;
33875148 }