2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/tools/perf/builtin-stat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * builtin-stat.c
  *
@@ -37,14 +38,11 @@
  * Mike Galbraith <efault@gmx.de>
  * Paul Mackerras <paulus@samba.org>
  * Jaswinder Singh Rajput <jaswinder@kernel.org>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
-#include "perf.h"
 #include "builtin.h"
+#include "perf.h"
 #include "util/cgroup.h"
-#include "util/util.h"
 #include <subcmd/parse-options.h>
 #include "util/parse-events.h"
 #include "util/pmu.h"
@@ -52,23 +50,27 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/debug.h"
-#include "util/drv_configs.h"
 #include "util/color.h"
 #include "util/stat.h"
 #include "util/header.h"
 #include "util/cpumap.h"
-#include "util/thread.h"
 #include "util/thread_map.h"
 #include "util/counts.h"
-#include "util/group.h"
+#include "util/topdown.h"
 #include "util/session.h"
 #include "util/tool.h"
 #include "util/string2.h"
 #include "util/metricgroup.h"
+#include "util/synthetic-events.h"
+#include "util/target.h"
+#include "util/time-utils.h"
 #include "util/top.h"
+#include "util/affinity.h"
+#include "util/pfm.h"
 #include "asm/bug.h"
 
 #include <linux/time64.h>
+#include <linux/zalloc.h>
 #include <api/fs/fs.h>
 #include <errno.h>
 #include <signal.h>
@@ -83,13 +85,12 @@
 #include <unistd.h>
 #include <sys/time.h>
 #include <sys/resource.h>
-#include <sys/wait.h>
+#include <linux/err.h>
 
-#include "sane_ctype.h"
+#include <linux/ctype.h>
+#include <perf/evlist.h>
 
 #define DEFAULT_SEPARATOR	" "
-#define CNTR_NOT_SUPPORTED	"<not supported>"
-#define CNTR_NOT_COUNTED	"<not counted>"
 #define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
@@ -127,6 +128,15 @@
 	NULL,
 };
 
+static const char *topdown_metric_attrs[] = {
+	"slots",
+	"topdown-retiring",
+	"topdown-bad-spec",
+	"topdown-fe-bound",
+	"topdown-be-bound",
+	NULL,
+};
+
 static const char *smi_cost_attrs = {
 	"{"
 	"msr/aperf/,"
@@ -135,56 +145,32 @@
 	"}"
 };
 
-static struct perf_evlist	*evsel_list;
-
-static struct rblist		metric_events;
+static struct evlist		*evsel_list;
 
 static struct target target = {
 	.uid	= UINT_MAX,
 };
 
-typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
-
 #define METRIC_ONLY_LEN 20
 
-static int			run_count	= 1;
-static bool			no_inherit	= false;
 static volatile pid_t		child_pid	= -1;
-static bool			null_run	= false;
 static int			detailed_run	= 0;
 static bool			transaction_run;
 static bool			topdown_run	= false;
 static bool			smi_cost	= false;
 static bool			smi_reset	= false;
-static bool			big_num		= true;
static int			big_num_opt	= -1;
-static const char		*csv_sep	= NULL;
-static bool			csv_output	= false;
 static bool			group		= false;
 static const char		*pre_cmd	= NULL;
 static const char		*post_cmd	= NULL;
 static bool			sync_run	= false;
-static unsigned int		initial_delay	= 0;
-static unsigned int		unit_width	= 4; /* strlen("unit") */
 static bool			forever		= false;
-static bool			metric_only	= false;
 static bool			force_metric_only	= false;
-static bool			no_merge	= false;
-static bool			walltime_run_table	= false;
 static struct timespec		ref_time;
-static struct cpu_map		*aggr_map;
-static aggr_get_id_t		aggr_get_id;
 static bool			append_file;
 static bool			interval_count;
-static bool			interval_clear;
 static const char		*output_name;
 static int			output_fd;
-static int			print_free_counters_hint;
-static int			print_mixed_hw_group_error;
-static u64			*walltime_run;
-static bool			ru_display	= false;
-static struct rusage		ru_data;
-static unsigned int		metric_only_len	= METRIC_ONLY_LEN;
 
 struct perf_stat {
 	bool			 record;
@@ -193,8 +179,8 @@
 	u64			 bytes_written;
 	struct perf_tool	 tool;
 	bool			 maps_allocated;
-	struct cpu_map		*cpus;
-	struct thread_map	*threads;
+	struct perf_cpu_map	*cpus;
+	struct perf_thread_map	*threads;
 	enum aggr_mode		 aggr_mode;
 };
 
@@ -204,13 +190,68 @@
 static volatile int done = 0;
 
 static struct perf_stat_config stat_config = {
-	.aggr_mode	= AGGR_GLOBAL,
-	.scale		= true,
+	.aggr_mode		= AGGR_GLOBAL,
+	.scale			= true,
+	.unit_width		= 4, /* strlen("unit") */
+	.run_count		= 1,
+	.metric_only_len	= METRIC_ONLY_LEN,
+	.walltime_nsecs_stats	= &walltime_nsecs_stats,
+	.big_num		= true,
+	.ctl_fd			= -1,
+	.ctl_fd_ack		= -1
 };
 
-static bool is_duration_time(struct perf_evsel *evsel)
+static bool cpus_map_matched(struct evsel *a, struct evsel *b)
 {
-	return !strcmp(evsel->name, "duration_time");
+	if (!a->core.cpus && !b->core.cpus)
+		return true;
+
+	if (!a->core.cpus || !b->core.cpus)
+		return false;
+
+	if (a->core.cpus->nr != b->core.cpus->nr)
+		return false;
+
+	for (int i = 0; i < a->core.cpus->nr; i++) {
+		if (a->core.cpus->map[i] != b->core.cpus->map[i])
+			return false;
+	}
+
+	return true;
+}
+
+static void evlist__check_cpu_maps(struct evlist *evlist)
+{
+	struct evsel *evsel, *pos, *leader;
+	char buf[1024];
+
+	evlist__for_each_entry(evlist, evsel) {
+		leader = evsel->leader;
+
+		/* Check that leader matches cpus with each member. */
+		if (leader == evsel)
+			continue;
+		if (cpus_map_matched(leader, evsel))
+			continue;
+
+		/* If there's mismatch disable the group and warn user. */
+		WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n");
+		evsel__group_desc(leader, buf, sizeof(buf));
+		pr_warning(" %s\n", buf);
+
+		if (verbose) {
+			cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
+			pr_warning(" %s: %s\n", leader->name, buf);
+			cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
+			pr_warning(" %s: %s\n", evsel->name, buf);
+		}
+
+		for_each_group_evsel(pos, leader) {
+			pos->leader = pos;
+			pos->core.nr_members = 0;
+		}
+		evsel->leader->core.nr_members = 0;
+	}
 }
 
 static inline void diff_timespec(struct timespec *r, struct timespec *a,
@@ -234,66 +275,6 @@
 
 	for (i = 0; i < stat_config.stats_num; i++)
 		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
-}
-
-static int create_perf_stat_counter(struct perf_evsel *evsel)
-{
-	struct perf_event_attr *attr = &evsel->attr;
-	struct perf_evsel *leader = evsel->leader;
-
-	if (stat_config.scale) {
-		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
-				    PERF_FORMAT_TOTAL_TIME_RUNNING;
-	}
-
-	/*
-	 * The event is part of non trivial group, let's enable
-	 * the group read (for leader) and ID retrieval for all
-	 * members.
-	 */
-	if (leader->nr_members > 1)
-		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
-
-	attr->inherit = !no_inherit;
-
-	/*
-	 * Some events get initialized with sample_(period/type) set,
-	 * like tracepoints. Clear it up for counting.
-	 */
-	attr->sample_period = 0;
-
-	/*
-	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
-	 * while avoiding that older tools show confusing messages.
-	 *
-	 * However for pipe sessions we need to keep it zero,
-	 * because script's perf_evsel__check_attr is triggered
-	 * by attr->sample_type != 0, and we can't run it on
-	 * stat sessions.
-	 */
-	if (!(STAT_RECORD && perf_stat.data.is_pipe))
-		attr->sample_type = PERF_SAMPLE_IDENTIFIER;
-
-	/*
-	 * Disabling all counters initially, they will be enabled
-	 * either manually by us or by kernel via enable_on_exec
-	 * set later.
-	 */
-	if (perf_evsel__is_group_leader(evsel)) {
-		attr->disabled = 1;
-
-		/*
-		 * In case of initial_delay we enable tracee
-		 * events manually.
-		 */
-		if (target__none(&target) && !initial_delay)
-			attr->enable_on_exec = 1;
-	}
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
-
-	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
 }
 
@@ -320,11 +301,10 @@
 #define WRITE_STAT_ROUND_EVENT(time, interval) \
 	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
 
-#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
 
-static int
-perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
-			     struct perf_counts_values *count)
+static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
+				   struct perf_counts_values *count)
 {
 	struct perf_sample_id *sid = SID(counter, cpu, thread);
 
@@ -332,89 +312,169 @@
 					   process_synthesized_event, NULL);
 }
 
+static int read_single_counter(struct evsel *counter, int cpu,
+			       int thread, struct timespec *rs)
+{
+	if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
+		u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
+		struct perf_counts_values *count =
+			perf_counts(counter->counts, cpu, thread);
+		count->ena = count->run = val;
+		count->val = val;
+		return 0;
+	}
+	return evsel__read_counter(counter, cpu, thread);
+}
+
 /*
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
 */
-static int read_counter(struct perf_evsel *counter)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 {
-	int nthreads = thread_map__nr(evsel_list->threads);
-	int ncpus, cpu, thread;
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		ncpus = perf_evsel__nr_cpus(counter);
-	else
-		ncpus = 1;
+	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
+	int thread;
 
 	if (!counter->supported)
 		return -ENOENT;
 
-	if (counter->system_wide)
+	if (counter->core.system_wide)
 		nthreads = 1;
 
 	for (thread = 0; thread < nthreads; thread++) {
-		for (cpu = 0; cpu < ncpus; cpu++) {
-			struct perf_counts_values *count;
+		struct perf_counts_values *count;
 
-			count = perf_counts(counter->counts, cpu, thread);
+		count = perf_counts(counter->counts, cpu, thread);
 
-			/*
-			 * The leader's group read loads data into its group members
-			 * (via perf_evsel__read_counter) and sets threir count->loaded.
-			 */
-			if (!count->loaded &&
-			    perf_evsel__read_counter(counter, cpu, thread)) {
-				counter->counts->scaled = -1;
-				perf_counts(counter->counts, cpu, thread)->ena = 0;
-				perf_counts(counter->counts, cpu, thread)->run = 0;
+		/*
+		 * The leader's group read loads data into its group members
+		 * (via evsel__read_counter()) and sets their count->loaded.
+		 */
+		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+		    read_single_counter(counter, cpu, thread, rs)) {
+			counter->counts->scaled = -1;
+			perf_counts(counter->counts, cpu, thread)->ena = 0;
+			perf_counts(counter->counts, cpu, thread)->run = 0;
+			return -1;
+		}
+
+		perf_counts__set_loaded(counter->counts, cpu, thread, false);
+
+		if (STAT_RECORD) {
+			if (evsel__write_stat_event(counter, cpu, thread, count)) {
+				pr_err("failed to write stat event\n");
 				return -1;
 			}
+		}
 
-			count->loaded = false;
-
-			if (STAT_RECORD) {
-				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
-					pr_err("failed to write stat event\n");
-					return -1;
-				}
-			}
-
-			if (verbose > 1) {
-				fprintf(stat_config.output,
-					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-					perf_evsel__name(counter),
-					cpu,
-					count->val, count->ena, count->run);
-			}
+		if (verbose > 1) {
+			fprintf(stat_config.output,
+				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+				evsel__name(counter),
+				cpu,
+				count->val, count->ena, count->run);
 		}
 	}
 
 	return 0;
 }
 
-static void read_counters(void)
+static int read_affinity_counters(struct timespec *rs)
 {
-	struct perf_evsel *counter;
-	int ret;
+	struct evsel *counter;
+	struct affinity affinity;
+	int i, ncpus, cpu;
+
+	if (affinity__setup(&affinity) < 0)
+		return -1;
+
+	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+	if (!target__has_cpu(&target) || target__has_per_thread(&target))
+		ncpus = 1;
+	evlist__for_each_cpu(evsel_list, i, cpu) {
+		if (i >= ncpus)
+			break;
+		affinity__set(&affinity, cpu);
+
+		evlist__for_each_entry(evsel_list, counter) {
+			if (evsel__cpu_iter_skip(counter, cpu))
+				continue;
+			if (!counter->err) {
+				counter->err = read_counter_cpu(counter, rs,
+								counter->cpu_iter - 1);
+			}
+		}
+	}
+	affinity__cleanup(&affinity);
+	return 0;
+}
+
+static void read_counters(struct timespec *rs)
+{
+	struct evsel *counter;
+
+	if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0))
+		return;
 
 	evlist__for_each_entry(evsel_list, counter) {
-		ret = read_counter(counter);
-		if (ret)
+		if (counter->err)
 			pr_debug("failed to read counter %s\n", counter->name);
-
-		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
+		counter->err = 0;
 	}
+}
+
+static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
+{
+	int i;
+
+	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
+	if (!config->stats)
+		return -1;
+
+	config->stats_num = nthreads;
+
+	for (i = 0; i < nthreads; i++)
+		runtime_stat__init(&config->stats[i]);
+
+	return 0;
+}
+
+static void runtime_stat_delete(struct perf_stat_config *config)
+{
+	int i;
+
+	if (!config->stats)
+		return;
+
+	for (i = 0; i < config->stats_num; i++)
+		runtime_stat__exit(&config->stats[i]);
+
+	zfree(&config->stats);
+}
+
+static void runtime_stat_reset(struct perf_stat_config *config)
+{
+	int i;
+
+	if (!config->stats)
+		return;
+
+	for (i = 0; i < config->stats_num; i++)
+		perf_stat__reset_shadow_per_stat(&config->stats[i]);
 }
 
 static void process_interval(void)
 {
 	struct timespec ts, rs;
 
-	read_counters();
-
 	clock_gettime(CLOCK_MONOTONIC, &ts);
 	diff_timespec(&rs, &ts, &ref_time);
+
+	perf_stat__reset_shadow_per_stat(&rt_stat);
+	runtime_stat_reset(&stat_config);
+	read_counters(&rs);
 
 	if (STAT_RECORD) {
 		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
@@ -426,18 +486,38 @@
 	print_counters(&rs, 0, NULL);
 }
 
+static bool handle_interval(unsigned int interval, int *times)
+{
+	if (interval) {
+		process_interval();
+		if (interval_count && !(--(*times)))
+			return true;
+	}
+	return false;
+}
+
 static void enable_counters(void)
 {
-	if (initial_delay)
-		usleep(initial_delay * USEC_PER_MSEC);
+	if (stat_config.initial_delay < 0) {
+		pr_info(EVLIST_DISABLED_MSG);
+		return;
+	}
+
+	if (stat_config.initial_delay > 0) {
+		pr_info(EVLIST_DISABLED_MSG);
+		usleep(stat_config.initial_delay * USEC_PER_MSEC);
+	}
 
 	/*
 	 * We need to enable counters only if:
 	 * - we don't have tracee (attaching to task or cpu)
 	 * - we have initial delay configured
 	 */
-	if (!target__none(&target) || initial_delay)
-		perf_evlist__enable(evsel_list);
+	if (!target__none(&target) || stat_config.initial_delay) {
+		evlist__enable(evsel_list);
+		if (stat_config.initial_delay > 0)
+			pr_info(EVLIST_ENABLED_MSG);
+	}
 }
 
 static void disable_counters(void)
@@ -448,7 +528,7 @@
 	 * from counting before reading their constituent counters.
 	 */
 	if (!target__none(&target))
-		perf_evlist__disable(evsel_list);
+		evlist__disable(evsel_list);
 }
 
454534 static volatile int workload_exec_errno;
....@@ -464,109 +544,167 @@
464544 workload_exec_errno = info->si_value.sival_int;
465545 }
466546
467
-static int perf_stat_synthesize_config(bool is_pipe)
547
+static bool evsel__should_store_id(struct evsel *counter)
468548 {
469
- int err;
549
+ return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
550
+}
470551
471
- if (is_pipe) {
472
- err = perf_event__synthesize_attrs(NULL, perf_stat.session,
473
- process_synthesized_event);
474
- if (err < 0) {
475
- pr_err("Couldn't synthesize attrs.\n");
476
- return err;
552
+static bool is_target_alive(struct target *_target,
553
+ struct perf_thread_map *threads)
554
+{
555
+ struct stat st;
556
+ int i;
557
+
558
+ if (!target__has_task(_target))
559
+ return true;
560
+
561
+ for (i = 0; i < threads->nr; i++) {
562
+ char path[PATH_MAX];
563
+
564
+ scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
565
+ threads->map[i].pid);
566
+
567
+ if (!stat(path, &st))
568
+ return true;
569
+ }
570
+
571
+ return false;
572
+}
573
+
574
+static void process_evlist(struct evlist *evlist, unsigned int interval)
575
+{
576
+ enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
577
+
578
+ if (evlist__ctlfd_process(evlist, &cmd) > 0) {
579
+ switch (cmd) {
580
+ case EVLIST_CTL_CMD_ENABLE:
581
+ pr_info(EVLIST_ENABLED_MSG);
582
+ if (interval)
583
+ process_interval();
584
+ break;
585
+ case EVLIST_CTL_CMD_DISABLE:
586
+ if (interval)
587
+ process_interval();
588
+ pr_info(EVLIST_DISABLED_MSG);
589
+ break;
590
+ case EVLIST_CTL_CMD_SNAPSHOT:
591
+ case EVLIST_CTL_CMD_ACK:
592
+ case EVLIST_CTL_CMD_UNSUPPORTED:
593
+ default:
594
+ break;
595
+ }
596
+ }
597
+}
598
+
599
+static void compute_tts(struct timespec *time_start, struct timespec *time_stop,
600
+ int *time_to_sleep)
601
+{
602
+ int tts = *time_to_sleep;
603
+ struct timespec time_diff;
604
+
605
+ diff_timespec(&time_diff, time_stop, time_start);
606
+
607
+ tts -= time_diff.tv_sec * MSEC_PER_SEC +
608
+ time_diff.tv_nsec / NSEC_PER_MSEC;
609
+
610
+ if (tts < 0)
611
+ tts = 0;
612
+
613
+ *time_to_sleep = tts;
614
+}
615
+
616
+static int dispatch_events(bool forks, int timeout, int interval, int *times)
617
+{
618
+ int child_exited = 0, status = 0;
619
+ int time_to_sleep, sleep_time;
620
+ struct timespec time_start, time_stop;
621
+
622
+ if (interval)
623
+ sleep_time = interval;
624
+ else if (timeout)
625
+ sleep_time = timeout;
626
+ else
627
+ sleep_time = 1000;
628
+
629
+ time_to_sleep = sleep_time;
630
+
631
+ while (!done) {
632
+ if (forks)
633
+ child_exited = waitpid(child_pid, &status, WNOHANG);
634
+ else
635
+ child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
636
+
637
+ if (child_exited)
638
+ break;
639
+
640
+ clock_gettime(CLOCK_MONOTONIC, &time_start);
641
+ if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */
642
+ if (timeout || handle_interval(interval, times))
643
+ break;
644
+ time_to_sleep = sleep_time;
645
+ } else { /* fd revent */
646
+ process_evlist(evsel_list, interval);
647
+ clock_gettime(CLOCK_MONOTONIC, &time_stop);
648
+ compute_tts(&time_start, &time_stop, &time_to_sleep);
477649 }
478650 }
479651
480
- err = perf_event__synthesize_extra_attr(NULL,
481
- evsel_list,
482
- process_synthesized_event,
483
- is_pipe);
484
-
485
- err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
486
- process_synthesized_event,
487
- NULL);
488
- if (err < 0) {
489
- pr_err("Couldn't synthesize thread map.\n");
490
- return err;
491
- }
492
-
493
- err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
494
- process_synthesized_event, NULL);
495
- if (err < 0) {
496
- pr_err("Couldn't synthesize thread map.\n");
497
- return err;
498
- }
499
-
500
- err = perf_event__synthesize_stat_config(NULL, &stat_config,
501
- process_synthesized_event, NULL);
502
- if (err < 0) {
503
- pr_err("Couldn't synthesize config.\n");
504
- return err;
505
- }
506
-
507
- return 0;
652
+ return status;
508653 }
509654
510
-#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
655
+enum counter_recovery {
656
+ COUNTER_SKIP,
657
+ COUNTER_RETRY,
658
+ COUNTER_FATAL,
659
+};
511660
512
-static int __store_counter_ids(struct perf_evsel *counter)
661
+static enum counter_recovery stat_handle_error(struct evsel *counter)
513662 {
514
- int cpu, thread;
515
-
516
- for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) {
517
- for (thread = 0; thread < xyarray__max_y(counter->fd);
518
- thread++) {
519
- int fd = FD(counter, cpu, thread);
520
-
521
- if (perf_evlist__id_add_fd(evsel_list, counter,
522
- cpu, thread, fd) < 0)
523
- return -1;
524
- }
525
- }
526
-
527
- return 0;
528
-}
529
-
530
-static int store_counter_ids(struct perf_evsel *counter)
531
-{
532
- struct cpu_map *cpus = counter->cpus;
533
- struct thread_map *threads = counter->threads;
534
-
535
- if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
536
- return -ENOMEM;
537
-
538
- return __store_counter_ids(counter);
539
-}
540
-
541
-static bool perf_evsel__should_store_id(struct perf_evsel *counter)
542
-{
543
- return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
544
-}
545
-
546
-static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
547
-{
548
- struct perf_evsel *c2, *leader;
549
- bool is_open = true;
550
-
551
- leader = evsel->leader;
552
- pr_debug("Weak group for %s/%d failed\n",
553
- leader->name, leader->nr_members);
554
-
663
+ char msg[BUFSIZ];
555664 /*
556
- * for_each_group_member doesn't work here because it doesn't
557
- * include the first entry.
665
+ * PPC returns ENXIO for HW counters until 2.6.37
666
+ * (behavior changed with commit b0a873e).
558667 */
559
- evlist__for_each_entry(evsel_list, c2) {
560
- if (c2 == evsel)
561
- is_open = false;
562
- if (c2->leader == leader) {
563
- if (is_open)
564
- perf_evsel__close(c2);
565
- c2->leader = c2;
566
- c2->nr_members = 0;
668
+ if (errno == EINVAL || errno == ENOSYS ||
669
+ errno == ENOENT || errno == EOPNOTSUPP ||
670
+ errno == ENXIO) {
671
+ if (verbose > 0)
672
+ ui__warning("%s event is not supported by the kernel.\n",
673
+ evsel__name(counter));
674
+ counter->supported = false;
675
+ /*
676
+ * errored is a sticky flag that means one of the counter's
677
+ * cpu event had a problem and needs to be reexamined.
678
+ */
679
+ counter->errored = true;
680
+
681
+ if ((counter->leader != counter) ||
682
+ !(counter->leader->core.nr_members > 1))
683
+ return COUNTER_SKIP;
684
+ } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
685
+ if (verbose > 0)
686
+ ui__warning("%s\n", msg);
687
+ return COUNTER_RETRY;
688
+ } else if (target__has_per_thread(&target) &&
689
+ evsel_list->core.threads &&
690
+ evsel_list->core.threads->err_thread != -1) {
691
+ /*
692
+ * For global --per-thread case, skip current
693
+ * error thread.
694
+ */
695
+ if (!thread_map__remove(evsel_list->core.threads,
696
+ evsel_list->core.threads->err_thread)) {
697
+ evsel_list->core.threads->err_thread = -1;
698
+ return COUNTER_RETRY;
567699 }
568700 }
569
- return leader;
701
+
702
+ evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
703
+ ui__error("%s\n", msg);
704
+
705
+ if (child_pid != -1)
706
+ kill(child_pid, SIGTERM);
707
+ return COUNTER_FATAL;
570708 }
571709
572710 static int __run_perf_stat(int argc, const char **argv, int run_idx)
@@ -576,24 +714,14 @@
 	int timeout = stat_config.timeout;
 	char msg[BUFSIZ];
 	unsigned long long t0, t1;
-	struct perf_evsel *counter;
-	struct timespec ts;
+	struct evsel *counter;
 	size_t l;
 	int status = 0;
 	const bool forks = (argc > 0);
 	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
-	struct perf_evsel_config_term *err_term;
-
-	if (interval) {
-		ts.tv_sec = interval / USEC_PER_MSEC;
-		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
-	} else if (timeout) {
-		ts.tv_sec = timeout / USEC_PER_MSEC;
-		ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
-	} else {
-		ts.tv_sec = 1;
-		ts.tv_nsec = 0;
-	}
+	struct affinity affinity;
+	int i, cpu;
+	bool second_pass = false;
 
 	if (forks) {
 		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
@@ -607,82 +735,118 @@
 	if (group)
 		perf_evlist__set_leader(evsel_list);
 
-	evlist__for_each_entry(evsel_list, counter) {
+	if (affinity__setup(&affinity) < 0)
+		return -1;
+
+	evlist__for_each_cpu (evsel_list, i, cpu) {
+		affinity__set(&affinity, cpu);
+
+		evlist__for_each_entry(evsel_list, counter) {
+			if (evsel__cpu_iter_skip(counter, cpu))
+				continue;
+			if (counter->reset_group || counter->errored)
+				continue;
 try_again:
-		if (create_perf_stat_counter(counter) < 0) {
+			if (create_perf_stat_counter(counter, &stat_config, &target,
+						     counter->cpu_iter - 1) < 0) {
 
-			/* Weak group failed. Reset the group. */
-			if ((errno == EINVAL || errno == EBADF) &&
-			    counter->leader != counter &&
-			    counter->weak_group) {
-				counter = perf_evsel__reset_weak_group(counter);
-				goto try_again;
-			}
-
-			/*
-			 * PPC returns ENXIO for HW counters until 2.6.37
-			 * (behavior changed with commit b0a873e).
-			 */
-			if (errno == EINVAL || errno == ENOSYS ||
-			    errno == ENOENT || errno == EOPNOTSUPP ||
-			    errno == ENXIO) {
-				if (verbose > 0)
-					ui__warning("%s event is not supported by the kernel.\n",
-						    perf_evsel__name(counter));
-				counter->supported = false;
-
-				if ((counter->leader != counter) ||
-				    !(counter->leader->nr_members > 1))
-					continue;
-			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
-				if (verbose > 0)
-					ui__warning("%s\n", msg);
-				goto try_again;
-			} else if (target__has_per_thread(&target) &&
-				   evsel_list->threads &&
-				   evsel_list->threads->err_thread != -1) {
 				/*
-				 * For global --per-thread case, skip current
-				 * error thread.
+				 * Weak group failed. We cannot just undo this here
+				 * because earlier CPUs might be in group mode, and the kernel
+				 * doesn't support mixing group and non group reads. Defer
+				 * it to later.
+				 * Don't close here because we're in the wrong affinity.
 				 */
-				if (!thread_map__remove(evsel_list->threads,
-							evsel_list->threads->err_thread)) {
-					evsel_list->threads->err_thread = -1;
-					goto try_again;
+				if ((errno == EINVAL || errno == EBADF) &&
+				    counter->leader != counter &&
+				    counter->weak_group) {
+					perf_evlist__reset_weak_group(evsel_list, counter, false);
+					assert(counter->reset_group);
+					second_pass = true;
+					continue;
 				}
+
+				switch (stat_handle_error(counter)) {
+				case COUNTER_FATAL:
+					return -1;
+				case COUNTER_RETRY:
+					goto try_again;
+				case COUNTER_SKIP:
+					continue;
+				default:
+					break;
+				}
+
 			}
-
-			perf_evsel__open_strerror(counter, &target,
-						  errno, msg, sizeof(msg));
-			ui__error("%s\n", msg);
-
-			if (child_pid != -1)
-				kill(child_pid, SIGTERM);
-
-			return -1;
+			counter->supported = true;
 		}
-		counter->supported = true;
+	}
+
+	if (second_pass) {
+		/*
+		 * Now redo all the weak group after closing them,
+		 * and also close errored counters.
+		 */
+
+		evlist__for_each_cpu(evsel_list, i, cpu) {
+			affinity__set(&affinity, cpu);
+			/* First close errored or weak retry */
+			evlist__for_each_entry(evsel_list, counter) {
+				if (!counter->reset_group && !counter->errored)
+					continue;
+				if (evsel__cpu_iter_skip_no_inc(counter, cpu))
+					continue;
+				perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
+			}
+			/* Now reopen weak */
+			evlist__for_each_entry(evsel_list, counter) {
+				if (!counter->reset_group && !counter->errored)
+					continue;
+				if (evsel__cpu_iter_skip(counter, cpu))
+					continue;
+				if (!counter->reset_group)
+					continue;
try_again_reset:
+				pr_debug2("reopening weak %s\n", evsel__name(counter));
+				if (create_perf_stat_counter(counter, &stat_config, &target,
+							     counter->cpu_iter - 1) < 0) {
+
+					switch (stat_handle_error(counter)) {
+					case COUNTER_FATAL:
+						return -1;
+					case COUNTER_RETRY:
+						goto try_again_reset;
+					case COUNTER_SKIP:
+						continue;
+					default:
+						break;
+					}
+				}
+				counter->supported = true;
+			}
+		}
+	}
+	affinity__cleanup(&affinity);
+
+	evlist__for_each_entry(evsel_list, counter) {
+		if (!counter->supported) {
+			perf_evsel__free_fd(&counter->core);
+			continue;
+		}
 
 		l = strlen(counter->unit);
-		if (l > unit_width)
-			unit_width = l;
+		if (l > stat_config.unit_width)
+			stat_config.unit_width = l;
 
-		if (perf_evsel__should_store_id(counter) &&
-		    store_counter_ids(counter))
+		if (evsel__should_store_id(counter) &&
+		    evsel__store_ids(counter, evsel_list))
 			return -1;
 	}
 
 	if (perf_evlist__apply_filters(evsel_list, &counter)) {
 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
-			counter->filter, perf_evsel__name(counter), errno,
+			counter->filter, evsel__name(counter), errno,
 			str_error_r(errno, msg, sizeof(msg)));
-		return -1;
-	}
-
-	if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
-		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
-		      err_term->val.drv_cfg, perf_evsel__name(counter), errno,
-		      str_error_r(errno, msg, sizeof(msg)));
 		return -1;
 	}
 
@@ -699,7 +863,8 @@
 		if (err < 0)
 			return err;
 
-		err = perf_stat_synthesize_config(is_pipe);
+		err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
+							 process_synthesized_event, is_pipe);
 		if (err < 0)
 			return err;
 	}
@@ -714,17 +879,13 @@
 		perf_evlist__start_workload(evsel_list);
 		enable_counters();
 
-		if (interval || timeout) {
-			while (!waitpid(child_pid, &status, WNOHANG)) {
-				nanosleep(&ts, NULL);
-				if (timeout)
-					break;
-				process_interval();
-				if (interval_count && !(--times))
-					break;
-			}
+		if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
+			status = dispatch_events(forks, timeout, interval, &times);
+		if (child_pid != -1) {
+			if (timeout)
+				kill(child_pid, SIGTERM);
+			wait4(child_pid, &status, 0, &stat_config.ru_data);
 		}
-		wait4(child_pid, &status, 0, &ru_data);
 
 		if (workload_exec_errno) {
 			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@@ -736,26 +897,31 @@
 		psignal(WTERMSIG(status), argv[0]);
 	} else {
 		enable_counters();
-		while (!done) {
-			nanosleep(&ts, NULL);
-			if (timeout)
-				break;
-			if (interval) {
-				process_interval();
-				if (interval_count && !(--times))
-					break;
-			}
-		}
+		status = dispatch_events(forks, timeout, interval, &times);
 	}
 
 	disable_counters();
 
 	t1 = rdclock();
 
-	if (walltime_run_table)
-		walltime_run[run_idx] = t1 - t0;
+	if (stat_config.walltime_run_table)
+		stat_config.walltime_run[run_idx] = t1 - t0;
 
-	update_stats(&walltime_nsecs_stats, t1 - t0);
+	if (interval && stat_config.summary) {
+		stat_config.interval = 0;
+		stat_config.stop_read_counter = true;
+		init_stats(&walltime_nsecs_stats);
+		update_stats(&walltime_nsecs_stats, t1 - t0);
+
+		if (stat_config.aggr_mode == AGGR_GLOBAL)
+			perf_evlist__save_aggr_prev_raw_counts(evsel_list);
+
+		perf_evlist__copy_prev_raw_counts(evsel_list);
+		perf_evlist__reset_prev_raw_counts(evsel_list);
+		runtime_stat_reset(&stat_config);
+		perf_stat__reset_shadow_per_stat(&rt_stat);
+	} else
+		update_stats(&walltime_nsecs_stats, t1 - t0);
 
 	/*
 	 * Closing a group leader splits the group, and as we only disable
@@ -763,8 +929,14 @@
 	 * avoid arbitrary skew, we must read all counters before closing any
 	 * group leaders.
 	 */
-	read_counters();
-	perf_evlist__close(evsel_list);
+	read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
+
+	/*
+	 * We need to keep evsel_list alive, because it's processed
+	 * later the evsel_list will be closed after.
+	 */
+	if (!STAT_RECORD)
+		evlist__close(evsel_list);
 
 	return WEXITSTATUS(status);
 }
@@ -795,1105 +967,14 @@
 	return ret;
 }
 
-static void print_running(u64 run, u64 ena)
-{
-	if (csv_output) {
-		fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
-			csv_sep,
-			run,
-			csv_sep,
-			ena ? 100.0 * run / ena : 100.0);
-	} else if (run != ena) {
-		fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
-	}
-}
-
-static void print_noise_pct(double total, double avg)
-{
-	double pct = rel_stddev_stats(total, avg);
-
-	if (csv_output)
-		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
-	else if (pct)
-		fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
-}
-
-static void print_noise(struct perf_evsel *evsel, double avg)
-{
-	struct perf_stat_evsel *ps;
-
-	if (run_count == 1)
-		return;
-
-	ps = evsel->stats;
-	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
-}
-
-static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
-{
-	switch (stat_config.aggr_mode) {
-	case AGGR_CORE:
-		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
-			cpu_map__id_to_socket(id),
-			csv_output ? 0 : -8,
-			cpu_map__id_to_cpu(id),
-			csv_sep,
-			csv_output ? 0 : 4,
-			nr,
-			csv_sep);
-		break;
-	case AGGR_SOCKET:
-		fprintf(stat_config.output, "S%*d%s%*d%s",
-			csv_output ? 0 : -5,
-			id,
-			csv_sep,
-			csv_output ? 0 : 4,
-			nr,
-			csv_sep);
-		break;
-	case AGGR_NONE:
-		fprintf(stat_config.output, "CPU%*d%s",
-			csv_output ? 0 : -4,
-			perf_evsel__cpus(evsel)->map[id], csv_sep);
-		break;
-	case AGGR_THREAD:
-		fprintf(stat_config.output, "%*s-%*d%s",
-			csv_output ? 0 : 16,
-			thread_map__comm(evsel->threads, id),
-			csv_output ? 0 : -8,
-			thread_map__pid(evsel->threads, id),
-			csv_sep);
-		break;
-	case AGGR_GLOBAL:
-	case AGGR_UNSET:
-	default:
-		break;
-	}
-}
-
-struct outstate {
-	FILE *fh;
-	bool newline;
-	const char *prefix;
-	int nfields;
-	int id, nr;
-	struct perf_evsel *evsel;
-};
-
-#define METRIC_LEN 35
-
-static void new_line_std(void *ctx)
-{
-	struct outstate *os = ctx;
-
-	os->newline = true;
-}
-
-static void do_new_line_std(struct outstate *os)
-{
-	fputc('\n', os->fh);
-	fputs(os->prefix, os->fh);
-	aggr_printout(os->evsel, os->id, os->nr);
-	if (stat_config.aggr_mode == AGGR_NONE)
-		fprintf(os->fh, " ");
-	fprintf(os->fh, " ");
-}
-
-static void print_metric_std(void *ctx, const char *color, const char *fmt,
-			     const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	int n;
-	bool newline = os->newline;
-
-	os->newline = false;
-
-	if (unit == NULL || fmt == NULL) {
-		fprintf(out, "%-*s", METRIC_LEN, "");
-		return;
-	}
-
-	if (newline)
-		do_new_line_std(os);
-
-	n = fprintf(out, " # ");
-	if (color)
-		n += color_fprintf(out, color, fmt, val);
-	else
-		n += fprintf(out, fmt, val);
-	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
-}
-
-static void new_line_csv(void *ctx)
-{
-	struct outstate *os = ctx;
-	int i;
-
-	fputc('\n', os->fh);
-	if (os->prefix)
-		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
-	aggr_printout(os->evsel, os->id, os->nr);
-	for (i = 0; i < os->nfields; i++)
-		fputs(csv_sep, os->fh);
-}
-
-static void print_metric_csv(void *ctx,
-			     const char *color __maybe_unused,
-			     const char *fmt, const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	char buf[64], *vals, *ends;
-
-	if (unit == NULL || fmt == NULL) {
-		fprintf(out, "%s%s", csv_sep, csv_sep);
-		return;
-	}
-	snprintf(buf, sizeof(buf), fmt, val);
-	ends = vals = ltrim(buf);
-	while (isdigit(*ends) || *ends == '.')
-		ends++;
-	*ends = 0;
-	while (isspace(*unit))
-		unit++;
-	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
-}
-
-/* Filter out some columns that don't work well in metrics only mode */
-
-static bool valid_only_metric(const char *unit)
-{
-	if (!unit)
-		return false;
-	if (strstr(unit, "/sec") ||
-	    strstr(unit, "hz") ||
-	    strstr(unit, "Hz") ||
-	    strstr(unit, "CPUs utilized"))
-		return false;
-	return true;
-}
-
-static const char *fixunit(char *buf, struct perf_evsel *evsel,
-			   const char *unit)
-{
-	if (!strncmp(unit, "of all", 6)) {
-		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
-			 unit);
-		return buf;
-	}
-	return unit;
-}
-
-static void print_metric_only(void *ctx, const char *color, const char *fmt,
-			      const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	char buf[1024], str[1024];
-	unsigned mlen = metric_only_len;
-
-	if (!valid_only_metric(unit))
-		return;
-	unit = fixunit(buf, os->evsel, unit);
-	if (mlen < strlen(unit))
-		mlen = strlen(unit) + 1;
-
-	if (color)
-		mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
-
-	color_snprintf(str, sizeof(str), color ?: "", fmt, val);
-	fprintf(out, "%*s ", mlen, str);
-}
-
-static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
-				  const char *fmt,
-				  const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	char buf[64], *vals, *ends;
-	char tbuf[1024];
-
-	if (!valid_only_metric(unit))
-		return;
-	unit = fixunit(tbuf, os->evsel, unit);
-	snprintf(buf, sizeof buf, fmt, val);
-	ends = vals = ltrim(buf);
-	while (isdigit(*ends) || *ends == '.')
-		ends++;
-	*ends = 0;
-	fprintf(out, "%s%s", vals, csv_sep);
-}
-
-static void new_line_metric(void *ctx __maybe_unused)
-{
-}
-
-static void print_metric_header(void *ctx, const char *color __maybe_unused,
-				const char *fmt __maybe_unused,
-				const char *unit, double val __maybe_unused)
-{
-	struct outstate *os = ctx;
-	char tbuf[1024];
-
-	if (!valid_only_metric(unit))
-		return;
-	unit = fixunit(tbuf, os->evsel, unit);
-	if (csv_output)
-		fprintf(os->fh, "%s%s", unit, csv_sep);
-	else
-		fprintf(os->fh, "%*s ", metric_only_len, unit);
-}
-
-static int first_shadow_cpu(struct perf_evsel *evsel, int id)
-{
-	int i;
-
-	if (!aggr_get_id)
-		return 0;
-
-	if (stat_config.aggr_mode == AGGR_NONE)
-		return id;
-
-	if (stat_config.aggr_mode == AGGR_GLOBAL)
-		return 0;
-
-	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
-		int cpu2 = perf_evsel__cpus(evsel)->map[i];
-
-		if (aggr_get_id(evsel_list->cpus, cpu2) == id)
-			return cpu2;
-	}
-	return 0;
-}
-
-static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
-{
-	FILE *output = stat_config.output;
-	double sc = evsel->scale;
-	const char *fmt;
-
-	if (csv_output) {
-		fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
-	} else {
-		if (big_num)
-			fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
-		else
-			fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
-	}
-
-	aggr_printout(evsel, id, nr);
-
-	fprintf(output, fmt, avg, csv_sep);
-
-	if (evsel->unit)
-		fprintf(output, "%-*s%s",
-			csv_output ? 0 : unit_width,
-			evsel->unit, csv_sep);
-
-	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
-
-	if (evsel->cgrp)
-		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
-}
-
-static bool is_mixed_hw_group(struct perf_evsel *counter)
-{
-	struct perf_evlist *evlist = counter->evlist;
-	u32 pmu_type = counter->attr.type;
-	struct perf_evsel *pos;
-
-	if (counter->nr_members < 2)
-		return false;
-
-	evlist__for_each_entry(evlist, pos) {
-		/* software events can be part of any hardware group */
-		if (pos->attr.type == PERF_TYPE_SOFTWARE)
-			continue;
-		if (pmu_type == PERF_TYPE_SOFTWARE) {
-			pmu_type = pos->attr.type;
-			continue;
-		}
-		if (pmu_type != pos->attr.type)
-			return true;
-	}
-
-	return false;
-}
-
-static void printout(int id, int nr, struct perf_evsel *counter, double uval,
-		     char *prefix, u64 run, u64 ena, double noise,
-		     struct runtime_stat *st)
-{
-	struct perf_stat_output_ctx out;
-	struct outstate os = {
-		.fh = stat_config.output,
-		.prefix = prefix ? prefix : "",
-		.id = id,
-		.nr = nr,
-		.evsel = counter,
-	};
-	print_metric_t pm = print_metric_std;
-	void (*nl)(void *);
-
-	if (metric_only) {
-		nl = new_line_metric;
-		if (csv_output)
-			pm = print_metric_only_csv;
-		else
-			pm = print_metric_only;
-	} else
-		nl = new_line_std;
-
-	if (csv_output && !metric_only) {
-		static int aggr_fields[] = {
-			[AGGR_GLOBAL] = 0,
-			[AGGR_THREAD] = 1,
-			[AGGR_NONE] = 1,
-			[AGGR_SOCKET] = 2,
-			[AGGR_CORE] = 2,
-		};
-
-		pm = print_metric_csv;
-		nl = new_line_csv;
-		os.nfields = 3;
-		os.nfields += aggr_fields[stat_config.aggr_mode];
-		if (counter->cgrp)
-			os.nfields++;
-	}
-	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
-		if (metric_only) {
-			pm(&os, NULL, "", "", 0);
-			return;
-		}
-		aggr_printout(counter, id, nr);
-
-		fprintf(stat_config.output, "%*s%s",
-			csv_output ? 0 : 18,
-			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-			csv_sep);
-
-		if (counter->supported) {
-			print_free_counters_hint = 1;
-			if (is_mixed_hw_group(counter))
-				print_mixed_hw_group_error = 1;
-		}
-
-		fprintf(stat_config.output, "%-*s%s",
-			csv_output ? 0 : unit_width,
-			counter->unit, csv_sep);
-
-		fprintf(stat_config.output, "%*s",
-			csv_output ? 0 : -25,
-			perf_evsel__name(counter));
-
-		if (counter->cgrp)
-			fprintf(stat_config.output, "%s%s",
-				csv_sep, counter->cgrp->name);
-
-		if (!csv_output)
-			pm(&os, NULL, NULL, "", 0);
-		print_noise(counter, noise);
-		print_running(run, ena);
-		if (csv_output)
-			pm(&os, NULL, NULL, "", 0);
-		return;
-	}
-
-	if (!metric_only)
-		abs_printout(id, nr, counter, uval);
-
-	out.print_metric = pm;
-	out.new_line = nl;
-	out.ctx = &os;
-	out.force_header = false;
-
-	if (csv_output && !metric_only) {
-		print_noise(counter, noise);
-		print_running(run, ena);
-	}
-
-	perf_stat__print_shadow_stats(counter, uval,
-				first_shadow_cpu(counter, id),
-				&out, &metric_events, st);
-	if (!csv_output && !metric_only) {
-		print_noise(counter, noise);
-		print_running(run, ena);
-	}
-}
-
-static void aggr_update_shadow(void)
-{
-	int cpu, s2, id, s;
-	u64 val;
-	struct perf_evsel *counter;
-
-	for (s = 0; s < aggr_map->nr; s++) {
-		id = aggr_map->map[s];
-		evlist__for_each_entry(evsel_list, counter) {
-			val = 0;
-			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-				s2 = aggr_get_id(evsel_list->cpus, cpu);
-				if (s2 != id)
-					continue;
-				val += perf_counts(counter->counts, cpu, 0)->val;
-			}
-			perf_stat__update_shadow_stats(counter, val,
-					first_shadow_cpu(counter, id),
-					&rt_stat);
-		}
-	}
-}
-
-static void uniquify_event_name(struct perf_evsel *counter)
-{
-	char *new_name;
-	char *config;
-
-	if (counter->uniquified_name ||
-	    !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
-					   strlen(counter->pmu_name)))
-		return;
-
-	config = strchr(counter->name, '/');
-	if (config) {
-		if (asprintf(&new_name,
-			     "%s%s", counter->pmu_name, config) > 0) {
-			free(counter->name);
-			counter->name = new_name;
-		}
-	} else {
-		if (asprintf(&new_name,
-			     "%s [%s]", counter->name, counter->pmu_name) > 0) {
-			free(counter->name);
-			counter->name = new_name;
-		}
-	}
-
-	counter->uniquified_name = true;
-}
-
-static void collect_all_aliases(struct perf_evsel *counter,
-			    void (*cb)(struct perf_evsel *counter, void *data,
-				       bool first),
-			    void *data)
-{
-	struct perf_evsel *alias;
-
-	alias = list_prepare_entry(counter, &(evsel_list->entries), node);
-	list_for_each_entry_continue (alias, &evsel_list->entries, node) {
-		if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
-		    alias->scale != counter->scale ||
-		    alias->cgrp != counter->cgrp ||
-		    strcmp(alias->unit, counter->unit) ||
-		    perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter))
-			break;
-		alias->merged_stat = true;
-		cb(alias, data, false);
-	}
-}
-
-static bool collect_data(struct perf_evsel *counter,
-			    void (*cb)(struct perf_evsel *counter, void *data,
-				       bool first),
-			    void *data)
-{
-	if (counter->merged_stat)
-		return false;
-	cb(counter, data, true);
-	if (no_merge)
-		uniquify_event_name(counter);
-	else if (counter->auto_merge_stats)
-		collect_all_aliases(counter, cb, data);
-	return true;
-}
-
-struct aggr_data {
-	u64 ena, run, val;
-	int id;
-	int nr;
-	int cpu;
-};
-
-static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
-{
-	struct aggr_data *ad = data;
-	int cpu, s2;
-
-	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-		struct perf_counts_values *counts;
-
-		s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
-		if (s2 != ad->id)
-			continue;
-		if (first)
-			ad->nr++;
-		counts = perf_counts(counter->counts, cpu, 0);
-		/*
-		 * When any result is bad, make them all to give
-		 * consistent output in interval mode.
-		 */
-		if (counts->ena == 0 || counts->run == 0 ||
-		    counter->counts->scaled == -1) {
-			ad->ena = 0;
-			ad->run = 0;
-			break;
-		}
-		ad->val += counts->val;
-		ad->ena += counts->ena;
-		ad->run += counts->run;
-	}
-}
-
-static void print_aggr(char *prefix)
-{
-	FILE *output = stat_config.output;
-	struct perf_evsel *counter;
-	int s, id, nr;
-	double uval;
-	u64 ena, run, val;
-	bool first;
-
-	if (!(aggr_map || aggr_get_id))
-		return;
-
-	aggr_update_shadow();
-
-	/*
-	 * With metric_only everything is on a single line.
-	 * Without each counter has its own line.
-	 */
-	for (s = 0; s < aggr_map->nr; s++) {
-		struct aggr_data ad;
-		if (prefix && metric_only)
-			fprintf(output, "%s", prefix);
-
-		ad.id = id = aggr_map->map[s];
-		first = true;
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-
-			ad.val = ad.ena = ad.run = 0;
-			ad.nr = 0;
-			if (!collect_data(counter, aggr_cb, &ad))
-				continue;
-			nr = ad.nr;
-			ena = ad.ena;
-			run = ad.run;
-			val = ad.val;
-			if (first && metric_only) {
-				first = false;
-				aggr_printout(counter, id, nr);
-			}
-			if (prefix && !metric_only)
-				fprintf(output, "%s", prefix);
-
-			uval = val * counter->scale;
-			printout(id, nr, counter, uval, prefix, run, ena, 1.0,
-				 &rt_stat);
-			if (!metric_only)
-				fputc('\n', output);
-		}
-		if (metric_only)
-			fputc('\n', output);
-	}
-}
-
-static int cmp_val(const void *a, const void *b)
-{
-	return ((struct perf_aggr_thread_value *)b)->val -
-		((struct perf_aggr_thread_value *)a)->val;
-}
-
-static struct perf_aggr_thread_value *sort_aggr_thread(
-					struct perf_evsel *counter,
-					int nthreads, int ncpus,
-					int *ret)
-{
-	int cpu, thread, i = 0;
-	double uval;
-	struct perf_aggr_thread_value *buf;
-
-	buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
-	if (!buf)
-		return NULL;
-
-	for (thread = 0; thread < nthreads; thread++) {
-		u64 ena = 0, run = 0, val = 0;
-
-		for (cpu = 0; cpu < ncpus; cpu++) {
-			val += perf_counts(counter->counts, cpu, thread)->val;
-			ena += perf_counts(counter->counts, cpu, thread)->ena;
-			run += perf_counts(counter->counts, cpu, thread)->run;
-		}
-
-		uval = val * counter->scale;
-
-		/*
-		 * Skip value 0 when enabling --per-thread globally,
-		 * otherwise too many 0 output.
-		 */
-		if (uval == 0.0 && target__has_per_thread(&target))
-			continue;
-
-		buf[i].counter = counter;
-		buf[i].id = thread;
-		buf[i].uval = uval;
-		buf[i].val = val;
-		buf[i].run = run;
-		buf[i].ena = ena;
-		i++;
-	}
-
-	qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
-
-	if (ret)
-		*ret = i;
-
-	return buf;
-}
-
-static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
-{
-	FILE *output = stat_config.output;
-	int nthreads = thread_map__nr(counter->threads);
-	int ncpus = cpu_map__nr(counter->cpus);
-	int thread, sorted_threads, id;
-	struct perf_aggr_thread_value *buf;
-
-	buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads);
-	if (!buf) {
-		perror("cannot sort aggr thread");
-		return;
-	}
-
-	for (thread = 0; thread < sorted_threads; thread++) {
-		if (prefix)
-			fprintf(output, "%s", prefix);
-
-		id = buf[thread].id;
-		if (stat_config.stats)
-			printout(id, 0, buf[thread].counter, buf[thread].uval,
-				 prefix, buf[thread].run, buf[thread].ena, 1.0,
-				 &stat_config.stats[id]);
-		else
-			printout(id, 0, buf[thread].counter, buf[thread].uval,
-				 prefix, buf[thread].run, buf[thread].ena, 1.0,
-				 &rt_stat);
-		fputc('\n', output);
-	}
-
-	free(buf);
-}
-
-struct caggr_data {
-	double avg, avg_enabled, avg_running;
-};
-
-static void counter_aggr_cb(struct perf_evsel *counter, void *data,
-			    bool first __maybe_unused)
-{
-	struct caggr_data *cd = data;
-	struct perf_stat_evsel *ps = counter->stats;
-
-	cd->avg += avg_stats(&ps->res_stats[0]);
-	cd->avg_enabled += avg_stats(&ps->res_stats[1]);
-	cd->avg_running += avg_stats(&ps->res_stats[2]);
-}
-
-/*
- * Print out the results of a single counter:
- * aggregated counts in system-wide mode
- */
-static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
-{
-	FILE *output = stat_config.output;
-	double uval;
-	struct caggr_data cd = { .avg = 0.0 };
-
-	if (!collect_data(counter, counter_aggr_cb, &cd))
-		return;
-
-	if (prefix && !metric_only)
-		fprintf(output, "%s", prefix);
-
-	uval = cd.avg * counter->scale;
-	printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
-		 cd.avg, &rt_stat);
-	if (!metric_only)
-		fprintf(output, "\n");
-}
-
-static void counter_cb(struct perf_evsel *counter, void *data,
-		       bool first __maybe_unused)
-{
-	struct aggr_data *ad = data;
-
-	ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
-	ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
-	ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
-}
-
-/*
- * Print out the results of a single counter:
- * does not use aggregated count in system-wide
- */
-static void print_counter(struct perf_evsel *counter, char *prefix)
-{
-	FILE *output = stat_config.output;
-	u64 ena, run, val;
-	double uval;
-	int cpu;
-
-	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-		struct aggr_data ad = { .cpu = cpu };
-
-		if (!collect_data(counter, counter_cb, &ad))
-			return;
-		val = ad.val;
-		ena = ad.ena;
-		run = ad.run;
-
-		if (prefix)
-			fprintf(output, "%s", prefix);
-
-		uval = val * counter->scale;
-		printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
-			 &rt_stat);
-
-		fputc('\n', output);
-	}
-}
-
-static void print_no_aggr_metric(char *prefix)
-{
-	int cpu;
-	int nrcpus = 0;
-	struct perf_evsel *counter;
-	u64 ena, run, val;
-	double uval;
-
-	nrcpus = evsel_list->cpus->nr;
-	for (cpu = 0; cpu < nrcpus; cpu++) {
-		bool first = true;
-
-		if (prefix)
-			fputs(prefix, stat_config.output);
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-			if (first) {
-				aggr_printout(counter, cpu, 0);
-				first = false;
-			}
-			val = perf_counts(counter->counts, cpu, 0)->val;
-			ena = perf_counts(counter->counts, cpu, 0)->ena;
-			run = perf_counts(counter->counts, cpu, 0)->run;
-
-			uval = val * counter->scale;
-			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
-				 &rt_stat);
-		}
-		fputc('\n', stat_config.output);
-	}
-}
-
-static int aggr_header_lens[] = {
-	[AGGR_CORE] = 18,
-	[AGGR_SOCKET] = 12,
-	[AGGR_NONE] = 6,
-	[AGGR_THREAD] = 24,
-	[AGGR_GLOBAL] = 0,
-};
-
-static const char *aggr_header_csv[] = {
-	[AGGR_CORE] = "core,cpus,",
-	[AGGR_SOCKET] = "socket,cpus",
-	[AGGR_NONE] = "cpu,",
-	[AGGR_THREAD] = "comm-pid,",
-	[AGGR_GLOBAL] = ""
-};
-
-static void print_metric_headers(const char *prefix, bool no_indent)
-{
-	struct perf_stat_output_ctx out;
-	struct perf_evsel *counter;
-	struct outstate os = {
-		.fh = stat_config.output
-	};
-
-	if (prefix)
-		fprintf(stat_config.output, "%s", prefix);
-
-	if (!csv_output && !no_indent)
-		fprintf(stat_config.output, "%*s",
-			aggr_header_lens[stat_config.aggr_mode], "");
-	if (csv_output) {
-		if (stat_config.interval)
-			fputs("time,", stat_config.output);
-		fputs(aggr_header_csv[stat_config.aggr_mode],
-			stat_config.output);
-	}
-
-	/* Print metrics headers only */
-	evlist__for_each_entry(evsel_list, counter) {
-		if (is_duration_time(counter))
-			continue;
-		os.evsel = counter;
-		out.ctx = &os;
-		out.print_metric = print_metric_header;
-		out.new_line = new_line_metric;
-		out.force_header = true;
-		os.evsel = counter;
-		perf_stat__print_shadow_stats(counter, 0,
-					      0,
-					      &out,
-					      &metric_events,
-					      &rt_stat);
-	}
-	fputc('\n', stat_config.output);
-}
-
-static void print_interval(char *prefix, struct timespec *ts)
-{
-	FILE *output = stat_config.output;
-	static int num_print_interval;
-
-	if (interval_clear)
-		puts(CONSOLE_CLEAR);
-
-	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
-
-	if ((num_print_interval == 0 && !csv_output) || interval_clear) {
-		switch (stat_config.aggr_mode) {
-		case AGGR_SOCKET:
-			fprintf(output, "# time socket cpus");
-			if (!metric_only)
-				fprintf(output, " counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_CORE:
-			fprintf(output, "# time core cpus");
-			if (!metric_only)
-				fprintf(output, " counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_NONE:
-			fprintf(output, "# time CPU ");
-			if (!metric_only)
-				fprintf(output, " counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_THREAD:
-			fprintf(output, "# time comm-pid");
-			if (!metric_only)
-				fprintf(output, " counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_GLOBAL:
-		default:
-			fprintf(output, "# time");
-			if (!metric_only)
-				fprintf(output, " counts %*s events\n", unit_width, "unit");
-		case AGGR_UNSET:
-			break;
-		}
-	}
-
-	if ((num_print_interval == 0 || interval_clear) && metric_only)
-		print_metric_headers(" ", true);
-	if (++num_print_interval == 25)
-		num_print_interval = 0;
-}
-
-static void print_header(int argc, const char **argv)
-{
-	FILE *output = stat_config.output;
-	int i;
-
-	fflush(stdout);
-
-	if (!csv_output) {
-		fprintf(output, "\n");
-		fprintf(output, " Performance counter stats for ");
-		if (target.system_wide)
-			fprintf(output, "\'system wide");
-		else if (target.cpu_list)
-			fprintf(output, "\'CPU(s) %s", target.cpu_list);
-		else if (!target__has_task(&target)) {
-			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
-			for (i = 1; argv && (i < argc); i++)
-				fprintf(output, " %s", argv[i]);
-		} else if (target.pid)
-			fprintf(output, "process id \'%s", target.pid);
-		else
-			fprintf(output, "thread id \'%s", target.tid);
-
-		fprintf(output, "\'");
-		if (run_count > 1)
-			fprintf(output, " (%d runs)", run_count);
-		fprintf(output, ":\n\n");
-	}
-}
-
-static int get_precision(double num)
-{
-	if (num > 1)
1740
- return 0;
1741
-
1742
- return lround(ceil(-log10(num)));
1743
-}
1744
-
1745
-static void print_table(FILE *output, int precision, double avg)
1746
-{
1747
- char tmp[64];
1748
- int idx, indent = 0;
1749
-
1750
- scnprintf(tmp, 64, " %17.*f", precision, avg);
1751
- while (tmp[indent] == ' ')
1752
- indent++;
1753
-
1754
- fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
1755
-
1756
- for (idx = 0; idx < run_count; idx++) {
1757
- double run = (double) walltime_run[idx] / NSEC_PER_SEC;
1758
- int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
1759
-
1760
- fprintf(output, " %17.*f (%+.*f) ",
1761
- precision, run, precision, run - avg);
1762
-
1763
- for (h = 0; h < n; h++)
1764
- fprintf(output, "#");
1765
-
1766
- fprintf(output, "\n");
1767
- }
1768
-
1769
- fprintf(output, "\n%*s# Final result:\n", indent, "");
1770
-}
1771
-
1772
-static double timeval2double(struct timeval *t)
1773
-{
1774
- return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
1775
-}
1776
-
1777
-static void print_footer(void)
1778
-{
1779
- double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
1780
- FILE *output = stat_config.output;
1781
- int n;
1782
-
1783
- if (!null_run)
1784
- fprintf(output, "\n");
1785
-
1786
- if (run_count == 1) {
1787
- fprintf(output, " %17.9f seconds time elapsed", avg);
1788
-
1789
- if (ru_display) {
1790
- double ru_utime = timeval2double(&ru_data.ru_utime);
1791
- double ru_stime = timeval2double(&ru_data.ru_stime);
1792
-
1793
- fprintf(output, "\n\n");
1794
- fprintf(output, " %17.9f seconds user\n", ru_utime);
1795
- fprintf(output, " %17.9f seconds sys\n", ru_stime);
1796
- }
1797
- } else {
1798
- double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
1799
- /*
1800
- * Display at most 2 more significant
1801
- * digits than the stddev inaccuracy.
1802
- */
1803
- int precision = get_precision(sd) + 2;
1804
-
1805
- if (walltime_run_table)
1806
- print_table(output, precision, avg);
1807
-
1808
- fprintf(output, " %17.*f +- %.*f seconds time elapsed",
1809
- precision, avg, precision, sd);
1810
-
1811
- print_noise_pct(sd, avg);
1812
- }
1813
- fprintf(output, "\n\n");
1814
-
1815
- if (print_free_counters_hint &&
1816
- sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
1817
- n > 0)
1818
- fprintf(output,
1819
-"Some events weren't counted. Try disabling the NMI watchdog:\n"
1820
-" echo 0 > /proc/sys/kernel/nmi_watchdog\n"
1821
-" perf stat ...\n"
1822
-" echo 1 > /proc/sys/kernel/nmi_watchdog\n");
1823
-
1824
- if (print_mixed_hw_group_error)
1825
- fprintf(output,
1826
- "The events in group usually have to be from "
1827
- "the same PMU. Try reorganizing the group.\n");
1828
-}
1829
-
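The footer's precision logic deserves a worked example: get_precision() yields the number of fractional digits needed to resolve a value below 1, and the footer then prints two more digits than the stddev's own inaccuracy. A minimal standalone sketch, not part of the patch; the measurement numbers are made up (build with -lm):

#include <math.h>
#include <stdio.h>

static int get_precision(double num)
{
	if (num > 1)
		return 0;
	return lround(ceil(-log10(num)));
}

int main(void)
{
	double avg = 5.123456789, sd = 0.0042;
	/* ceil(-log10(0.0042)) == 3, so two extra digits gives 5 */
	int precision = get_precision(sd) + 2;

	/* Same format as the footer: "5.12346 +- 0.00420 seconds time elapsed" */
	printf(" %17.*f +- %.*f seconds time elapsed\n",
	       precision, avg, precision, sd);
	return 0;
}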
 static void print_counters(struct timespec *ts, int argc, const char **argv)
 {
-	int interval = stat_config.interval;
-	struct perf_evsel *counter;
-	char buf[64], *prefix = NULL;
-
 	/* Do not print anything if we record to the pipe. */
 	if (STAT_RECORD && perf_stat.data.is_pipe)
 		return;
 
-	if (interval)
-		print_interval(prefix = buf, ts);
-	else
-		print_header(argc, argv);
-
-	if (metric_only) {
-		static int num_print_iv;
-
-		if (num_print_iv == 0 && !interval)
-			print_metric_headers(prefix, false);
-		if (num_print_iv++ == 25)
-			num_print_iv = 0;
-		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
-			fprintf(stat_config.output, "%s", prefix);
-	}
-
-	switch (stat_config.aggr_mode) {
-	case AGGR_CORE:
-	case AGGR_SOCKET:
-		print_aggr(prefix);
-		break;
-	case AGGR_THREAD:
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-			print_aggr_thread(counter, prefix);
-		}
-		break;
-	case AGGR_GLOBAL:
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-			print_counter_aggr(counter, prefix);
-		}
-		if (metric_only)
-			fputc('\n', stat_config.output);
-		break;
-	case AGGR_NONE:
-		if (metric_only)
-			print_no_aggr_metric(prefix);
-		else {
-			evlist__for_each_entry(evsel_list, counter) {
-				if (is_duration_time(counter))
-					continue;
-				print_counter(counter, prefix);
-			}
-		}
-		break;
-	case AGGR_UNSET:
-	default:
-		break;
-	}
-
-	if (!interval && !csv_output)
-		print_footer();
-
-	fflush(stat_config.output);
+	perf_evlist__print_counters(evsel_list, &stat_config, &target,
+				    ts, argc, argv);
 }
 
 static volatile int signr = -1;
@@ -1939,10 +1020,16 @@
 	kill(getpid(), signr);
 }
 
+void perf_stat__set_big_num(int set)
+{
+	stat_config.big_num = (set != 0);
+}
+
 static int stat__set_big_num(const struct option *opt __maybe_unused,
 			     const char *s __maybe_unused, int unset)
 {
 	big_num_opt = unset ? 0 : 1;
+	perf_stat__set_big_num(!unset);
 	return 0;
 }
 
@@ -1950,7 +1037,7 @@
 			   const char *s __maybe_unused, int unset)
 {
 	force_metric_only = true;
-	metric_only = !unset;
+	stat_config.metric_only = !unset;
 	return 0;
 }
 
@@ -1958,7 +1045,30 @@
 			     const char *str,
 			     int unset __maybe_unused)
 {
-	return metricgroup__parse_groups(opt, str, &metric_events);
+	return metricgroup__parse_groups(opt, str,
+					 stat_config.metric_no_group,
+					 stat_config.metric_no_merge,
+					 &stat_config.metric_events);
+}
+
+static int parse_control_option(const struct option *opt,
+				const char *str,
+				int unset __maybe_unused)
+{
+	struct perf_stat_config *config = opt->value;
+
+	return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close);
+}
+
+static int parse_stat_cgroups(const struct option *opt,
+			      const char *str, int unset)
+{
+	if (stat_config.cgroup_list) {
+		pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
+		return -1;
+	}
+
+	return parse_cgroups(opt, str, unset);
 }
 
 static struct option stat_options[] = {
@@ -1969,7 +1079,7 @@
 		     parse_events_option),
 	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
 		     "event filter", parse_filter),
-	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
+	OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
 		    "child tasks do not inherit counters"),
 	OPT_STRING('p', "pid", &target.pid, "pid",
 		   "stat events on existing process id"),
@@ -1979,14 +1089,15 @@
 		    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('g', "group", &group,
 		    "put the counters into a counter group"),
-	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
+	OPT_BOOLEAN(0, "scale", &stat_config.scale,
+		    "Use --no-scale to disable counter scaling for multiplexing"),
 	OPT_INCR('v', "verbose", &verbose,
 		 "be more verbose (show counter open errors, etc)"),
-	OPT_INTEGER('r', "repeat", &run_count,
+	OPT_INTEGER('r', "repeat", &stat_config.run_count,
 		    "repeat command and print average + stddev (max: 100, forever: 0)"),
-	OPT_BOOLEAN(0, "table", &walltime_run_table,
+	OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
 		    "display details about each run (only with -r option)"),
-	OPT_BOOLEAN('n', "null", &null_run,
+	OPT_BOOLEAN('n', "null", &stat_config.null_run,
 		    "null run - dont start any counters"),
 	OPT_INCR('d', "detailed", &detailed_run,
 		 "detailed run - start a lot of events"),
@@ -1999,11 +1110,13 @@
 		    "list of cpus to monitor in system-wide"),
 	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
 		     "disable CPU count aggregation", AGGR_NONE),
-	OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
-	OPT_STRING('x', "field-separator", &csv_sep, "separator",
+	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
+	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
 		   "print counts with custom separator"),
 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
-		     "monitor event in cgroup name only", parse_cgroups),
+		     "monitor event in cgroup name only", parse_stat_cgroups),
+	OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
+		   "expand events for each cgroup"),
 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
 	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
 	OPT_INTEGER(0, "log-fd", &output_fd,
@@ -2017,20 +1130,28 @@
 		    "(overhead is possible for values <= 100ms)"),
 	OPT_INTEGER(0, "interval-count", &stat_config.times,
 		    "print counts for fixed number of times"),
-	OPT_BOOLEAN(0, "interval-clear", &interval_clear,
+	OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
 		    "clear screen in between new interval"),
 	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
 		     "stop workload and print counts after a timeout period in ms (>= 10ms)"),
 	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
 		     "aggregate counts per processor socket", AGGR_SOCKET),
+	OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
+		     "aggregate counts per processor die", AGGR_DIE),
 	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
 		     "aggregate counts per physical processor core", AGGR_CORE),
 	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
 		     "aggregate counts per thread", AGGR_THREAD),
-	OPT_UINTEGER('D', "delay", &initial_delay,
-		     "ms to wait before starting measurement after program start"),
-	OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+	OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
+		     "aggregate counts per numa node", AGGR_NODE),
+	OPT_INTEGER('D', "delay", &stat_config.initial_delay,
+		    "ms to wait before starting measurement after program start (-1: start with events disabled)"),
+	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
 			   "Only print computed metrics. No raw values", enable_metric_only),
+	OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
+		    "don't group metric events, impacts multiplexing"),
+	OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
+		    "don't try to share events between metrics in a group"),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 		    "measure topdown level 1 statistics"),
 	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
@@ -2038,34 +1159,57 @@
 	OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
 		     "monitor specified metrics or metric groups (separated by ,)",
 		     parse_metric_groups),
+	OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
+			 "Configure all used events to run in kernel space.",
+			 PARSE_OPT_EXCLUSIVE),
+	OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
+			 "Configure all used events to run in user space.",
+			 PARSE_OPT_EXCLUSIVE),
+	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
+		    "Use with 'percore' event qualifier to show the event "
+		    "counts of one hardware thread by sum up total hardware "
+		    "threads of same physical core"),
+	OPT_BOOLEAN(0, "summary", &stat_config.summary,
+		    "print summary for interval mode"),
+#ifdef HAVE_LIBPFM
+	OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
+		     "libpfm4 event selector. use 'perf list' to list available events",
+		     parse_libpfm_events_option),
+#endif
+	OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
+		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
+		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
+		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
+		     parse_control_option),
 	OPT_END()
 };
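The new --control option lets an external process start and stop counting over plain file descriptors: the controller writes 'enable' or 'disable' to the ctl fd, and optionally reads an 'ack\n' back. A minimal sketch of the controller side using only POSIX calls; the exact perf invocation shown is illustrative, not part of the patch:

#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int ctl[2];
	char opt[64];

	if (pipe(ctl))
		return 1;

	/* Hypothetical launch; a real controller would fork/exec:
	 *   perf stat -D -1 --control fd:<ctl[0]> -- <workload>
	 * so counting starts disabled and waits for commands.
	 */
	snprintf(opt, sizeof(opt), "fd:%d", ctl[0]);
	printf("would run: perf stat -D -1 --control %s -- sleep 10\n", opt);

	/* Toggle the measurement around the region of interest. */
	if (write(ctl[1], "enable", strlen("enable")) < 0 ||
	    write(ctl[1], "disable", strlen("disable")) < 0)
		perror("write");

	close(ctl[0]);
	close(ctl[1]);
	return 0;
}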
 
-static int perf_stat__get_socket(struct cpu_map *map, int cpu)
+static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
+				 struct perf_cpu_map *map, int cpu)
 {
 	return cpu_map__get_socket(map, cpu, NULL);
 }
 
-static int perf_stat__get_core(struct cpu_map *map, int cpu)
+static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
+			      struct perf_cpu_map *map, int cpu)
+{
+	return cpu_map__get_die(map, cpu, NULL);
+}
+
+static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
+			       struct perf_cpu_map *map, int cpu)
 {
 	return cpu_map__get_core(map, cpu, NULL);
 }
 
-static int cpu_map__get_max(struct cpu_map *map)
+static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
+			       struct perf_cpu_map *map, int cpu)
 {
-	int i, max = -1;
-
-	for (i = 0; i < map->nr; i++) {
-		if (map->map[i] > max)
-			max = map->map[i];
-	}
-
-	return max;
+	return cpu_map__get_node(map, cpu, NULL);
 }
 
-static struct cpu_map *cpus_aggr_map;
-
-static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
+static int perf_stat__get_aggr(struct perf_stat_config *config,
			       aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
 {
 	int cpu;
 
@@ -2074,20 +1218,46 @@
 
 	cpu = map->map[idx];
 
-	if (cpus_aggr_map->map[cpu] == -1)
-		cpus_aggr_map->map[cpu] = get_id(map, idx);
+	if (config->cpus_aggr_map->map[cpu] == -1)
+		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
 
-	return cpus_aggr_map->map[cpu];
+	return config->cpus_aggr_map->map[cpu];
 }
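perf_stat__get_aggr() memoizes the cpu-to-aggregate-id translation in config->cpus_aggr_map, so the topology lookup behind get_id() runs once per CPU no matter how often a counter is printed. The same memoization pattern in isolation, as a sketch; slow_lookup() is a stand-in for cpu_map__get_socket() and friends:

#include <stdio.h>

#define NR_CPUS 8

static int cache[NR_CPUS];

/* Stand-in for the topology lookup: pretend 4 CPUs per socket. */
static int slow_lookup(int cpu)
{
	printf("lookup(%d)\n", cpu);	/* printed once per CPU, not once per read */
	return cpu / 4;
}

static int cached_id(int cpu)
{
	if (cache[cpu] == -1)
		cache[cpu] = slow_lookup(cpu);
	return cache[cpu];
}

int main(void)
{
	int cpu, pass;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		cache[cpu] = -1;	/* same -1 sentinel the aggr map uses */

	for (pass = 0; pass < 2; pass++)
		for (cpu = 0; cpu < NR_CPUS; cpu++)
			cached_id(cpu);
	return 0;
}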
20821226
2083
-static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
1227
+static int perf_stat__get_socket_cached(struct perf_stat_config *config,
1228
+ struct perf_cpu_map *map, int idx)
20841229 {
2085
- return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
1230
+ return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
20861231 }
20871232
2088
-static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
1233
+static int perf_stat__get_die_cached(struct perf_stat_config *config,
1234
+ struct perf_cpu_map *map, int idx)
20891235 {
2090
- return perf_stat__get_aggr(perf_stat__get_core, map, idx);
1236
+ return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
1237
+}
1238
+
1239
+static int perf_stat__get_core_cached(struct perf_stat_config *config,
1240
+ struct perf_cpu_map *map, int idx)
1241
+{
1242
+ return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
1243
+}
1244
+
1245
+static int perf_stat__get_node_cached(struct perf_stat_config *config,
1246
+ struct perf_cpu_map *map, int idx)
1247
+{
1248
+ return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
1249
+}
1250
+
1251
+static bool term_percore_set(void)
1252
+{
1253
+ struct evsel *counter;
1254
+
1255
+ evlist__for_each_entry(evsel_list, counter) {
1256
+ if (counter->percore)
1257
+ return true;
1258
+ }
1259
+
1260
+ return false;
20911261 }
20921262
20931263 static int perf_stat_init_aggr_mode(void)
....@@ -2096,20 +1266,43 @@
20961266
20971267 switch (stat_config.aggr_mode) {
20981268 case AGGR_SOCKET:
2099
- if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1269
+ if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
21001270 perror("cannot build socket map");
21011271 return -1;
21021272 }
2103
- aggr_get_id = perf_stat__get_socket_cached;
1273
+ stat_config.aggr_get_id = perf_stat__get_socket_cached;
1274
+ break;
1275
+ case AGGR_DIE:
1276
+ if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1277
+ perror("cannot build die map");
1278
+ return -1;
1279
+ }
1280
+ stat_config.aggr_get_id = perf_stat__get_die_cached;
21041281 break;
21051282 case AGGR_CORE:
2106
- if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1283
+ if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
21071284 perror("cannot build core map");
21081285 return -1;
21091286 }
2110
- aggr_get_id = perf_stat__get_core_cached;
1287
+ stat_config.aggr_get_id = perf_stat__get_core_cached;
1288
+ break;
1289
+ case AGGR_NODE:
1290
+ if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1291
+ perror("cannot build core map");
1292
+ return -1;
1293
+ }
1294
+ stat_config.aggr_get_id = perf_stat__get_node_cached;
21111295 break;
21121296 case AGGR_NONE:
1297
+ if (term_percore_set()) {
1298
+ if (cpu_map__build_core_map(evsel_list->core.cpus,
1299
+ &stat_config.aggr_map)) {
1300
+ perror("cannot build core map");
1301
+ return -1;
1302
+ }
1303
+ stat_config.aggr_get_id = perf_stat__get_core_cached;
1304
+ }
1305
+ break;
21131306 case AGGR_GLOBAL:
21141307 case AGGR_THREAD:
21151308 case AGGR_UNSET:
....@@ -2122,20 +1315,20 @@
21221315 * taking the highest cpu number to be the size of
21231316 * the aggregation translate cpumap.
21241317 */
2125
- nr = cpu_map__get_max(evsel_list->cpus);
2126
- cpus_aggr_map = cpu_map__empty_new(nr + 1);
2127
- return cpus_aggr_map ? 0 : -ENOMEM;
1318
+ nr = perf_cpu_map__max(evsel_list->core.cpus);
1319
+ stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
1320
+ return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
21281321 }
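Sizing the translate map by the highest CPU number rather than by the map's length matters whenever the cpu map is sparse. A small sketch of why; the values are illustrative:

#include <stdio.h>

/* A sparse cpu map {0, 2, 7} needs max + 1 == 8 slots, because the
 * translate map is indexed by raw CPU number (map[7]), not by the
 * CPU's position in the map (map[2]).
 */
static int max_cpu(const int *cpus, int nr)
{
	int i, max = -1;

	for (i = 0; i < nr; i++)
		if (cpus[i] > max)
			max = cpus[i];
	return max;
}

int main(void)
{
	int cpus[] = { 0, 2, 7 };

	printf("translate map size: %d\n", max_cpu(cpus, 3) + 1);	/* 8 */
	return 0;
}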
 
 static void perf_stat__exit_aggr_mode(void)
 {
-	cpu_map__put(aggr_map);
-	cpu_map__put(cpus_aggr_map);
-	aggr_map = NULL;
-	cpus_aggr_map = NULL;
+	perf_cpu_map__put(stat_config.aggr_map);
+	perf_cpu_map__put(stat_config.cpus_aggr_map);
+	stat_config.aggr_map = NULL;
+	stat_config.cpus_aggr_map = NULL;
 }
 
-static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
+static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
 {
 	int cpu;
 
@@ -2150,7 +1343,7 @@
 	return cpu;
 }
 
-static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
+static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
 {
 	struct perf_env *env = data;
 	int cpu = perf_env__get_cpu(env, map, idx);
@@ -2158,46 +1351,112 @@
 	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
 }
 
-static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
+static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
+{
+	struct perf_env *env = data;
+	int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
+
+	if (cpu != -1) {
+		/*
+		 * Encode socket in bit range 15:8
+		 * die_id is relative to socket,
+		 * we need a global id. So we combine
+		 * socket + die id
+		 */
+		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
+			return -1;
+
+		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
+			return -1;
+
+		die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
+	}
+
+	return die_id;
+}
+
+static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
 {
 	struct perf_env *env = data;
 	int core = -1, cpu = perf_env__get_cpu(env, map, idx);
 
 	if (cpu != -1) {
-		int socket_id = env->cpu[cpu].socket_id;
-
 		/*
-		 * Encode socket in upper 16 bits
-		 * core_id is relative to socket, and
+		 * Encode socket in bit range 31:24
+		 * encode die id in bit range 23:16
+		 * core_id is relative to socket and die,
 		 * we need a global id. So we combine
-		 * socket + core id.
+		 * socket + die id + core id
 		 */
-		core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
+		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
+			return -1;
+
+		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
+			return -1;
+
+		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
+			return -1;
+
+		core = (env->cpu[cpu].socket_id << 24) |
+		       (env->cpu[cpu].die_id << 16) |
+		       (env->cpu[cpu].core_id & 0xffff);
 	}
 
 	return core;
 }
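The two comments above define a packed global id: socket/die in bits 15:8/7:0 for die ids, and socket/die/core in bits 31:24/23:16/15:0 for core ids. Spelled out as a standalone sketch, with the field widths as in the patch and sample values made up:

#include <stdio.h>

/* Global die id:  socket in bits 15:8, die in bits 7:0.
 * Global core id: socket in bits 31:24, die in bits 23:16, core in bits 15:0.
 * The WARN_ONCE checks above guarantee each field fits before packing.
 */
static int pack_die(int socket, int die)
{
	return (socket << 8) | (die & 0xff);
}

static int pack_core(int socket, int die, int core)
{
	return (socket << 24) | (die << 16) | (core & 0xffff);
}

int main(void)
{
	/* socket 1, die 0, core 3 -> die id 0x100, core id 0x1000003 */
	printf("die=0x%x core=0x%x\n", pack_die(1, 0), pack_core(1, 0, 3));
	return 0;
}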
 
-static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
-				      struct cpu_map **sockp)
+static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
+{
+	int cpu = perf_env__get_cpu(data, map, idx);
+
+	return perf_env__numa_node(data, cpu);
+}
+
+static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
+				      struct perf_cpu_map **sockp)
 {
 	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
 }
 
-static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
-				    struct cpu_map **corep)
+static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
+				   struct perf_cpu_map **diep)
+{
+	return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
+}
+
+static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
+				    struct perf_cpu_map **corep)
 {
 	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
 }
 
-static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
+static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
+				    struct perf_cpu_map **nodep)
+{
+	return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
+}
+
+static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
+				      struct perf_cpu_map *map, int idx)
 {
 	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
 }
+static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
+				   struct perf_cpu_map *map, int idx)
+{
+	return perf_env__get_die(map, idx, &perf_stat.session->header.env);
+}
 
-static int perf_stat__get_core_file(struct cpu_map *map, int idx)
+static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
+				    struct perf_cpu_map *map, int idx)
 {
 	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
+}
+
+static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
+				    struct perf_cpu_map *map, int idx)
+{
+	return perf_env__get_node(map, idx, &perf_stat.session->header.env);
 }
 
 static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
@@ -2206,18 +1465,32 @@
 
 	switch (stat_config.aggr_mode) {
 	case AGGR_SOCKET:
-		if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
+		if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
 			perror("cannot build socket map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_socket_file;
+		stat_config.aggr_get_id = perf_stat__get_socket_file;
+		break;
+	case AGGR_DIE:
+		if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
+			perror("cannot build die map");
+			return -1;
+		}
+		stat_config.aggr_get_id = perf_stat__get_die_file;
 		break;
 	case AGGR_CORE:
-		if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
+		if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
 			perror("cannot build core map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_core_file;
+		stat_config.aggr_get_id = perf_stat__get_core_file;
+		break;
+	case AGGR_NODE:
+		if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
+			perror("cannot build node map");
+			return -1;
+		}
+		stat_config.aggr_get_id = perf_stat__get_node_file;
 		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
@@ -2228,55 +1501,6 @@
 	}
 
 	return 0;
-}
-
-static int topdown_filter_events(const char **attr, char **str, bool use_group)
-{
-	int off = 0;
-	int i;
-	int len = 0;
-	char *s;
-
-	for (i = 0; attr[i]; i++) {
-		if (pmu_have_event("cpu", attr[i])) {
-			len += strlen(attr[i]) + 1;
-			attr[i - off] = attr[i];
-		} else
-			off++;
-	}
-	attr[i - off] = NULL;
-
-	*str = malloc(len + 1 + 2);
-	if (!*str)
-		return -1;
-	s = *str;
-	if (i - off == 0) {
-		*s = 0;
-		return 0;
-	}
-	if (use_group)
-		*s++ = '{';
-	for (i = 0; attr[i]; i++) {
-		strcpy(s, attr[i]);
-		s += strlen(s);
-		*s++ = ',';
-	}
-	if (use_group) {
-		s[-1] = '}';
-		*s = 0;
-	} else
-		s[-1] = 0;
-	return 0;
-}
-
-__weak bool arch_topdown_check_group(bool *warn)
-{
-	*warn = false;
-	return false;
-}
-
-__weak void arch_topdown_group_warn(void)
-{
 }
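For reference, topdown_filter_events() -- whose local definition is dropped above but which is still called below, so it now lives in a shared helper -- filters out events the PMU lacks, then builds a comma-separated event string, brace-wrapped when the events should form one group, and hands it to parse_events(). A sketch of the string it produces; the event names are illustrative, matching the topdown_metric_attrs naming used below:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Assume all level-1 topdown metric events are available. */
	const char *attr[] = { "slots", "topdown-retiring", "topdown-bad-spec",
			       "topdown-fe-bound", "topdown-be-bound", NULL };
	char buf[256];
	int i;

	strcpy(buf, "{");	/* use_group == true wraps the list in braces */
	for (i = 0; attr[i]; i++) {
		strcat(buf, attr[i]);
		strcat(buf, attr[i + 1] ? "," : "}");
	}
	/* Prints "{slots,topdown-retiring,...,topdown-be-bound}", ready
	 * for parse_events() so the events land in a single group. */
	printf("%s\n", buf);
	return 0;
}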
 
 /*
@@ -2401,9 +1625,10 @@
 	struct parse_events_error errinfo;
 
 	/* Set attrs if no event is selected and !null_run: */
-	if (null_run)
+	if (stat_config.null_run)
 		return 0;
 
+	bzero(&errinfo, sizeof(errinfo));
 	if (transaction_run) {
 		/* Handle -T as -M transaction. Once platform specific metrics
 		 * support has been added to the json files, all architectures
@@ -2414,7 +1639,9 @@
 			struct option opt = { .value = &evsel_list };
 
 			return metricgroup__parse_groups(&opt, "transaction",
-							 &metric_events);
+							 stat_config.metric_no_group,
+							 stat_config.metric_no_merge,
+							 &stat_config.metric_events);
 		}
 
 		if (pmu_have_event("cpu", "cycles-ct") &&
@@ -2452,7 +1679,7 @@
 		if (pmu_have_event("msr", "aperf") &&
 		    pmu_have_event("msr", "smi")) {
 			if (!force_metric_only)
-				metric_only = true;
+				stat_config.metric_only = true;
 			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
 		} else {
 			fprintf(stderr, "To measure SMI cost, it needs "
@@ -2461,6 +1688,7 @@
 			return -1;
 		}
 		if (err) {
+			parse_events_print_error(&errinfo, smi_cost_attrs);
 			fprintf(stderr, "Cannot set up SMI cost events\n");
 			return -1;
 		}
@@ -2470,6 +1698,24 @@
 	if (topdown_run) {
 		char *str = NULL;
 		bool warn = false;
+
+		if (!force_metric_only)
+			stat_config.metric_only = true;
+
+		if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) {
+			pr_err("Out of memory\n");
+			return -1;
+		}
+		if (topdown_metric_attrs[0] && str) {
+			if (!stat_config.interval && !stat_config.metric_only) {
+				fprintf(stat_config.output,
+					"Topdown accuracy may decrease when measuring long periods.\n"
+					"Please print the result regularly, e.g. -I1000\n");
+			}
+			goto setup_metrics;
+		}
+
+		zfree(&str);
 
 		if (stat_config.aggr_mode != AGGR_GLOBAL &&
 		    stat_config.aggr_mode != AGGR_CORE) {
@@ -2482,8 +1728,6 @@
 			return -1;
 		}
 
-		if (!force_metric_only)
-			metric_only = true;
 		if (topdown_filter_events(topdown_attrs, &str,
 					  arch_topdown_check_group(&warn)) < 0) {
 			pr_err("Out of memory\n");
@@ -2492,6 +1736,7 @@
 		if (topdown_attrs[0] && str) {
 			if (warn)
 				arch_topdown_group_warn();
+setup_metrics:
 			err = parse_events(evsel_list, str, &errinfo);
 			if (err) {
 				fprintf(stderr,
@@ -2508,23 +1753,21 @@
 		free(str);
 	}
 
-	if (!evsel_list->nr_entries) {
+	if (!evsel_list->core.nr_entries) {
 		if (target__has_cpu(&target))
 			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
 
-		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+		if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
 			return -1;
 		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
-			if (perf_evlist__add_default_attrs(evsel_list,
-							   frontend_attrs) < 0)
+			if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
 				return -1;
 		}
 		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
-			if (perf_evlist__add_default_attrs(evsel_list,
-							   backend_attrs) < 0)
+			if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
 				return -1;
 		}
-		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
+		if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
 			return -1;
 	}
 
@@ -2534,21 +1777,21 @@
 		return 0;
 
 	/* Append detailed run extra attributes: */
-	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
+	if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
 		return -1;
 
 	if (detailed_run < 2)
 		return 0;
 
 	/* Append very detailed run extra attributes: */
-	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
+	if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
 		return -1;
 
 	if (detailed_run < 3)
 		return 0;
 
 	/* Append very, very detailed run extra attributes: */
-	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
+	return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
 }
 
 static const char * const stat_record_usage[] = {
@@ -2563,6 +1806,7 @@
 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
 		perf_header__set_feat(&session->header, feat);
 
+	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
 	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
 	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
 	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
@@ -2578,17 +1822,17 @@
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
 	if (output_name)
-		data->file.path = output_name;
+		data->path = output_name;
 
-	if (run_count != 1 || forever) {
+	if (stat_config.run_count != 1 || forever) {
 		pr_err("Cannot use -r option with perf stat record.\n");
 		return -1;
 	}
 
 	session = perf_session__new(data, false, NULL);
-	if (session == NULL) {
-		pr_err("Perf session creation failed.\n");
-		return -1;
+	if (IS_ERR(session)) {
+		pr_err("Perf session creation failed\n");
+		return PTR_ERR(session);
 	}
 
 	init_features(session);
@@ -2599,12 +1843,11 @@
 	return argc;
 }
 
-static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
-				    union perf_event *event,
-				    struct perf_session *session)
+static int process_stat_round_event(struct perf_session *session,
+				    union perf_event *event)
 {
-	struct stat_round_event *stat_round = &event->stat_round;
-	struct perf_evsel *counter;
+	struct perf_record_stat_round *stat_round = &event->stat_round;
+	struct evsel *counter;
 	struct timespec tsh, *ts = NULL;
 	const char **argv = session->header.env.cmdline_argv;
 	int argc = session->header.env.nr_cmdline;
@@ -2626,15 +1869,15 @@
 }
 
 static
-int process_stat_config_event(struct perf_tool *tool,
-			      union perf_event *event,
-			      struct perf_session *session __maybe_unused)
+int process_stat_config_event(struct perf_session *session,
+			      union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
 
 	perf_event__read_stat_config(&stat_config, &event->stat_config);
 
-	if (cpu_map__empty(st->cpus)) {
+	if (perf_cpu_map__empty(st->cpus)) {
 		if (st->aggr_mode != AGGR_UNSET)
 			pr_warning("warning: processing task data, aggregation mode not set\n");
 		return 0;
@@ -2659,7 +1902,7 @@
 	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
 		return -EINVAL;
 
-	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
+	perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
 
 	if (perf_evlist__alloc_stats(evsel_list, true))
 		return -ENOMEM;
@@ -2669,10 +1912,10 @@
 }
 
 static
-int process_thread_map_event(struct perf_tool *tool,
-			     union perf_event *event,
-			     struct perf_session *session __maybe_unused)
+int process_thread_map_event(struct perf_session *session,
			     union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
 
 	if (st->threads) {
@@ -2688,12 +1931,12 @@
 }
 
 static
-int process_cpu_map_event(struct perf_tool *tool,
-			  union perf_event *event,
-			  struct perf_session *session __maybe_unused)
+int process_cpu_map_event(struct perf_session *session,
			  union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
-	struct cpu_map *cpus;
+	struct perf_cpu_map *cpus;
 
 	if (st->cpus) {
 		pr_warning("Extra cpu map event, ignoring.\n");
@@ -2706,35 +1949,6 @@
 
 	st->cpus = cpus;
 	return set_maps(st);
-}
-
-static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
-{
-	int i;
-
-	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
-	if (!config->stats)
-		return -1;
-
-	config->stats_num = nthreads;
-
-	for (i = 0; i < nthreads; i++)
-		runtime_stat__init(&config->stats[i]);
-
-	return 0;
-}
-
-static void runtime_stat_delete(struct perf_stat_config *config)
-{
-	int i;
-
-	if (!config->stats)
-		return;
-
-	for (i = 0; i < config->stats_num; i++)
-		runtime_stat__exit(&config->stats[i]);
-
-	free(config->stats);
 }
 
 static const char * const stat_report_usage[] = {
@@ -2762,8 +1976,12 @@
 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
 	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
 		     "aggregate counts per processor socket", AGGR_SOCKET),
+	OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
+		     "aggregate counts per processor die", AGGR_DIE),
 	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
 		     "aggregate counts per physical processor core", AGGR_CORE),
+	OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
+		     "aggregate counts per numa node", AGGR_NODE),
 	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
 		     "disable CPU count aggregation", AGGR_NONE),
 	OPT_END()
@@ -2780,12 +1998,12 @@
 		input_name = "perf.data";
 	}
 
-	perf_stat.data.file.path = input_name;
-	perf_stat.data.mode = PERF_DATA_MODE_READ;
+	perf_stat.data.path = input_name;
+	perf_stat.data.mode = PERF_DATA_MODE_READ;
 
 	session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
-	if (session == NULL)
-		return -1;
+	if (IS_ERR(session))
+		return PTR_ERR(session);
 
 	perf_stat.session = session;
 	stat_config.output = stderr;
@@ -2816,14 +2034,16 @@
 	if (!forks)
 		target.system_wide = true;
 	else {
-		struct perf_evsel *counter;
+		struct evsel *counter;
 
 		evlist__for_each_entry(evsel_list, counter) {
-			if (!counter->system_wide)
+			if (!counter->core.system_wide &&
+			    strcmp(counter->name, "duration_time")) {
 				return;
+			}
 		}
 
-		if (evsel_list->nr_entries)
+		if (evsel_list->core.nr_entries)
 			target.system_wide = true;
 	}
 }
@@ -2842,7 +2062,7 @@
 
 	setlocale(LC_ALL, "");
 
-	evsel_list = perf_evlist__new();
+	evsel_list = evlist__new();
 	if (evsel_list == NULL)
 		return -ENOMEM;
 
@@ -2859,12 +2079,12 @@
 	perf_stat__collect_metric_expr(evsel_list);
 	perf_stat__init_shadow_stats();
 
-	if (csv_sep) {
-		csv_output = true;
-		if (!strcmp(csv_sep, "\\t"))
-			csv_sep = "\t";
+	if (stat_config.csv_sep) {
+		stat_config.csv_output = true;
+		if (!strcmp(stat_config.csv_sep, "\\t"))
+			stat_config.csv_sep = "\t";
 	} else
-		csv_sep = DEFAULT_SEPARATOR;
+		stat_config.csv_sep = DEFAULT_SEPARATOR;
 
 	if (argc && !strncmp(argv[0], "rec", 3)) {
 		argc = __cmd_record(argc, argv);
@@ -2889,17 +2109,17 @@
 		goto out;
 	}
 
-	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+	if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
 		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
 		goto out;
 	}
 
-	if (metric_only && run_count > 1) {
+	if (stat_config.metric_only && stat_config.run_count > 1) {
 		fprintf(stderr, "--metric-only is not supported with -r\n");
 		goto out;
 	}
 
-	if (walltime_run_table && run_count <= 1) {
+	if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
 		fprintf(stderr, "--table is only supported with -r\n");
 		parse_options_usage(stat_usage, stat_options, "r", 1);
 		parse_options_usage(NULL, stat_options, "table", 0);
@@ -2937,7 +2157,7 @@
 	/*
 	 * let the spreadsheet do the pretty-printing
 	 */
-	if (csv_output) {
+	if (stat_config.csv_output) {
 		/* User explicitly passed -B? */
 		if (big_num_opt == 1) {
 			fprintf(stderr, "-B option not supported with -x\n");
@@ -2945,9 +2165,9 @@
 			parse_options_usage(NULL, stat_options, "x", 1);
 			goto out;
 		} else /* Nope, so disable big number formatting */
-			big_num = false;
+			stat_config.big_num = false;
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
-		big_num = false;
+		stat_config.big_num = false;
 
 	setup_system_wide(argc);
 
@@ -2955,21 +2175,21 @@
 	 * Display user/system times only for single
 	 * run and when there's specified tracee.
 	 */
-	if ((run_count == 1) && target__none(&target))
-		ru_display = true;
+	if ((stat_config.run_count == 1) && target__none(&target))
+		stat_config.ru_display = true;
 
-	if (run_count < 0) {
+	if (stat_config.run_count < 0) {
 		pr_err("Run count must be a positive number\n");
 		parse_options_usage(stat_usage, stat_options, "r", 1);
 		goto out;
-	} else if (run_count == 0) {
+	} else if (stat_config.run_count == 0) {
 		forever = true;
-		run_count = 1;
+		stat_config.run_count = 1;
 	}
 
-	if (walltime_run_table) {
-		walltime_run = zalloc(run_count * sizeof(walltime_run[0]));
-		if (!walltime_run) {
+	if (stat_config.walltime_run_table) {
+		stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
+		if (!stat_config.walltime_run) {
 			pr_err("failed to setup -r option");
 			goto out;
 		}
@@ -3006,6 +2226,19 @@
 	if (add_default_attributes())
 		goto out;
 
+	if (stat_config.cgroup_list) {
+		if (nr_cgroups > 0) {
+			pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
+			parse_options_usage(stat_usage, stat_options, "G", 1);
+			parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
+			goto out;
+		}
+
+		if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list,
+					  &stat_config.metric_events, true) < 0)
+			goto out;
+	}
+
 	target__validate(&target);
 
 	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
@@ -3024,19 +2257,24 @@
 		goto out;
 	}
 
+	evlist__check_cpu_maps(evsel_list);
+
 	/*
 	 * Initialize thread_map with comm names,
 	 * so we could print it out on output.
 	 */
 	if (stat_config.aggr_mode == AGGR_THREAD) {
-		thread_map__read_comms(evsel_list->threads);
+		thread_map__read_comms(evsel_list->core.threads);
 		if (target.system_wide) {
 			if (runtime_stat_new(&stat_config,
-					     thread_map__nr(evsel_list->threads))) {
+					     perf_thread_map__nr(evsel_list->core.threads))) {
 				goto out;
 			}
 		}
 	}
+
+	if (stat_config.aggr_mode == AGGR_NODE)
+		cpu__setup_cpunode_map();
 
 	if (stat_config.times && interval)
 		interval_count = true;
@@ -3072,6 +2310,17 @@
 		goto out;
 
 	/*
+	 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
+	 * while avoiding that older tools show confusing messages.
+	 *
+	 * However for pipe sessions we need to keep it zero,
+	 * because script's perf_evsel__check_attr is triggered
+	 * by attr->sample_type != 0, and we can't run it on
+	 * stat sessions.
+	 */
+	stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
+
+	/*
 	 * We don't want to block the signals - that would cause
 	 * child tasks to inherit that and Ctrl-C would not work.
 	 * What we want is for Ctrl-C to work in the exec()-ed
@@ -3084,9 +2333,12 @@
 	signal(SIGALRM, skip_signal);
 	signal(SIGABRT, skip_signal);
 
+	if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
+		goto out;
+
 	status = 0;
-	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
-		if (run_count != 1 && verbose > 0)
+	for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
+		if (stat_config.run_count != 1 && verbose > 0)
 			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
 				run_idx + 1);
 
@@ -3100,8 +2352,10 @@
 		}
 	}
 
-	if (!forever && status != -1 && !interval)
+	if (!forever && status != -1 && (!interval || stat_config.summary))
 		print_counters(NULL, argc, argv);
+
+	evlist__finalize_ctlfd(evsel_list);
 
 	if (STAT_RECORD) {
 		/*
@@ -3135,20 +2389,23 @@
 		perf_session__write_header(perf_stat.session, evsel_list, fd, true);
 	}
 
+	evlist__close(evsel_list);
 	perf_session__delete(perf_stat.session);
 	}
 
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
-	free(walltime_run);
+	zfree(&stat_config.walltime_run);
 
 	if (smi_cost && smi_reset)
 		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
 
-	perf_evlist__delete(evsel_list);
+	evlist__delete(evsel_list);
 
+	metricgroup__rblist_exit(&stat_config.metric_events);
 	runtime_stat_delete(&stat_config);
+	evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);
 
 	return status;
 }