hc
2024-05-10 37f49e37ab4cb5d0bc4c60eb5c6d4dd57db767bb
kernel/tools/perf/util/stat-shadow.c
....@@ -8,27 +8,30 @@
88 #include "evlist.h"
99 #include "expr.h"
1010 #include "metricgroup.h"
11
+#include <linux/zalloc.h>
1112
1213 /*
1314 * AGGR_GLOBAL: Use CPU 0
1415 * AGGR_SOCKET: Use first CPU of socket
16
+ * AGGR_DIE: Use first CPU of die
1517 * AGGR_CORE: Use first CPU of core
1618 * AGGR_NONE: Use matching CPU
1719 * AGGR_THREAD: Not supported?
1820 */
19
-static bool have_frontend_stalled;
2021
2122 struct runtime_stat rt_stat;
2223 struct stats walltime_nsecs_stats;
2324
2425 struct saved_value {
2526 struct rb_node rb_node;
26
- struct perf_evsel *evsel;
27
+ struct evsel *evsel;
2728 enum stat_type type;
2829 int ctx;
2930 int cpu;
3031 struct runtime_stat *stat;
3132 struct stats stats;
33
+ u64 metric_total;
34
+ int metric_other;
3235 };
3336
3437 static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
....@@ -92,7 +95,7 @@
9295 free(v);
9396 }
9497
95
-static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
98
+static struct saved_value *saved_value_lookup(struct evsel *evsel,
9699 int cpu,
97100 bool create,
98101 enum stat_type type,
....@@ -140,23 +143,22 @@
140143
141144 void perf_stat__init_shadow_stats(void)
142145 {
143
- have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
144146 runtime_stat__init(&rt_stat);
145147 }
146148
147
-static int evsel_context(struct perf_evsel *evsel)
149
+static int evsel_context(struct evsel *evsel)
148150 {
149151 int ctx = 0;
150152
151
- if (evsel->attr.exclude_kernel)
153
+ if (evsel->core.attr.exclude_kernel)
152154 ctx |= CTX_BIT_KERNEL;
153
- if (evsel->attr.exclude_user)
155
+ if (evsel->core.attr.exclude_user)
154156 ctx |= CTX_BIT_USER;
155
- if (evsel->attr.exclude_hv)
157
+ if (evsel->core.attr.exclude_hv)
156158 ctx |= CTX_BIT_HV;
157
- if (evsel->attr.exclude_host)
159
+ if (evsel->core.attr.exclude_host)
158160 ctx |= CTX_BIT_HOST;
159
- if (evsel->attr.exclude_idle)
161
+ if (evsel->core.attr.exclude_idle)
160162 ctx |= CTX_BIT_IDLE;
161163
162164 return ctx;
....@@ -168,7 +170,7 @@
168170 struct rb_node *pos, *next;
169171
170172 rblist = &st->value_list;
171
- next = rb_first(&rblist->entries);
173
+ next = rb_first_cached(&rblist->entries);
172174 while (next) {
173175 pos = next;
174176 next = rb_next(pos);
....@@ -205,17 +207,18 @@
205207 * more semantic information such as miss/hit ratios,
206208 * instruction rates, etc:
207209 */
208
-void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
210
+void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
209211 int cpu, struct runtime_stat *st)
210212 {
211213 int ctx = evsel_context(counter);
212214 u64 count_ns = count;
215
+ struct saved_value *v;
213216
214217 count *= counter->scale;
215218
216
- if (perf_evsel__is_clock(counter))
219
+ if (evsel__is_clock(counter))
217220 update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
218
- else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
221
+ else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
219222 update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
220223 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
221224 update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
....@@ -238,25 +241,37 @@
238241 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
239242 update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
240243 ctx, cpu, count);
241
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
244
+ else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
245
+ update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
246
+ ctx, cpu, count);
247
+ else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
248
+ update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
249
+ ctx, cpu, count);
250
+ else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
251
+ update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
252
+ ctx, cpu, count);
253
+ else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
254
+ update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
255
+ ctx, cpu, count);
256
+ else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
242257 update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
243258 ctx, cpu, count);
244
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
259
+ else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
245260 update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
246261 ctx, cpu, count);
247
- else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
262
+ else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
248263 update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
249
- else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
264
+ else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
250265 update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
251
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
266
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
252267 update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
253
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
268
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
254269 update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
255
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
270
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
256271 update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
257
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
272
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
258273 update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
259
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
274
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
260275 update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
261276 else if (perf_stat_evsel__is(counter, SMI_NUM))
262277 update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
....@@ -264,9 +279,15 @@
264279 update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
265280
266281 if (counter->collect_stat) {
267
- struct saved_value *v = saved_value_lookup(counter, cpu, true,
268
- STAT_NONE, 0, st);
282
+ v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
269283 update_stats(&v->stats, count);
284
+ if (counter->metric_leader)
285
+ v->metric_total += count;
286
+ } else if (counter->metric_leader) {
287
+ v = saved_value_lookup(counter->metric_leader,
288
+ cpu, true, STAT_NONE, 0, st);
289
+ v->metric_total += count;
290
+ v->metric_other++;
270291 }
271292 }
272293
....@@ -297,10 +318,10 @@
297318 return color;
298319 }
299320
300
-static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
321
+static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
301322 const char *name)
302323 {
303
- struct perf_evsel *c2;
324
+ struct evsel *c2;
304325
305326 evlist__for_each_entry (evsel_list, c2) {
306327 if (!strcasecmp(c2->name, name) && !c2->collect_stat)
....@@ -310,39 +331,50 @@
310331 }
311332
312333 /* Mark MetricExpr target events and link events using them to them. */
313
-void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
334
+void perf_stat__collect_metric_expr(struct evlist *evsel_list)
314335 {
315
- struct perf_evsel *counter, *leader, **metric_events, *oc;
336
+ struct evsel *counter, *leader, **metric_events, *oc;
316337 bool found;
317
- const char **metric_names;
338
+ struct expr_parse_ctx ctx;
339
+ struct hashmap_entry *cur;
340
+ size_t bkt;
318341 int i;
319
- int num_metric_names;
320342
343
+ expr__ctx_init(&ctx);
321344 evlist__for_each_entry(evsel_list, counter) {
322345 bool invalid = false;
323346
324347 leader = counter->leader;
325348 if (!counter->metric_expr)
326349 continue;
350
+
351
+ expr__ctx_clear(&ctx);
327352 metric_events = counter->metric_events;
328353 if (!metric_events) {
329
- if (expr__find_other(counter->metric_expr, counter->name,
330
- &metric_names, &num_metric_names) < 0)
354
+ if (expr__find_other(counter->metric_expr,
355
+ counter->name,
356
+ &ctx, 1) < 0)
331357 continue;
332358
333
- metric_events = calloc(sizeof(struct perf_evsel *),
334
- num_metric_names + 1);
335
- if (!metric_events)
359
+ metric_events = calloc(sizeof(struct evsel *),
360
+ hashmap__size(&ctx.ids) + 1);
361
+ if (!metric_events) {
362
+ expr__ctx_clear(&ctx);
336363 return;
364
+ }
337365 counter->metric_events = metric_events;
338366 }
339367
340
- for (i = 0; i < num_metric_names; i++) {
368
+ i = 0;
369
+ hashmap__for_each_entry((&ctx.ids), cur, bkt) {
370
+ const char *metric_name = (const char *)cur->key;
371
+
341372 found = false;
342373 if (leader) {
343374 /* Search in group */
344375 for_each_group_member (oc, leader) {
345
- if (!strcasecmp(oc->name, metric_names[i]) &&
376
+ if (!strcasecmp(oc->name,
377
+ metric_name) &&
346378 !oc->collect_stat) {
347379 found = true;
348380 break;
....@@ -351,7 +383,8 @@
351383 }
352384 if (!found) {
353385 /* Search ignoring groups */
354
- oc = perf_stat__find_event(evsel_list, metric_names[i]);
386
+ oc = perf_stat__find_event(evsel_list,
387
+ metric_name);
355388 }
356389 if (!oc) {
357390 /* Deduping one is good enough to handle duplicated PMUs. */
....@@ -364,27 +397,28 @@
364397 * of events. So we ask the user instead to add the missing
365398 * events.
366399 */
367
- if (!printed || strcasecmp(printed, metric_names[i])) {
400
+ if (!printed ||
401
+ strcasecmp(printed, metric_name)) {
368402 fprintf(stderr,
369403 "Add %s event to groups to get metric expression for %s\n",
370
- metric_names[i],
404
+ metric_name,
371405 counter->name);
372
- printed = strdup(metric_names[i]);
406
+ printed = strdup(metric_name);
373407 }
374408 invalid = true;
375409 continue;
376410 }
377
- metric_events[i] = oc;
411
+ metric_events[i++] = oc;
378412 oc->collect_stat = true;
379413 }
380414 metric_events[i] = NULL;
381
- free(metric_names);
382415 if (invalid) {
383416 free(metric_events);
384417 counter->metric_events = NULL;
385418 counter->metric_expr = NULL;
386419 }
387420 }
421
+ expr__ctx_clear(&ctx);
388422 }
389423
390424 static double runtime_stat_avg(struct runtime_stat *st,
....@@ -411,8 +445,9 @@
411445 return v->stats.n;
412446 }
413447
414
-static void print_stalled_cycles_frontend(int cpu,
415
- struct perf_evsel *evsel, double avg,
448
+static void print_stalled_cycles_frontend(struct perf_stat_config *config,
449
+ int cpu,
450
+ struct evsel *evsel, double avg,
416451 struct perf_stat_output_ctx *out,
417452 struct runtime_stat *st)
418453 {
....@@ -428,14 +463,15 @@
428463 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
429464
430465 if (ratio)
431
- out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
466
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
432467 ratio);
433468 else
434
- out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
469
+ out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
435470 }
436471
437
-static void print_stalled_cycles_backend(int cpu,
438
- struct perf_evsel *evsel, double avg,
472
+static void print_stalled_cycles_backend(struct perf_stat_config *config,
473
+ int cpu,
474
+ struct evsel *evsel, double avg,
439475 struct perf_stat_output_ctx *out,
440476 struct runtime_stat *st)
441477 {
....@@ -450,11 +486,12 @@
450486
451487 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
452488
453
- out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
489
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
454490 }
455491
456
-static void print_branch_misses(int cpu,
457
- struct perf_evsel *evsel,
492
+static void print_branch_misses(struct perf_stat_config *config,
493
+ int cpu,
494
+ struct evsel *evsel,
458495 double avg,
459496 struct perf_stat_output_ctx *out,
460497 struct runtime_stat *st)
....@@ -470,11 +507,12 @@
470507
471508 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
472509
473
- out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
510
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
474511 }
475512
476
-static void print_l1_dcache_misses(int cpu,
477
- struct perf_evsel *evsel,
513
+static void print_l1_dcache_misses(struct perf_stat_config *config,
514
+ int cpu,
515
+ struct evsel *evsel,
478516 double avg,
479517 struct perf_stat_output_ctx *out,
480518 struct runtime_stat *st)
....@@ -491,11 +529,12 @@
491529
492530 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
493531
494
- out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
532
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
495533 }
496534
497
-static void print_l1_icache_misses(int cpu,
498
- struct perf_evsel *evsel,
535
+static void print_l1_icache_misses(struct perf_stat_config *config,
536
+ int cpu,
537
+ struct evsel *evsel,
499538 double avg,
500539 struct perf_stat_output_ctx *out,
501540 struct runtime_stat *st)
....@@ -511,11 +550,12 @@
511550 ratio = avg / total * 100.0;
512551
513552 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
514
- out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
553
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
515554 }
516555
517
-static void print_dtlb_cache_misses(int cpu,
518
- struct perf_evsel *evsel,
556
+static void print_dtlb_cache_misses(struct perf_stat_config *config,
557
+ int cpu,
558
+ struct evsel *evsel,
519559 double avg,
520560 struct perf_stat_output_ctx *out,
521561 struct runtime_stat *st)
....@@ -530,11 +570,12 @@
530570 ratio = avg / total * 100.0;
531571
532572 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
533
- out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
573
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
534574 }
535575
536
-static void print_itlb_cache_misses(int cpu,
537
- struct perf_evsel *evsel,
576
+static void print_itlb_cache_misses(struct perf_stat_config *config,
577
+ int cpu,
578
+ struct evsel *evsel,
538579 double avg,
539580 struct perf_stat_output_ctx *out,
540581 struct runtime_stat *st)
....@@ -549,11 +590,12 @@
549590 ratio = avg / total * 100.0;
550591
551592 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
552
- out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
593
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
553594 }
554595
555
-static void print_ll_cache_misses(int cpu,
556
- struct perf_evsel *evsel,
596
+static void print_ll_cache_misses(struct perf_stat_config *config,
597
+ int cpu,
598
+ struct evsel *evsel,
557599 double avg,
558600 struct perf_stat_output_ctx *out,
559601 struct runtime_stat *st)
....@@ -568,7 +610,7 @@
568610 ratio = avg / total * 100.0;
569611
570612 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
571
- out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
613
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
572614 }
573615
574616 /*
....@@ -675,7 +717,49 @@
675717 return sanitize_val(1.0 - sum);
676718 }
677719
678
-static void print_smi_cost(int cpu, struct perf_evsel *evsel,
720
+/*
721
+ * Kernel reports metrics multiplied with slots. To get back
722
+ * the ratios we need to recreate the sum.
723
+ */
724
+
725
+static double td_metric_ratio(int ctx, int cpu,
726
+ enum stat_type type,
727
+ struct runtime_stat *stat)
728
+{
729
+ double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
730
+ runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
731
+ runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
732
+ runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
733
+ double d = runtime_stat_avg(stat, type, ctx, cpu);
734
+
735
+ if (sum)
736
+ return d / sum;
737
+ return 0;
738
+}
739
+
740
+/*
741
+ * ... but only if most of the values are actually available.
742
+ * We allow two missing.
743
+ */
744
+
745
+static bool full_td(int ctx, int cpu,
746
+ struct runtime_stat *stat)
747
+{
748
+ int c = 0;
749
+
750
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
751
+ c++;
752
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
753
+ c++;
754
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
755
+ c++;
756
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
757
+ c++;
758
+ return c >= 2;
759
+}
760
+
761
+static void print_smi_cost(struct perf_stat_config *config,
762
+ int cpu, struct evsel *evsel,
679763 struct perf_stat_output_ctx *out,
680764 struct runtime_stat *st)
681765 {
....@@ -695,31 +779,25 @@
695779
696780 if (cost > 10)
697781 color = PERF_COLOR_RED;
698
- out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
699
- out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
782
+ out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
783
+ out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
700784 }
701785
702
-static void generic_metric(const char *metric_expr,
703
- struct perf_evsel **metric_events,
704
- char *name,
705
- const char *metric_name,
706
- double avg,
707
- int cpu,
708
- struct perf_stat_output_ctx *out,
709
- struct runtime_stat *st)
786
+static int prepare_metric(struct evsel **metric_events,
787
+ struct metric_ref *metric_refs,
788
+ struct expr_parse_ctx *pctx,
789
+ int cpu,
790
+ struct runtime_stat *st)
710791 {
711
- print_metric_t print_metric = out->print_metric;
712
- struct parse_ctx pctx;
713
- double ratio;
714
- int i;
715
- void *ctxp = out->ctx;
792
+ double scale;
793
+ char *n, *pn;
794
+ int i, j, ret;
716795
717
- expr__ctx_init(&pctx);
718
- expr__add_id(&pctx, name, avg);
796
+ expr__ctx_init(pctx);
719797 for (i = 0; metric_events[i]; i++) {
720798 struct saved_value *v;
721799 struct stats *stats;
722
- double scale;
800
+ u64 metric_total = 0;
723801
724802 if (!strcmp(metric_events[i]->name, "duration_time")) {
725803 stats = &walltime_nsecs_stats;
....@@ -731,27 +809,118 @@
731809 break;
732810 stats = &v->stats;
733811 scale = 1.0;
734
- }
735
- expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
736
- }
737
- if (!metric_events[i]) {
738
- const char *p = metric_expr;
739812
740
- if (expr__parse(&ratio, &pctx, &p) == 0)
741
- print_metric(ctxp, NULL, "%8.1f",
742
- metric_name ?
743
- metric_name :
744
- out->force_header ? name : "",
745
- ratio);
813
+ if (v->metric_other)
814
+ metric_total = v->metric_total;
815
+ }
816
+
817
+ n = strdup(metric_events[i]->name);
818
+ if (!n)
819
+ return -ENOMEM;
820
+ /*
821
+ * This display code with --no-merge adds [cpu] postfixes.
822
+ * These are not supported by the parser. Remove everything
823
+ * after the space.
824
+ */
825
+ pn = strchr(n, ' ');
826
+ if (pn)
827
+ *pn = 0;
828
+
829
+ if (metric_total)
830
+ expr__add_id_val(pctx, n, metric_total);
746831 else
747
- print_metric(ctxp, NULL, NULL,
748
- out->force_header ?
749
- (metric_name ? metric_name : name) : "", 0);
750
- } else
751
- print_metric(ctxp, NULL, NULL, "", 0);
832
+ expr__add_id_val(pctx, n, avg_stats(stats)*scale);
833
+ }
834
+
835
+ for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
836
+ ret = expr__add_ref(pctx, &metric_refs[j]);
837
+ if (ret)
838
+ return ret;
839
+ }
840
+
841
+ return i;
752842 }
753843
754
-void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
844
+static void generic_metric(struct perf_stat_config *config,
845
+ const char *metric_expr,
846
+ struct evsel **metric_events,
847
+ struct metric_ref *metric_refs,
848
+ char *name,
849
+ const char *metric_name,
850
+ const char *metric_unit,
851
+ int runtime,
852
+ int cpu,
853
+ struct perf_stat_output_ctx *out,
854
+ struct runtime_stat *st)
855
+{
856
+ print_metric_t print_metric = out->print_metric;
857
+ struct expr_parse_ctx pctx;
858
+ double ratio, scale;
859
+ int i;
860
+ void *ctxp = out->ctx;
861
+
862
+ i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
863
+ if (i < 0)
864
+ return;
865
+
866
+ if (!metric_events[i]) {
867
+ if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
868
+ char *unit;
869
+ char metric_bf[64];
870
+
871
+ if (metric_unit && metric_name) {
872
+ if (perf_pmu__convert_scale(metric_unit,
873
+ &unit, &scale) >= 0) {
874
+ ratio *= scale;
875
+ }
876
+ if (strstr(metric_expr, "?"))
877
+ scnprintf(metric_bf, sizeof(metric_bf),
878
+ "%s %s_%d", unit, metric_name, runtime);
879
+ else
880
+ scnprintf(metric_bf, sizeof(metric_bf),
881
+ "%s %s", unit, metric_name);
882
+
883
+ print_metric(config, ctxp, NULL, "%8.1f",
884
+ metric_bf, ratio);
885
+ } else {
886
+ print_metric(config, ctxp, NULL, "%8.2f",
887
+ metric_name ?
888
+ metric_name :
889
+ out->force_header ? name : "",
890
+ ratio);
891
+ }
892
+ } else {
893
+ print_metric(config, ctxp, NULL, NULL,
894
+ out->force_header ?
895
+ (metric_name ? metric_name : name) : "", 0);
896
+ }
897
+ } else {
898
+ print_metric(config, ctxp, NULL, NULL,
899
+ out->force_header ?
900
+ (metric_name ? metric_name : name) : "", 0);
901
+ }
902
+
903
+ expr__ctx_clear(&pctx);
904
+}
905
+
906
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
907
+{
908
+ struct expr_parse_ctx pctx;
909
+ double ratio = 0.0;
910
+
911
+ if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
912
+ goto out;
913
+
914
+ if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
915
+ ratio = 0.0;
916
+
917
+out:
918
+ expr__ctx_clear(&pctx);
919
+ return ratio;
920
+}
921
+
922
+void perf_stat__print_shadow_stats(struct perf_stat_config *config,
923
+ struct evsel *evsel,
755924 double avg, int cpu,
756925 struct perf_stat_output_ctx *out,
757926 struct rblist *metric_events,
....@@ -765,15 +934,15 @@
765934 struct metric_event *me;
766935 int num = 1;
767936
768
- if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
937
+ if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
769938 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
770939
771940 if (total) {
772941 ratio = avg / total;
773
- print_metric(ctxp, NULL, "%7.2f ",
942
+ print_metric(config, ctxp, NULL, "%7.2f ",
774943 "insn per cycle", ratio);
775944 } else {
776
- print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
945
+ print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
777946 }
778947
779948 total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
....@@ -784,103 +953,100 @@
784953 ctx, cpu));
785954
786955 if (total && avg) {
787
- out->new_line(ctxp);
956
+ out->new_line(config, ctxp);
788957 ratio = total / avg;
789
- print_metric(ctxp, NULL, "%7.2f ",
958
+ print_metric(config, ctxp, NULL, "%7.2f ",
790959 "stalled cycles per insn",
791960 ratio);
792
- } else if (have_frontend_stalled) {
793
- print_metric(ctxp, NULL, NULL,
794
- "stalled cycles per insn", 0);
795961 }
796
- } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
962
+ } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
797963 if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
798
- print_branch_misses(cpu, evsel, avg, out, st);
964
+ print_branch_misses(config, cpu, evsel, avg, out, st);
799965 else
800
- print_metric(ctxp, NULL, NULL, "of all branches", 0);
966
+ print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
801967 } else if (
802
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
803
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
968
+ evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
969
+ evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1D |
804970 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
805971 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
806972
807973 if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
808
- print_l1_dcache_misses(cpu, evsel, avg, out, st);
974
+ print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
809975 else
810
- print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
976
+ print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
811977 } else if (
812
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
813
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
978
+ evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
979
+ evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I |
814980 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
815981 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
816982
817983 if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
818
- print_l1_icache_misses(cpu, evsel, avg, out, st);
984
+ print_l1_icache_misses(config, cpu, evsel, avg, out, st);
819985 else
820
- print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
986
+ print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
821987 } else if (
822
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
823
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
988
+ evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
989
+ evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
824990 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
825991 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
826992
827993 if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
828
- print_dtlb_cache_misses(cpu, evsel, avg, out, st);
994
+ print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
829995 else
830
- print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
996
+ print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
831997 } else if (
832
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
833
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
998
+ evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
999
+ evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
8341000 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
8351001 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
8361002
8371003 if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
838
- print_itlb_cache_misses(cpu, evsel, avg, out, st);
1004
+ print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
8391005 else
840
- print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
1006
+ print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
8411007 } else if (
842
- evsel->attr.type == PERF_TYPE_HW_CACHE &&
843
- evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
1008
+ evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
1009
+ evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL |
8441010 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
8451011 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
8461012
8471013 if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
848
- print_ll_cache_misses(cpu, evsel, avg, out, st);
1014
+ print_ll_cache_misses(config, cpu, evsel, avg, out, st);
8491015 else
850
- print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
851
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
1016
+ print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
1017
+ } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
8521018 total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
8531019
8541020 if (total)
8551021 ratio = avg * 100 / total;
8561022
8571023 if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
858
- print_metric(ctxp, NULL, "%8.3f %%",
1024
+ print_metric(config, ctxp, NULL, "%8.3f %%",
8591025 "of all cache refs", ratio);
8601026 else
861
- print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
862
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
863
- print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
864
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
865
- print_stalled_cycles_backend(cpu, evsel, avg, out, st);
866
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
1027
+ print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
1028
+ } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
1029
+ print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
1030
+ } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
1031
+ print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
1032
+ } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
8671033 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
8681034
8691035 if (total) {
8701036 ratio = avg / total;
871
- print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
1037
+ print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
8721038 } else {
873
- print_metric(ctxp, NULL, NULL, "Ghz", 0);
1039
+ print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
8741040 }
8751041 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
8761042 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
8771043
8781044 if (total)
879
- print_metric(ctxp, NULL,
1045
+ print_metric(config, ctxp, NULL,
8801046 "%7.2f%%", "transactional cycles",
8811047 100.0 * (avg / total));
8821048 else
883
- print_metric(ctxp, NULL, NULL, "transactional cycles",
1049
+ print_metric(config, ctxp, NULL, NULL, "transactional cycles",
8841050 0);
8851051 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
8861052 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
....@@ -889,10 +1055,10 @@
8891055 if (total2 < avg)
8901056 total2 = avg;
8911057 if (total)
892
- print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
1058
+ print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
8931059 100.0 * ((total2-avg) / total));
8941060 else
895
- print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
1061
+ print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
8961062 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
8971063 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
8981064 ctx, cpu);
....@@ -901,10 +1067,10 @@
9011067 ratio = total / avg;
9021068
9031069 if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
904
- print_metric(ctxp, NULL, "%8.0f",
1070
+ print_metric(config, ctxp, NULL, "%8.0f",
9051071 "cycles / transaction", ratio);
9061072 else
907
- print_metric(ctxp, NULL, NULL, "cycles / transaction",
1073
+ print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
9081074 0);
9091075 } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
9101076 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
....@@ -913,33 +1079,33 @@
9131079 if (avg)
9141080 ratio = total / avg;
9151081
916
- print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
917
- } else if (perf_evsel__is_clock(evsel)) {
1082
+ print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
1083
+ } else if (evsel__is_clock(evsel)) {
9181084 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
919
- print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
1085
+ print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
9201086 avg / (ratio * evsel->scale));
9211087 else
922
- print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
1088
+ print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
9231089 } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
9241090 double fe_bound = td_fe_bound(ctx, cpu, st);
9251091
9261092 if (fe_bound > 0.2)
9271093 color = PERF_COLOR_RED;
928
- print_metric(ctxp, color, "%8.1f%%", "frontend bound",
1094
+ print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
9291095 fe_bound * 100.);
9301096 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
9311097 double retiring = td_retiring(ctx, cpu, st);
9321098
9331099 if (retiring > 0.7)
9341100 color = PERF_COLOR_GREEN;
935
- print_metric(ctxp, color, "%8.1f%%", "retiring",
1101
+ print_metric(config, ctxp, color, "%8.1f%%", "retiring",
9361102 retiring * 100.);
9371103 } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
9381104 double bad_spec = td_bad_spec(ctx, cpu, st);
9391105
9401106 if (bad_spec > 0.1)
9411107 color = PERF_COLOR_RED;
942
- print_metric(ctxp, color, "%8.1f%%", "bad speculation",
1108
+ print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
9431109 bad_spec * 100.);
9441110 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
9451111 double be_bound = td_be_bound(ctx, cpu, st);
....@@ -956,13 +1122,49 @@
9561122 if (be_bound > 0.2)
9571123 color = PERF_COLOR_RED;
9581124 if (td_total_slots(ctx, cpu, st) > 0)
959
- print_metric(ctxp, color, "%8.1f%%", name,
1125
+ print_metric(config, ctxp, color, "%8.1f%%", name,
9601126 be_bound * 100.);
9611127 else
962
- print_metric(ctxp, NULL, NULL, name, 0);
1128
+ print_metric(config, ctxp, NULL, NULL, name, 0);
1129
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
1130
+ full_td(ctx, cpu, st)) {
1131
+ double retiring = td_metric_ratio(ctx, cpu,
1132
+ STAT_TOPDOWN_RETIRING, st);
1133
+
1134
+ if (retiring > 0.7)
1135
+ color = PERF_COLOR_GREEN;
1136
+ print_metric(config, ctxp, color, "%8.1f%%", "retiring",
1137
+ retiring * 100.);
1138
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
1139
+ full_td(ctx, cpu, st)) {
1140
+ double fe_bound = td_metric_ratio(ctx, cpu,
1141
+ STAT_TOPDOWN_FE_BOUND, st);
1142
+
1143
+ if (fe_bound > 0.2)
1144
+ color = PERF_COLOR_RED;
1145
+ print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
1146
+ fe_bound * 100.);
1147
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
1148
+ full_td(ctx, cpu, st)) {
1149
+ double be_bound = td_metric_ratio(ctx, cpu,
1150
+ STAT_TOPDOWN_BE_BOUND, st);
1151
+
1152
+ if (be_bound > 0.2)
1153
+ color = PERF_COLOR_RED;
1154
+ print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
1155
+ be_bound * 100.);
1156
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
1157
+ full_td(ctx, cpu, st)) {
1158
+ double bad_spec = td_metric_ratio(ctx, cpu,
1159
+ STAT_TOPDOWN_BAD_SPEC, st);
1160
+
1161
+ if (bad_spec > 0.1)
1162
+ color = PERF_COLOR_RED;
1163
+ print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
1164
+ bad_spec * 100.);
9631165 } else if (evsel->metric_expr) {
964
- generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
965
- evsel->metric_name, avg, cpu, out, st);
1166
+ generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
1167
+ evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
9661168 } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
9671169 char unit = 'M';
9681170 char unit_buf[10];
....@@ -976,9 +1178,9 @@
9761178 unit = 'K';
9771179 }
9781180 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
979
- print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
1181
+ print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
9801182 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
981
- print_smi_cost(cpu, evsel, out, st);
1183
+ print_smi_cost(config, cpu, evsel, out, st);
9821184 } else {
9831185 num = 0;
9841186 }
....@@ -988,12 +1190,12 @@
9881190
9891191 list_for_each_entry (mexp, &me->head, nd) {
9901192 if (num++ > 0)
991
- out->new_line(ctxp);
992
- generic_metric(mexp->metric_expr, mexp->metric_events,
993
- evsel->name, mexp->metric_name,
994
- avg, cpu, out, st);
1193
+ out->new_line(config, ctxp);
1194
+ generic_metric(config, mexp->metric_expr, mexp->metric_events,
1195
+ mexp->metric_refs, evsel->name, mexp->metric_name,
1196
+ mexp->metric_unit, mexp->runtime, cpu, out, st);
9951197 }
9961198 }
9971199 if (num == 0)
998
- print_metric(ctxp, NULL, NULL, NULL, 0);
1200
+ print_metric(config, ctxp, NULL, NULL, NULL, 0);
9991201 }