2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/tools/perf/util/stat.c
@@ -2,10 +2,18 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <math.h>
+#include <string.h>
+#include "counts.h"
+#include "cpumap.h"
+#include "debug.h"
+#include "header.h"
 #include "stat.h"
+#include "session.h"
+#include "target.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
+#include <linux/zalloc.h>
 
 void update_stats(struct stats *stats, u64 val)
 {
@@ -67,7 +75,7 @@
 	return pct;
 }
 
-bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+bool __perf_evsel_stat__is(struct evsel *evsel,
 			   enum perf_stat_evsel_id id)
 {
 	struct perf_stat_evsel *ps = evsel->stats;
@@ -87,12 +95,16 @@
 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(TOPDOWN_RETIRING, topdown-retiring),
+	ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
+	ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
+	ID(TOPDOWN_BE_BOUND, topdown-be-bound),
 	ID(SMI_NUM, msr/smi/),
 	ID(APERF, msr/aperf/),
 };
 #undef ID
 
-static void perf_stat_evsel_id_init(struct perf_evsel *evsel)
+static void perf_stat_evsel_id_init(struct evsel *evsel)
 {
 	struct perf_stat_evsel *ps = evsel->stats;
 	int i;
@@ -100,14 +112,14 @@
 	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
 
 	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
-		if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
+		if (!strcmp(evsel__name(evsel), id_str[i])) {
 			ps->id = i;
 			break;
 		}
 	}
 }
 
-static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
+static void evsel__reset_stat_priv(struct evsel *evsel)
 {
 	int i;
 	struct perf_stat_evsel *ps = evsel->stats;
@@ -118,26 +130,25 @@
 	perf_stat_evsel_id_init(evsel);
 }
 
-static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+static int evsel__alloc_stat_priv(struct evsel *evsel)
 {
 	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
 	if (evsel->stats == NULL)
 		return -ENOMEM;
-	perf_evsel__reset_stat_priv(evsel);
+	evsel__reset_stat_priv(evsel);
 	return 0;
 }
 
-static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+static void evsel__free_stat_priv(struct evsel *evsel)
 {
 	struct perf_stat_evsel *ps = evsel->stats;
 
 	if (ps)
-		free(ps->group_data);
+		zfree(&ps->group_data);
 	zfree(&evsel->stats);
 }
 
-static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
-					      int ncpus, int nthreads)
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
 {
 	struct perf_counts *counts;
 
@@ -148,40 +159,37 @@
 	return counts ? 0 : -ENOMEM;
 }
 
-static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+static void evsel__free_prev_raw_counts(struct evsel *evsel)
 {
 	perf_counts__delete(evsel->prev_raw_counts);
 	evsel->prev_raw_counts = NULL;
 }
 
-static void perf_evsel__reset_prev_raw_counts(struct perf_evsel *evsel)
+static void evsel__reset_prev_raw_counts(struct evsel *evsel)
 {
-	if (evsel->prev_raw_counts) {
-		evsel->prev_raw_counts->aggr.val = 0;
-		evsel->prev_raw_counts->aggr.ena = 0;
-		evsel->prev_raw_counts->aggr.run = 0;
-	}
+	if (evsel->prev_raw_counts)
+		perf_counts__reset(evsel->prev_raw_counts);
 }
 
-static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
+static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
 {
-	int ncpus = perf_evsel__nr_cpus(evsel);
-	int nthreads = thread_map__nr(evsel->threads);
+	int ncpus = evsel__nr_cpus(evsel);
+	int nthreads = perf_thread_map__nr(evsel->core.threads);
 
-	if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
-	    perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
-	    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+	if (evsel__alloc_stat_priv(evsel) < 0 ||
+	    evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
+	    (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
 		return -ENOMEM;
 
 	return 0;
 }
 
-int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
+int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (perf_evsel__alloc_stats(evsel, alloc_raw))
+		if (evsel__alloc_stats(evsel, alloc_raw))
 			goto out_free;
 	}
 
@@ -192,46 +200,90 @@
 	return -1;
 }
 
-void perf_evlist__free_stats(struct perf_evlist *evlist)
+void perf_evlist__free_stats(struct evlist *evlist)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
-		perf_evsel__free_stat_priv(evsel);
-		perf_evsel__free_counts(evsel);
-		perf_evsel__free_prev_raw_counts(evsel);
+		evsel__free_stat_priv(evsel);
+		evsel__free_counts(evsel);
+		evsel__free_prev_raw_counts(evsel);
 	}
 }
 
-void perf_evlist__reset_stats(struct perf_evlist *evlist)
+void perf_evlist__reset_stats(struct evlist *evlist)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
-		perf_evsel__reset_stat_priv(evsel);
-		perf_evsel__reset_counts(evsel);
+		evsel__reset_stat_priv(evsel);
+		evsel__reset_counts(evsel);
 	}
 }
 
-void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist)
+void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
 {
-	struct perf_evsel *evsel;
+	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel)
-		perf_evsel__reset_prev_raw_counts(evsel);
+		evsel__reset_prev_raw_counts(evsel);
 }
 
-static void zero_per_pkg(struct perf_evsel *counter)
+static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel)
+{
+	int ncpus = evsel__nr_cpus(evsel);
+	int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+	for (int thread = 0; thread < nthreads; thread++) {
+		for (int cpu = 0; cpu < ncpus; cpu++) {
+			*perf_counts(evsel->counts, cpu, thread) =
+				*perf_counts(evsel->prev_raw_counts, cpu,
+					     thread);
+		}
+	}
+
+	evsel->counts->aggr = evsel->prev_raw_counts->aggr;
+}
+
+void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		perf_evsel__copy_prev_raw_counts(evsel);
+}
+
+void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	/*
+	 * To collect the overall statistics for interval mode,
+	 * we copy the counts from evsel->prev_raw_counts to
+	 * evsel->counts. The perf_stat_process_counter creates
+	 * aggr values from per cpu values, but the per cpu values
+	 * are 0 for AGGR_GLOBAL. So we use a trick that saves the
+	 * previous aggr value to the first member of perf_counts,
+	 * then aggr calculation in process_counter_values can work
+	 * correctly.
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		*perf_counts(evsel->prev_raw_counts, 0, 0) =
+			evsel->prev_raw_counts->aggr;
+	}
+}
+
+static void zero_per_pkg(struct evsel *counter)
 {
 	if (counter->per_pkg_mask)
-		memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
+		memset(counter->per_pkg_mask, 0, cpu__max_cpu());
 }
 
-static int check_per_pkg(struct perf_evsel *counter,
+static int check_per_pkg(struct evsel *counter,
 			 struct perf_counts_values *vals, int cpu, bool *skip)
 {
 	unsigned long *mask = counter->per_pkg_mask;
-	struct cpu_map *cpus = perf_evsel__cpus(counter);
+	struct perf_cpu_map *cpus = evsel__cpus(counter);
 	int s;
 
 	*skip = false;
@@ -239,11 +291,11 @@
 	if (!counter->per_pkg)
 		return 0;
 
-	if (cpu_map__empty(cpus))
+	if (perf_cpu_map__empty(cpus))
 		return 0;
 
 	if (!mask) {
-		mask = zalloc(MAX_NR_CPUS);
+		mask = zalloc(cpu__max_cpu());
 		if (!mask)
 			return -ENOMEM;
 
@@ -270,7 +322,7 @@
 }
 
 static int
-process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
+process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 		       int cpu, int thread,
 		       struct perf_counts_values *count)
 {
@@ -289,14 +341,18 @@
 	switch (config->aggr_mode) {
 	case AGGR_THREAD:
 	case AGGR_CORE:
+	case AGGR_DIE:
 	case AGGR_SOCKET:
+	case AGGR_NODE:
 	case AGGR_NONE:
 		if (!evsel->snapshot)
-			perf_evsel__compute_deltas(evsel, cpu, thread, count);
+			evsel__compute_deltas(evsel, cpu, thread, count);
 		perf_counts_values__scale(count, config->scale, NULL);
-		if (config->aggr_mode == AGGR_NONE)
-			perf_stat__update_shadow_stats(evsel, count->val, cpu,
-						       &rt_stat);
+		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
+			perf_stat__update_shadow_stats(evsel, count->val,
+						       cpu, &rt_stat);
+		}
+
 		if (config->aggr_mode == AGGR_THREAD) {
 			if (config->stats)
 				perf_stat__update_shadow_stats(evsel,
@@ -308,10 +364,8 @@
 		break;
 	case AGGR_GLOBAL:
 		aggr->val += count->val;
-		if (config->scale) {
-			aggr->ena += count->ena;
-			aggr->run += count->run;
-		}
+		aggr->ena += count->ena;
+		aggr->run += count->run;
 	case AGGR_UNSET:
 	default:
 		break;
@@ -321,13 +375,13 @@
 }
 
 static int process_counter_maps(struct perf_stat_config *config,
-				struct perf_evsel *counter)
+				struct evsel *counter)
 {
-	int nthreads = thread_map__nr(counter->threads);
-	int ncpus = perf_evsel__nr_cpus(counter);
+	int nthreads = perf_thread_map__nr(counter->core.threads);
+	int ncpus = evsel__nr_cpus(counter);
 	int cpu, thread;
 
-	if (counter->system_wide)
+	if (counter->core.system_wide)
 		nthreads = 1;
 
 	for (thread = 0; thread < nthreads; thread++) {
@@ -342,7 +396,7 @@
 }
 
 int perf_stat_process_counter(struct perf_stat_config *config,
-			      struct perf_evsel *counter)
+			      struct evsel *counter)
 {
 	struct perf_counts_values *aggr = &counter->counts->aggr;
 	struct perf_stat_evsel *ps = counter->stats;
@@ -358,7 +412,7 @@
 	 * interval mode, otherwise overall avg running
 	 * averages will be shown for each interval.
 	 */
-	if (config->interval) {
+	if (config->interval || config->summary) {
 		for (i = 0; i < 3; i++)
 			init_stats(&ps->res_stats[i]);
 	}
@@ -374,7 +428,7 @@
 		return 0;
 
 	if (!counter->snapshot)
-		perf_evsel__compute_deltas(counter, -1, -1, aggr);
+		evsel__compute_deltas(counter, -1, -1, aggr);
 	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
 
 	for (i = 0; i < 3; i++)
@@ -382,7 +436,7 @@
 
 	if (verbose > 0) {
 		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-			perf_evsel__name(counter), count[0], count[1], count[2]);
+			evsel__name(counter), count[0], count[1], count[2]);
 	}
 
 	/*
@@ -393,13 +447,12 @@
 	return 0;
 }
 
-int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused,
-				   union perf_event *event,
-				   struct perf_session *session)
+int perf_event__process_stat_event(struct perf_session *session,
+				   union perf_event *event)
 {
 	struct perf_counts_values count;
-	struct stat_event *st = &event->stat;
-	struct perf_evsel *counter;
+	struct perf_record_stat *st = &event->stat;
+	struct evsel *counter;
 
 	count.val = st->val;
 	count.ena = st->ena;
@@ -418,12 +471,12 @@
 
 size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
 {
-	struct stat_event *st = (struct stat_event *) event;
+	struct perf_record_stat *st = (struct perf_record_stat *)event;
 	size_t ret;
 
-	ret = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n",
+	ret = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
 		      st->id, st->cpu, st->thread);
-	ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n",
+	ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
 		       st->val, st->ena, st->run);
 
 	return ret;
@@ -431,10 +484,10 @@
 
 size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
 {
-	struct stat_round_event *rd = (struct stat_round_event *)event;
+	struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
 	size_t ret;
 
-	ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time,
+	ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
 		      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
 
 	return ret;
@@ -454,3 +507,65 @@
 
 	return ret;
 }
+
+int create_perf_stat_counter(struct evsel *evsel,
+			     struct perf_stat_config *config,
+			     struct target *target,
+			     int cpu)
+{
+	struct perf_event_attr *attr = &evsel->core.attr;
+	struct evsel *leader = evsel->leader;
+
+	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+			    PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+	/*
+	 * The event is part of non trivial group, let's enable
+	 * the group read (for leader) and ID retrieval for all
+	 * members.
+	 */
+	if (leader->core.nr_members > 1)
+		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+	attr->inherit = !config->no_inherit;
+
+	/*
+	 * Some events get initialized with sample_(period/type) set,
+	 * like tracepoints. Clear it up for counting.
+	 */
+	attr->sample_period = 0;
+
+	if (config->identifier)
+		attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+
+	if (config->all_user) {
+		attr->exclude_kernel = 1;
+		attr->exclude_user = 0;
+	}
+
+	if (config->all_kernel) {
+		attr->exclude_kernel = 0;
+		attr->exclude_user = 1;
+	}
+
+	/*
+	 * Disabling all counters initially, they will be enabled
+	 * either manually by us or by kernel via enable_on_exec
+	 * set later.
+	 */
+	if (evsel__is_group_leader(evsel)) {
+		attr->disabled = 1;
+
+		/*
+		 * In case of initial_delay we enable tracee
+		 * events manually.
+		 */
+		if (target__none(target) && !config->initial_delay)
+			attr->enable_on_exec = 1;
+	}
+
+	if (target__has_cpu(target) && !target__has_per_thread(target))
+		return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
+
+	return evsel__open_per_thread(evsel, evsel->core.threads);
+}
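
The comment added in perf_evlist__save_aggr_prev_raw_counts() above hinges on one trick: with AGGR_GLOBAL the per-CPU slots are zero, so the previously computed aggregate is stashed into the (cpu 0, thread 0) slot, and a later per-slot aggregation pass then reproduces the correct total. The small standalone program below illustrates that idea in isolation; the counts/counts_values types, NR_CPUS and the aggregate() helper are hypothetical stand-ins for this sketch, not perf's real perf_counts structures.

/*
 * Standalone illustration of the AGGR_GLOBAL trick described in
 * perf_evlist__save_aggr_prev_raw_counts(); all types and helpers here
 * are hypothetical stand-ins, not perf's real data structures.
 */
#include <stdio.h>

#define NR_CPUS 4	/* arbitrary number of per-CPU slots for the demo */

struct counts_values {
	unsigned long long val, ena, run;
};

struct counts {
	struct counts_values cpu[NR_CPUS];	/* per-CPU slots */
	struct counts_values aggr;		/* previously computed aggregate */
};

/* Mimics the AGGR_GLOBAL path of process_counter_values(): sum every slot. */
static struct counts_values aggregate(const struct counts *c)
{
	struct counts_values a = { 0, 0, 0 };
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		a.val += c->cpu[cpu].val;
		a.ena += c->cpu[cpu].ena;
		a.run += c->cpu[cpu].run;
	}
	return a;
}

int main(void)
{
	/* Interval mode, AGGR_GLOBAL: per-CPU slots stayed zero, only aggr was kept. */
	struct counts prev = { .aggr = { .val = 12345, .ena = 100, .run = 100 } };

	/* The trick: save the old aggregate into the first slot ... */
	prev.cpu[0] = prev.aggr;

	/* ... so re-running the per-slot aggregation still recovers the total. */
	struct counts_values total = aggregate(&prev);

	printf("val=%llu ena=%llu run=%llu\n", total.val, total.ena, total.run);
	return 0;
}

This is exactly why the patch copies prev_raw_counts->aggr into *perf_counts(evsel->prev_raw_counts, 0, 0) before the counts are re-processed for the interval-mode summary.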