.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * intel_pt.c: Intel Processor Trace support |
---|
3 | 4 | * Copyright (c) 2013-2015, Intel Corporation. |
---|
4 | | - * |
---|
5 | | - * This program is free software; you can redistribute it and/or modify it |
---|
6 | | - * under the terms and conditions of the GNU General Public License, |
---|
7 | | - * version 2, as published by the Free Software Foundation. |
---|
8 | | - * |
---|
9 | | - * This program is distributed in the hope it will be useful, but WITHOUT |
---|
10 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
---|
11 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
---|
12 | | - * more details. |
---|
13 | | - * |
---|
14 | 5 | */ |
---|
15 | 6 | |
---|
16 | 7 | #include <inttypes.h> |
---|
.. | .. |
---|
18 | 9 | #include <stdbool.h> |
---|
19 | 10 | #include <errno.h> |
---|
20 | 11 | #include <linux/kernel.h> |
---|
| 12 | +#include <linux/string.h> |
---|
21 | 13 | #include <linux/types.h> |
---|
| 14 | +#include <linux/zalloc.h> |
---|
22 | 15 | |
---|
23 | | -#include "../perf.h" |
---|
24 | 16 | #include "session.h" |
---|
25 | 17 | #include "machine.h" |
---|
26 | 18 | #include "memswap.h" |
---|
.. | .. |
---|
31 | 23 | #include "evsel.h" |
---|
32 | 24 | #include "map.h" |
---|
33 | 25 | #include "color.h" |
---|
34 | | -#include "util.h" |
---|
35 | 26 | #include "thread.h" |
---|
36 | 27 | #include "thread-stack.h" |
---|
37 | 28 | #include "symbol.h" |
---|
.. | .. |
---|
42 | 33 | #include "tsc.h" |
---|
43 | 34 | #include "intel-pt.h" |
---|
44 | 35 | #include "config.h" |
---|
| 36 | +#include "util/perf_api_probe.h" |
---|
| 37 | +#include "util/synthetic-events.h" |
---|
| 38 | +#include "time-utils.h" |
---|
| 39 | + |
---|
| 40 | +#include "../arch/x86/include/uapi/asm/perf_regs.h" |
---|
45 | 41 | |
---|
46 | 42 | #include "intel-pt-decoder/intel-pt-log.h" |
---|
47 | 43 | #include "intel-pt-decoder/intel-pt-decoder.h" |
---|
.. | .. |
---|
50 | 46 | |
---|
51 | 47 | #define MAX_TIMESTAMP (~0ULL) |
---|
52 | 48 | |
---|
/*
 * struct range - one [start, end] time interval used to restrict decoding
 * to user-selected time ranges (see intel_pt_setup_time_range()).
 * NOTE(review): values appear to be TSC-based timestamps, matching their
 * use against ptq->sel_timestamp and intel_pt_fast_forward() - confirm.
 */
struct range {
	u64 start;
	u64 end;
};
---|
| 53 | + |
---|
53 | 54 | struct intel_pt { |
---|
54 | 55 | struct auxtrace auxtrace; |
---|
55 | 56 | struct auxtrace_queues queues; |
---|
.. | .. |
---|
57 | 58 | u32 auxtrace_type; |
---|
58 | 59 | struct perf_session *session; |
---|
59 | 60 | struct machine *machine; |
---|
60 | | - struct perf_evsel *switch_evsel; |
---|
| 61 | + struct evsel *switch_evsel; |
---|
61 | 62 | struct thread *unknown_thread; |
---|
62 | 63 | bool timeless_decoding; |
---|
63 | 64 | bool sampling_mode; |
---|
.. | .. |
---|
68 | 69 | bool est_tsc; |
---|
69 | 70 | bool sync_switch; |
---|
70 | 71 | bool mispred_all; |
---|
| 72 | + bool use_thread_stack; |
---|
| 73 | + bool callstack; |
---|
| 74 | + unsigned int br_stack_sz; |
---|
| 75 | + unsigned int br_stack_sz_plus; |
---|
71 | 76 | int have_sched_switch; |
---|
72 | 77 | u32 pmu_type; |
---|
73 | 78 | u64 kernel_start; |
---|
.. | .. |
---|
104 | 109 | u64 pwrx_id; |
---|
105 | 110 | u64 cbr_id; |
---|
106 | 111 | |
---|
| 112 | + bool sample_pebs; |
---|
| 113 | + struct evsel *pebs_evsel; |
---|
| 114 | + |
---|
107 | 115 | u64 tsc_bit; |
---|
108 | 116 | u64 mtc_bit; |
---|
109 | 117 | u64 mtc_freq_bits; |
---|
.. | .. |
---|
118 | 126 | |
---|
119 | 127 | char *filter; |
---|
120 | 128 | struct addr_filters filts; |
---|
| 129 | + |
---|
| 130 | + struct range *time_ranges; |
---|
| 131 | + unsigned int range_cnt; |
---|
| 132 | + |
---|
| 133 | + struct ip_callchain *chain; |
---|
| 134 | + struct branch_stack *br_stack; |
---|
121 | 135 | }; |
---|
122 | 136 | |
---|
123 | 137 | enum switch_state { |
---|
.. | .. |
---|
137 | 151 | const struct intel_pt_state *state; |
---|
138 | 152 | struct ip_callchain *chain; |
---|
139 | 153 | struct branch_stack *last_branch; |
---|
140 | | - struct branch_stack *last_branch_rb; |
---|
141 | | - size_t last_branch_pos; |
---|
142 | 154 | union perf_event *event_buf; |
---|
143 | 155 | bool on_heap; |
---|
144 | 156 | bool stop; |
---|
.. | .. |
---|
154 | 166 | bool have_sample; |
---|
155 | 167 | u64 time; |
---|
156 | 168 | u64 timestamp; |
---|
| 169 | + u64 sel_timestamp; |
---|
| 170 | + bool sel_start; |
---|
| 171 | + unsigned int sel_idx; |
---|
157 | 172 | u32 flags; |
---|
158 | 173 | u16 insn_len; |
---|
159 | 174 | u64 last_insn_cnt; |
---|
| 175 | + u64 ipc_insn_cnt; |
---|
| 176 | + u64 ipc_cyc_cnt; |
---|
| 177 | + u64 last_in_insn_cnt; |
---|
| 178 | + u64 last_in_cyc_cnt; |
---|
| 179 | + u64 last_br_insn_cnt; |
---|
| 180 | + u64 last_br_cyc_cnt; |
---|
| 181 | + unsigned int cbr_seen; |
---|
160 | 182 | char insn[INTEL_PT_INSN_BUF_SZ]; |
---|
161 | 183 | }; |
---|
162 | 184 | |
---|
.. | .. |
---|
168 | 190 | int ret, pkt_len, i; |
---|
169 | 191 | char desc[INTEL_PT_PKT_DESC_MAX]; |
---|
170 | 192 | const char *color = PERF_COLOR_BLUE; |
---|
| 193 | + enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX; |
---|
171 | 194 | |
---|
172 | 195 | color_fprintf(stdout, color, |
---|
173 | 196 | ". ... Intel Processor Trace data: size %zu bytes\n", |
---|
174 | 197 | len); |
---|
175 | 198 | |
---|
176 | 199 | while (len) { |
---|
177 | | - ret = intel_pt_get_packet(buf, len, &packet); |
---|
| 200 | + ret = intel_pt_get_packet(buf, len, &packet, &ctx); |
---|
178 | 201 | if (ret > 0) |
---|
179 | 202 | pkt_len = ret; |
---|
180 | 203 | else |
---|
.. | .. |
---|
206 | 229 | intel_pt_dump(pt, buf, len); |
---|
207 | 230 | } |
---|
208 | 231 | |
---|
/*
 * Write a perf event to the Intel PT debug log, but only when logging is
 * enabled and a log file is actually open.
 */
static void intel_pt_log_event(union perf_event *event)
{
	FILE *f = intel_pt_log_fp();

	if (!intel_pt_enable_logging || !f)
		return;

	perf_event__fprintf(event, NULL, f);
}
---|
| 241 | + |
---|
/*
 * Dump the raw Intel PT trace bytes carried inline in a sample's AUX area
 * payload (sample mode - see intel_pt_sampling_mode()).
 */
static void intel_pt_dump_sample(struct perf_session *session,
				 struct perf_sample *sample)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	printf("\n");
	intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
}
---|
| 251 | + |
---|
/*
 * Decide whether perf events with timestamp @tm should go to the Intel PT
 * debug log.  The log_plus_flags / log_minus_flags ALL_PERF_EVTS flags
 * force the decision either way; otherwise log only when @tm falls inside
 * one of the user-selected time ranges, or when no ranges were given.
 */
static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
{
	struct perf_time_interval *range = pt->synth_opts.ptime_range;
	int n = pt->synth_opts.range_num;

	if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
		return true;

	if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
		return false;

	/* perf_time__ranges_skip_sample does not work if time is zero */
	if (!tm)
		tm = 1;

	return !n || !perf_time__ranges_skip_sample(range, n, tm);
}
---|
| 269 | + |
---|
209 | 270 | static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, |
---|
210 | 271 | struct auxtrace_buffer *b) |
---|
211 | 272 | { |
---|
.. | .. |
---|
223 | 284 | return 0; |
---|
224 | 285 | } |
---|
225 | 286 | |
---|
226 | | -/* This function assumes data is processed sequentially only */ |
---|
227 | | -static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) |
---|
| 287 | +static int intel_pt_get_buffer(struct intel_pt_queue *ptq, |
---|
| 288 | + struct auxtrace_buffer *buffer, |
---|
| 289 | + struct auxtrace_buffer *old_buffer, |
---|
| 290 | + struct intel_pt_buffer *b) |
---|
228 | 291 | { |
---|
229 | | - struct intel_pt_queue *ptq = data; |
---|
230 | | - struct auxtrace_buffer *buffer = ptq->buffer; |
---|
231 | | - struct auxtrace_buffer *old_buffer = ptq->old_buffer; |
---|
232 | | - struct auxtrace_queue *queue; |
---|
233 | 292 | bool might_overlap; |
---|
234 | | - |
---|
235 | | - if (ptq->stop) { |
---|
236 | | - b->len = 0; |
---|
237 | | - return 0; |
---|
238 | | - } |
---|
239 | | - |
---|
240 | | - queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; |
---|
241 | | - |
---|
242 | | - buffer = auxtrace_buffer__next(queue, buffer); |
---|
243 | | - if (!buffer) { |
---|
244 | | - if (old_buffer) |
---|
245 | | - auxtrace_buffer__drop_data(old_buffer); |
---|
246 | | - b->len = 0; |
---|
247 | | - return 0; |
---|
248 | | - } |
---|
249 | | - |
---|
250 | | - ptq->buffer = buffer; |
---|
251 | 293 | |
---|
252 | 294 | if (!buffer->data) { |
---|
253 | 295 | int fd = perf_data__fd(ptq->pt->session->data); |
---|
.. | .. |
---|
277 | 319 | } else { |
---|
278 | 320 | b->consecutive = true; |
---|
279 | 321 | } |
---|
| 322 | + |
---|
| 323 | + return 0; |
---|
| 324 | +} |
---|
| 325 | + |
---|
/* Do not drop buffers with references - refer intel_pt_get_trace() */
static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
					   struct auxtrace_buffer *buffer)
{
	/*
	 * ptq->buffer and ptq->old_buffer are still owned by the normal
	 * decode path, so only lookahead-private buffers may have their
	 * data dropped here.
	 */
	if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
		return;

	auxtrace_buffer__drop_data(buffer);
}
---|
| 335 | + |
---|
/* Must be serialized with respect to intel_pt_lookahead() */
/*
 * Walk the trace buffers ahead of the current decode position, invoking
 * @cb on each non-empty buffer, without disturbing ptq->buffer /
 * ptq->old_buffer (which intel_pt_get_trace() owns).  The previous
 * buffer's data is kept alive while the next one is processed (the
 * decoder may reference it for overlap handling), then dropped.  Stops
 * early if the callback or buffer fetch returns an error.
 */
static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
			      void *cb_data)
{
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer;
	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;
	int err = 0;

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	while (1) {
		struct intel_pt_buffer b = { .len = 0 };

		buffer = auxtrace_buffer__next(queue, buffer);
		if (!buffer)
			break;

		err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
		if (err)
			break;

		if (b.len) {
			intel_pt_lookahead_drop_buffer(ptq, old_buffer);
			old_buffer = buffer;
		} else {
			/* Empty buffer: release it and keep scanning */
			intel_pt_lookahead_drop_buffer(ptq, buffer);
			continue;
		}

		err = cb(&b, cb_data);
		if (err)
			break;
	}

	/* Release whatever the loop was still holding */
	if (buffer != old_buffer)
		intel_pt_lookahead_drop_buffer(ptq, buffer);
	intel_pt_lookahead_drop_buffer(ptq, old_buffer);

	return err;
}
---|
| 378 | + |
---|
| 379 | +/* |
---|
| 380 | + * This function assumes data is processed sequentially only. |
---|
| 381 | + * Must be serialized with respect to intel_pt_lookahead() |
---|
| 382 | + */ |
---|
| 383 | +static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) |
---|
| 384 | +{ |
---|
| 385 | + struct intel_pt_queue *ptq = data; |
---|
| 386 | + struct auxtrace_buffer *buffer = ptq->buffer; |
---|
| 387 | + struct auxtrace_buffer *old_buffer = ptq->old_buffer; |
---|
| 388 | + struct auxtrace_queue *queue; |
---|
| 389 | + int err; |
---|
| 390 | + |
---|
| 391 | + if (ptq->stop) { |
---|
| 392 | + b->len = 0; |
---|
| 393 | + return 0; |
---|
| 394 | + } |
---|
| 395 | + |
---|
| 396 | + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; |
---|
| 397 | + |
---|
| 398 | + buffer = auxtrace_buffer__next(queue, buffer); |
---|
| 399 | + if (!buffer) { |
---|
| 400 | + if (old_buffer) |
---|
| 401 | + auxtrace_buffer__drop_data(old_buffer); |
---|
| 402 | + b->len = 0; |
---|
| 403 | + return 0; |
---|
| 404 | + } |
---|
| 405 | + |
---|
| 406 | + ptq->buffer = buffer; |
---|
| 407 | + |
---|
| 408 | + err = intel_pt_get_buffer(ptq, buffer, old_buffer, b); |
---|
| 409 | + if (err) |
---|
| 410 | + return err; |
---|
280 | 411 | |
---|
281 | 412 | if (ptq->step_through_buffers) |
---|
282 | 413 | ptq->stop = true; |
---|
.. | .. |
---|
405 | 536 | return NULL; |
---|
406 | 537 | |
---|
407 | 538 | return auxtrace_cache__lookup(dso->auxtrace_cache, offset); |
---|
| 539 | +} |
---|
| 540 | + |
---|
/*
 * Remove the cached decoded-instruction entry for @dso at @offset, if the
 * dso has an auxtrace cache at all.
 */
static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
				      u64 offset)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	if (!c)
		return;

	auxtrace_cache__remove(dso->auxtrace_cache, offset);
}
---|
409 | 551 | |
---|
410 | 552 | static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) |
---|
.. | .. |
---|
629 | 771 | |
---|
630 | 772 | static bool intel_pt_exclude_kernel(struct intel_pt *pt) |
---|
631 | 773 | { |
---|
632 | | - struct perf_evsel *evsel; |
---|
| 774 | + struct evsel *evsel; |
---|
633 | 775 | |
---|
634 | 776 | evlist__for_each_entry(pt->session->evlist, evsel) { |
---|
635 | | - if (intel_pt_get_config(pt, &evsel->attr, NULL) && |
---|
636 | | - !evsel->attr.exclude_kernel) |
---|
| 777 | + if (intel_pt_get_config(pt, &evsel->core.attr, NULL) && |
---|
| 778 | + !evsel->core.attr.exclude_kernel) |
---|
637 | 779 | return false; |
---|
638 | 780 | } |
---|
639 | 781 | return true; |
---|
.. | .. |
---|
641 | 783 | |
---|
642 | 784 | static bool intel_pt_return_compression(struct intel_pt *pt) |
---|
643 | 785 | { |
---|
644 | | - struct perf_evsel *evsel; |
---|
| 786 | + struct evsel *evsel; |
---|
645 | 787 | u64 config; |
---|
646 | 788 | |
---|
647 | 789 | if (!pt->noretcomp_bit) |
---|
648 | 790 | return true; |
---|
649 | 791 | |
---|
650 | 792 | evlist__for_each_entry(pt->session->evlist, evsel) { |
---|
651 | | - if (intel_pt_get_config(pt, &evsel->attr, &config) && |
---|
| 793 | + if (intel_pt_get_config(pt, &evsel->core.attr, &config) && |
---|
652 | 794 | (config & pt->noretcomp_bit)) |
---|
653 | 795 | return false; |
---|
654 | 796 | } |
---|
.. | .. |
---|
657 | 799 | |
---|
658 | 800 | static bool intel_pt_branch_enable(struct intel_pt *pt) |
---|
659 | 801 | { |
---|
660 | | - struct perf_evsel *evsel; |
---|
| 802 | + struct evsel *evsel; |
---|
661 | 803 | u64 config; |
---|
662 | 804 | |
---|
663 | 805 | evlist__for_each_entry(pt->session->evlist, evsel) { |
---|
664 | | - if (intel_pt_get_config(pt, &evsel->attr, &config) && |
---|
| 806 | + if (intel_pt_get_config(pt, &evsel->core.attr, &config) && |
---|
665 | 807 | (config & 1) && !(config & 0x2000)) |
---|
666 | 808 | return false; |
---|
667 | 809 | } |
---|
.. | .. |
---|
670 | 812 | |
---|
671 | 813 | static unsigned int intel_pt_mtc_period(struct intel_pt *pt) |
---|
672 | 814 | { |
---|
673 | | - struct perf_evsel *evsel; |
---|
| 815 | + struct evsel *evsel; |
---|
674 | 816 | unsigned int shift; |
---|
675 | 817 | u64 config; |
---|
676 | 818 | |
---|
.. | .. |
---|
681 | 823 | config >>= 1; |
---|
682 | 824 | |
---|
683 | 825 | evlist__for_each_entry(pt->session->evlist, evsel) { |
---|
684 | | - if (intel_pt_get_config(pt, &evsel->attr, &config)) |
---|
| 826 | + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) |
---|
685 | 827 | return (config & pt->mtc_freq_bits) >> shift; |
---|
686 | 828 | } |
---|
687 | 829 | return 0; |
---|
.. | .. |
---|
689 | 831 | |
---|
690 | 832 | static bool intel_pt_timeless_decoding(struct intel_pt *pt) |
---|
691 | 833 | { |
---|
692 | | - struct perf_evsel *evsel; |
---|
| 834 | + struct evsel *evsel; |
---|
693 | 835 | bool timeless_decoding = true; |
---|
694 | 836 | u64 config; |
---|
695 | 837 | |
---|
.. | .. |
---|
697 | 839 | return true; |
---|
698 | 840 | |
---|
699 | 841 | evlist__for_each_entry(pt->session->evlist, evsel) { |
---|
700 | | - if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) |
---|
| 842 | + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) |
---|
701 | 843 | return true; |
---|
702 | | - if (intel_pt_get_config(pt, &evsel->attr, &config)) { |
---|
| 844 | + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) { |
---|
703 | 845 | if (config & pt->tsc_bit) |
---|
704 | 846 | timeless_decoding = false; |
---|
705 | 847 | else |
---|
.. | .. |
---|
711 | 853 | |
---|
712 | 854 | static bool intel_pt_tracing_kernel(struct intel_pt *pt) |
---|
713 | 855 | { |
---|
714 | | - struct perf_evsel *evsel; |
---|
| 856 | + struct evsel *evsel; |
---|
715 | 857 | |
---|
716 | 858 | evlist__for_each_entry(pt->session->evlist, evsel) { |
---|
717 | | - if (intel_pt_get_config(pt, &evsel->attr, NULL) && |
---|
718 | | - !evsel->attr.exclude_kernel) |
---|
| 859 | + if (intel_pt_get_config(pt, &evsel->core.attr, NULL) && |
---|
| 860 | + !evsel->core.attr.exclude_kernel) |
---|
719 | 861 | return true; |
---|
720 | 862 | } |
---|
721 | 863 | return false; |
---|
.. | .. |
---|
723 | 865 | |
---|
724 | 866 | static bool intel_pt_have_tsc(struct intel_pt *pt) |
---|
725 | 867 | { |
---|
726 | | - struct perf_evsel *evsel; |
---|
| 868 | + struct evsel *evsel; |
---|
727 | 869 | bool have_tsc = false; |
---|
728 | 870 | u64 config; |
---|
729 | 871 | |
---|
.. | .. |
---|
731 | 873 | return false; |
---|
732 | 874 | |
---|
733 | 875 | evlist__for_each_entry(pt->session->evlist, evsel) { |
---|
734 | | - if (intel_pt_get_config(pt, &evsel->attr, &config)) { |
---|
| 876 | + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) { |
---|
735 | 877 | if (config & pt->tsc_bit) |
---|
736 | 878 | have_tsc = true; |
---|
737 | 879 | else |
---|
.. | .. |
---|
739 | 881 | } |
---|
740 | 882 | } |
---|
741 | 883 | return have_tsc; |
---|
| 884 | +} |
---|
| 885 | + |
---|
| 886 | +static bool intel_pt_sampling_mode(struct intel_pt *pt) |
---|
| 887 | +{ |
---|
| 888 | + struct evsel *evsel; |
---|
| 889 | + |
---|
| 890 | + evlist__for_each_entry(pt->session->evlist, evsel) { |
---|
| 891 | + if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) && |
---|
| 892 | + evsel->core.attr.aux_sample_size) |
---|
| 893 | + return true; |
---|
| 894 | + } |
---|
| 895 | + return false; |
---|
| 896 | +} |
---|
| 897 | + |
---|
/*
 * Return the Intel PT config value from the first evsel that has one,
 * or 0 if none.  The result is passed to the decoder as params.ctl.
 * NOTE(review): presumably this is the IA32_RTIT_CTL-style bit encoding
 * used by the PT PMU - confirm against the decoder's use of params.ctl.
 */
static u64 intel_pt_ctl(struct intel_pt *pt)
{
	struct evsel *evsel;
	u64 config;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
			return config;
	}
	return 0;
}
---|
743 | 909 | |
---|
744 | 910 | static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) |
---|
.. | .. |
---|
750 | 916 | return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / |
---|
751 | 917 | pt->tc.time_mult; |
---|
752 | 918 | } |
---|
| 919 | + |
---|
| 920 | +static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt) |
---|
| 921 | +{ |
---|
| 922 | + size_t sz = sizeof(struct ip_callchain); |
---|
| 923 | + |
---|
| 924 | + /* Add 1 to callchain_sz for callchain context */ |
---|
| 925 | + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); |
---|
| 926 | + return zalloc(sz); |
---|
| 927 | +} |
---|
| 928 | + |
---|
/*
 * Prepare for callchain synthesis: mark PERF_SAMPLE_CALLCHAIN to be
 * synthesized for every evsel that does not already sample it, and
 * allocate the shared callchain buffer.  Returns 0 or -ENOMEM.
 */
static int intel_pt_callchain_init(struct intel_pt *pt)
{
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
			evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
	}

	pt->chain = intel_pt_alloc_chain(pt);
	if (!pt->chain)
		return -ENOMEM;

	return 0;
}
---|
| 944 | + |
---|
/*
 * Attach a callchain, reconstructed late from the per-thread call stack,
 * to a sample that did not originally carry one.  The shared pt->chain
 * buffer is reused for every sample, so the sample must be consumed
 * before the next call.
 */
static void intel_pt_add_callchain(struct intel_pt *pt,
				   struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(pt->machine,
							sample->pid,
							sample->tid);

	/* +1 matches the extra context entry from intel_pt_alloc_chain() */
	thread_stack__sample_late(thread, sample->cpu, pt->chain,
				  pt->synth_opts.callchain_sz + 1, sample->ip,
				  pt->kernel_start);

	sample->callchain = pt->chain;
}
---|
| 958 | + |
---|
| 959 | +static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt) |
---|
| 960 | +{ |
---|
| 961 | + size_t sz = sizeof(struct branch_stack); |
---|
| 962 | + |
---|
| 963 | + sz += entry_cnt * sizeof(struct branch_entry); |
---|
| 964 | + return zalloc(sz); |
---|
| 965 | +} |
---|
| 966 | + |
---|
/*
 * Prepare for branch stack synthesis: mark PERF_SAMPLE_BRANCH_STACK to be
 * synthesized for every evsel that does not already sample it, and
 * allocate the shared branch stack buffer.  Returns 0 or -ENOMEM.
 */
static int intel_pt_br_stack_init(struct intel_pt *pt)
{
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
	}

	pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);
	if (!pt->br_stack)
		return -ENOMEM;

	return 0;
}
---|
| 982 | + |
---|
/*
 * Attach a branch stack, reconstructed late from the per-thread branch
 * history, to a sample that did not originally carry one.  The shared
 * pt->br_stack buffer is reused for every sample.
 */
static void intel_pt_add_br_stack(struct intel_pt *pt,
				  struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(pt->machine,
							sample->pid,
							sample->tid);

	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
				     pt->br_stack_sz, sample->ip,
				     pt->kernel_start);

	sample->branch_stack = pt->br_stack;
}
---|
| 996 | + |
---|
/*
 * INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2: three groups of LBR
 * items, each with INTEL_PT_BLK_ITEM_ID_CNT entries, so last-branch
 * buffers are sized to hold all of them.
 */
#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
---|
753 | 999 | |
---|
754 | 1000 | static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, |
---|
755 | 1001 | unsigned int queue_nr) |
---|
.. | .. |
---|
763 | 1009 | return NULL; |
---|
764 | 1010 | |
---|
765 | 1011 | if (pt->synth_opts.callchain) { |
---|
766 | | - size_t sz = sizeof(struct ip_callchain); |
---|
767 | | - |
---|
768 | | - /* Add 1 to callchain_sz for callchain context */ |
---|
769 | | - sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); |
---|
770 | | - ptq->chain = zalloc(sz); |
---|
| 1012 | + ptq->chain = intel_pt_alloc_chain(pt); |
---|
771 | 1013 | if (!ptq->chain) |
---|
772 | 1014 | goto out_free; |
---|
773 | 1015 | } |
---|
774 | 1016 | |
---|
775 | | - if (pt->synth_opts.last_branch) { |
---|
776 | | - size_t sz = sizeof(struct branch_stack); |
---|
| 1017 | + if (pt->synth_opts.last_branch || pt->synth_opts.other_events) { |
---|
| 1018 | + unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz); |
---|
777 | 1019 | |
---|
778 | | - sz += pt->synth_opts.last_branch_sz * |
---|
779 | | - sizeof(struct branch_entry); |
---|
780 | | - ptq->last_branch = zalloc(sz); |
---|
| 1020 | + ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt); |
---|
781 | 1021 | if (!ptq->last_branch) |
---|
782 | | - goto out_free; |
---|
783 | | - ptq->last_branch_rb = zalloc(sz); |
---|
784 | | - if (!ptq->last_branch_rb) |
---|
785 | 1022 | goto out_free; |
---|
786 | 1023 | } |
---|
787 | 1024 | |
---|
.. | .. |
---|
799 | 1036 | |
---|
800 | 1037 | params.get_trace = intel_pt_get_trace; |
---|
801 | 1038 | params.walk_insn = intel_pt_walk_next_insn; |
---|
| 1039 | + params.lookahead = intel_pt_lookahead; |
---|
802 | 1040 | params.data = ptq; |
---|
803 | 1041 | params.return_compression = intel_pt_return_compression(pt); |
---|
804 | 1042 | params.branch_enable = intel_pt_branch_enable(pt); |
---|
| 1043 | + params.ctl = intel_pt_ctl(pt); |
---|
805 | 1044 | params.max_non_turbo_ratio = pt->max_non_turbo_ratio; |
---|
806 | 1045 | params.mtc_period = intel_pt_mtc_period(pt); |
---|
807 | 1046 | params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; |
---|
808 | 1047 | params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; |
---|
| 1048 | + params.quick = pt->synth_opts.quick; |
---|
809 | 1049 | |
---|
810 | 1050 | if (pt->filts.cnt > 0) |
---|
811 | 1051 | params.pgd_ip = intel_pt_pgd_ip; |
---|
.. | .. |
---|
850 | 1090 | out_free: |
---|
851 | 1091 | zfree(&ptq->event_buf); |
---|
852 | 1092 | zfree(&ptq->last_branch); |
---|
853 | | - zfree(&ptq->last_branch_rb); |
---|
854 | 1093 | zfree(&ptq->chain); |
---|
855 | 1094 | free(ptq); |
---|
856 | 1095 | return NULL; |
---|
.. | .. |
---|
866 | 1105 | intel_pt_decoder_free(ptq->decoder); |
---|
867 | 1106 | zfree(&ptq->event_buf); |
---|
868 | 1107 | zfree(&ptq->last_branch); |
---|
869 | | - zfree(&ptq->last_branch_rb); |
---|
870 | 1108 | zfree(&ptq->chain); |
---|
871 | 1109 | free(ptq); |
---|
872 | 1110 | } |
---|
.. | .. |
---|
918 | 1156 | ptq->insn_len = ptq->state->insn_len; |
---|
919 | 1157 | memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ); |
---|
920 | 1158 | } |
---|
| 1159 | + |
---|
| 1160 | + if (ptq->state->type & INTEL_PT_TRACE_BEGIN) |
---|
| 1161 | + ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN; |
---|
| 1162 | + if (ptq->state->type & INTEL_PT_TRACE_END) |
---|
| 1163 | + ptq->flags |= PERF_IP_FLAG_TRACE_END; |
---|
| 1164 | +} |
---|
| 1165 | + |
---|
/*
 * Prime the queue's time-range selection state from the first requested
 * range.  A zero start timestamp means decoding begins already inside
 * the range, so the next timestamp of interest is the range end instead
 * (sel_start distinguishes the two cases).
 */
static void intel_pt_setup_time_range(struct intel_pt *pt,
				      struct intel_pt_queue *ptq)
{
	if (!pt->range_cnt)
		return;

	ptq->sel_timestamp = pt->time_ranges[0].start;
	ptq->sel_idx = 0;

	if (ptq->sel_timestamp) {
		ptq->sel_start = true;
	} else {
		ptq->sel_timestamp = pt->time_ranges[0].end;
		ptq->sel_start = false;
	}
}
---|
922 | 1182 | |
---|
923 | 1183 | static int intel_pt_setup_queue(struct intel_pt *pt, |
---|
.. | .. |
---|
939 | 1199 | ptq->cpu = queue->cpu; |
---|
940 | 1200 | ptq->tid = queue->tid; |
---|
941 | 1201 | |
---|
| 1202 | + ptq->cbr_seen = UINT_MAX; |
---|
| 1203 | + |
---|
942 | 1204 | if (pt->sampling_mode && !pt->snapshot_mode && |
---|
943 | 1205 | pt->timeless_decoding) |
---|
944 | 1206 | ptq->step_through_buffers = true; |
---|
945 | 1207 | |
---|
946 | 1208 | ptq->sync_switch = pt->sync_switch; |
---|
| 1209 | + |
---|
| 1210 | + intel_pt_setup_time_range(pt, ptq); |
---|
947 | 1211 | } |
---|
948 | 1212 | |
---|
949 | 1213 | if (!ptq->on_heap && |
---|
.. | .. |
---|
958 | 1222 | intel_pt_log("queue %u getting timestamp\n", queue_nr); |
---|
959 | 1223 | intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", |
---|
960 | 1224 | queue_nr, ptq->cpu, ptq->pid, ptq->tid); |
---|
| 1225 | + |
---|
| 1226 | + if (ptq->sel_start && ptq->sel_timestamp) { |
---|
| 1227 | + ret = intel_pt_fast_forward(ptq->decoder, |
---|
| 1228 | + ptq->sel_timestamp); |
---|
| 1229 | + if (ret) |
---|
| 1230 | + return ret; |
---|
| 1231 | + } |
---|
| 1232 | + |
---|
961 | 1233 | while (1) { |
---|
962 | 1234 | state = intel_pt_decode(ptq->decoder); |
---|
963 | 1235 | if (state->err) { |
---|
.. | .. |
---|
977 | 1249 | queue_nr, ptq->timestamp); |
---|
978 | 1250 | ptq->state = state; |
---|
979 | 1251 | ptq->have_sample = true; |
---|
| 1252 | + if (ptq->sel_start && ptq->sel_timestamp && |
---|
| 1253 | + ptq->timestamp < ptq->sel_timestamp) |
---|
| 1254 | + ptq->have_sample = false; |
---|
980 | 1255 | intel_pt_sample_flags(ptq); |
---|
981 | 1256 | ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); |
---|
982 | 1257 | if (ret) |
---|
.. | .. |
---|
1000 | 1275 | return 0; |
---|
1001 | 1276 | } |
---|
1002 | 1277 | |
---|
1003 | | -static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) |
---|
1004 | | -{ |
---|
1005 | | - struct branch_stack *bs_src = ptq->last_branch_rb; |
---|
1006 | | - struct branch_stack *bs_dst = ptq->last_branch; |
---|
1007 | | - size_t nr = 0; |
---|
1008 | | - |
---|
1009 | | - bs_dst->nr = bs_src->nr; |
---|
1010 | | - |
---|
1011 | | - if (!bs_src->nr) |
---|
1012 | | - return; |
---|
1013 | | - |
---|
1014 | | - nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; |
---|
1015 | | - memcpy(&bs_dst->entries[0], |
---|
1016 | | - &bs_src->entries[ptq->last_branch_pos], |
---|
1017 | | - sizeof(struct branch_entry) * nr); |
---|
1018 | | - |
---|
1019 | | - if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { |
---|
1020 | | - memcpy(&bs_dst->entries[nr], |
---|
1021 | | - &bs_src->entries[0], |
---|
1022 | | - sizeof(struct branch_entry) * ptq->last_branch_pos); |
---|
1023 | | - } |
---|
1024 | | -} |
---|
1025 | | - |
---|
1026 | | -static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) |
---|
1027 | | -{ |
---|
1028 | | - ptq->last_branch_pos = 0; |
---|
1029 | | - ptq->last_branch_rb->nr = 0; |
---|
1030 | | -} |
---|
1031 | | - |
---|
1032 | | -static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) |
---|
1033 | | -{ |
---|
1034 | | - const struct intel_pt_state *state = ptq->state; |
---|
1035 | | - struct branch_stack *bs = ptq->last_branch_rb; |
---|
1036 | | - struct branch_entry *be; |
---|
1037 | | - |
---|
1038 | | - if (!ptq->last_branch_pos) |
---|
1039 | | - ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; |
---|
1040 | | - |
---|
1041 | | - ptq->last_branch_pos -= 1; |
---|
1042 | | - |
---|
1043 | | - be = &bs->entries[ptq->last_branch_pos]; |
---|
1044 | | - be->from = state->from_ip; |
---|
1045 | | - be->to = state->to_ip; |
---|
1046 | | - be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); |
---|
1047 | | - be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); |
---|
1048 | | - /* No support for mispredict */ |
---|
1049 | | - be->flags.mispred = ptq->pt->mispred_all; |
---|
1050 | | - |
---|
1051 | | - if (bs->nr < ptq->pt->synth_opts.last_branch_sz) |
---|
1052 | | - bs->nr += 1; |
---|
1053 | | -} |
---|
1054 | | - |
---|
1055 | 1278 | static inline bool intel_pt_skip_event(struct intel_pt *pt) |
---|
1056 | 1279 | { |
---|
1057 | 1280 | return pt->synth_opts.initial_skip && |
---|
1058 | 1281 | pt->num_events++ < pt->synth_opts.initial_skip; |
---|
| 1282 | +} |
---|
| 1283 | + |
---|
/*
 * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
 * Also ensure CBR is first non-skipped event by allowing for 4 more samples
 * from this decoder state.
 *
 * Unlike intel_pt_skip_event(), this does not advance pt->num_events.
 */
static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
{
	return pt->synth_opts.initial_skip &&
	       pt->num_events + 4 < pt->synth_opts.initial_skip;
}
---|
| 1294 | + |
---|
/*
 * Fill in the event header and the sample fields that are common to
 * every sample type synthesized from this queue (pid/tid/cpu and the
 * last decoded instruction bytes).
 */
static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
{
	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.size = sizeof(struct perf_event_header);

	sample->pid = ptq->pid;
	sample->tid = ptq->tid;
	sample->cpu = ptq->cpu;
	sample->insn_len = ptq->insn_len;
	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
}
---|
1060 | 1308 | |
---|
1061 | 1309 | static void intel_pt_prep_b_sample(struct intel_pt *pt, |
---|
.. | .. |
---|
1063 | 1311 | union perf_event *event, |
---|
1064 | 1312 | struct perf_sample *sample) |
---|
1065 | 1313 | { |
---|
| 1314 | + intel_pt_prep_a_sample(ptq, event, sample); |
---|
| 1315 | + |
---|
1066 | 1316 | if (!pt->timeless_decoding) |
---|
1067 | 1317 | sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); |
---|
1068 | 1318 | |
---|
1069 | 1319 | sample->ip = ptq->state->from_ip; |
---|
1070 | 1320 | sample->cpumode = intel_pt_cpumode(pt, sample->ip); |
---|
1071 | | - sample->pid = ptq->pid; |
---|
1072 | | - sample->tid = ptq->tid; |
---|
1073 | 1321 | sample->addr = ptq->state->to_ip; |
---|
1074 | 1322 | sample->period = 1; |
---|
1075 | | - sample->cpu = ptq->cpu; |
---|
1076 | 1323 | sample->flags = ptq->flags; |
---|
1077 | | - sample->insn_len = ptq->insn_len; |
---|
1078 | | - memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); |
---|
1079 | 1324 | |
---|
1080 | | - event->sample.header.type = PERF_RECORD_SAMPLE; |
---|
1081 | 1325 | event->sample.header.misc = sample->cpumode; |
---|
1082 | | - event->sample.header.size = sizeof(struct perf_event_header); |
---|
1083 | 1326 | } |
---|
1084 | 1327 | |
---|
1085 | 1328 | static int intel_pt_inject_event(union perf_event *event, |
---|
.. | .. |
---|
1099 | 1342 | return intel_pt_inject_event(event, sample, type); |
---|
1100 | 1343 | } |
---|
1101 | 1344 | |
---|
1102 | | -static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, |
---|
1103 | | - union perf_event *event, |
---|
1104 | | - struct perf_sample *sample, u64 type) |
---|
| 1345 | +static int intel_pt_deliver_synth_event(struct intel_pt *pt, |
---|
| 1346 | + union perf_event *event, |
---|
| 1347 | + struct perf_sample *sample, u64 type) |
---|
1105 | 1348 | { |
---|
1106 | 1349 | int ret; |
---|
1107 | 1350 | |
---|
.. | .. |
---|
1123 | 1366 | struct perf_sample sample = { .ip = 0, }; |
---|
1124 | 1367 | struct dummy_branch_stack { |
---|
1125 | 1368 | u64 nr; |
---|
| 1369 | + u64 hw_idx; |
---|
1126 | 1370 | struct branch_entry entries; |
---|
1127 | 1371 | } dummy_bs; |
---|
1128 | 1372 | |
---|
.. | .. |
---|
1144 | 1388 | if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { |
---|
1145 | 1389 | dummy_bs = (struct dummy_branch_stack){ |
---|
1146 | 1390 | .nr = 1, |
---|
| 1391 | + .hw_idx = -1ULL, |
---|
1147 | 1392 | .entries = { |
---|
1148 | 1393 | .from = sample.ip, |
---|
1149 | 1394 | .to = sample.addr, |
---|
.. | .. |
---|
1152 | 1397 | sample.branch_stack = (struct branch_stack *)&dummy_bs; |
---|
1153 | 1398 | } |
---|
1154 | 1399 | |
---|
1155 | | - return intel_pt_deliver_synth_b_event(pt, event, &sample, |
---|
1156 | | - pt->branches_sample_type); |
---|
| 1400 | + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) |
---|
| 1401 | + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; |
---|
| 1402 | + if (sample.cyc_cnt) { |
---|
| 1403 | + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; |
---|
| 1404 | + ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; |
---|
| 1405 | + ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; |
---|
| 1406 | + } |
---|
| 1407 | + |
---|
| 1408 | + return intel_pt_deliver_synth_event(pt, event, &sample, |
---|
| 1409 | + pt->branches_sample_type); |
---|
1157 | 1410 | } |
---|
1158 | 1411 | |
---|
1159 | 1412 | static void intel_pt_prep_sample(struct intel_pt *pt, |
---|
.. | .. |
---|
1164 | 1417 | intel_pt_prep_b_sample(pt, ptq, event, sample); |
---|
1165 | 1418 | |
---|
1166 | 1419 | if (pt->synth_opts.callchain) { |
---|
1167 | | - thread_stack__sample(ptq->thread, ptq->chain, |
---|
| 1420 | + thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, |
---|
1168 | 1421 | pt->synth_opts.callchain_sz + 1, |
---|
1169 | 1422 | sample->ip, pt->kernel_start); |
---|
1170 | 1423 | sample->callchain = ptq->chain; |
---|
1171 | 1424 | } |
---|
1172 | 1425 | |
---|
1173 | 1426 | if (pt->synth_opts.last_branch) { |
---|
1174 | | - intel_pt_copy_last_branch_rb(ptq); |
---|
| 1427 | + thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch, |
---|
| 1428 | + pt->br_stack_sz); |
---|
1175 | 1429 | sample->branch_stack = ptq->last_branch; |
---|
1176 | 1430 | } |
---|
1177 | | -} |
---|
1178 | | - |
---|
1179 | | -static inline int intel_pt_deliver_synth_event(struct intel_pt *pt, |
---|
1180 | | - struct intel_pt_queue *ptq, |
---|
1181 | | - union perf_event *event, |
---|
1182 | | - struct perf_sample *sample, |
---|
1183 | | - u64 type) |
---|
1184 | | -{ |
---|
1185 | | - int ret; |
---|
1186 | | - |
---|
1187 | | - ret = intel_pt_deliver_synth_b_event(pt, event, sample, type); |
---|
1188 | | - |
---|
1189 | | - if (pt->synth_opts.last_branch) |
---|
1190 | | - intel_pt_reset_last_branch_rb(ptq); |
---|
1191 | | - |
---|
1192 | | - return ret; |
---|
1193 | 1431 | } |
---|
1194 | 1432 | |
---|
1195 | 1433 | static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) |
---|
.. | .. |
---|
1205 | 1443 | |
---|
1206 | 1444 | sample.id = ptq->pt->instructions_id; |
---|
1207 | 1445 | sample.stream_id = ptq->pt->instructions_id; |
---|
1208 | | - sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; |
---|
| 1446 | + if (pt->synth_opts.quick) |
---|
| 1447 | + sample.period = 1; |
---|
| 1448 | + else |
---|
| 1449 | + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; |
---|
| 1450 | + |
---|
| 1451 | + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) |
---|
| 1452 | + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; |
---|
| 1453 | + if (sample.cyc_cnt) { |
---|
| 1454 | + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; |
---|
| 1455 | + ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; |
---|
| 1456 | + ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt; |
---|
| 1457 | + } |
---|
1209 | 1458 | |
---|
1210 | 1459 | ptq->last_insn_cnt = ptq->state->tot_insn_cnt; |
---|
1211 | 1460 | |
---|
1212 | | - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, |
---|
| 1461 | + return intel_pt_deliver_synth_event(pt, event, &sample, |
---|
1213 | 1462 | pt->instructions_sample_type); |
---|
1214 | 1463 | } |
---|
1215 | 1464 | |
---|
.. | .. |
---|
1227 | 1476 | sample.id = ptq->pt->transactions_id; |
---|
1228 | 1477 | sample.stream_id = ptq->pt->transactions_id; |
---|
1229 | 1478 | |
---|
1230 | | - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, |
---|
| 1479 | + return intel_pt_deliver_synth_event(pt, event, &sample, |
---|
1231 | 1480 | pt->transactions_sample_type); |
---|
1232 | 1481 | } |
---|
1233 | 1482 | |
---|
.. | .. |
---|
1268 | 1517 | sample.raw_size = perf_synth__raw_size(raw); |
---|
1269 | 1518 | sample.raw_data = perf_synth__raw_data(&raw); |
---|
1270 | 1519 | |
---|
1271 | | - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, |
---|
| 1520 | + return intel_pt_deliver_synth_event(pt, event, &sample, |
---|
1272 | 1521 | pt->ptwrites_sample_type); |
---|
1273 | 1522 | } |
---|
1274 | 1523 | |
---|
.. | .. |
---|
1280 | 1529 | struct perf_synth_intel_cbr raw; |
---|
1281 | 1530 | u32 flags; |
---|
1282 | 1531 | |
---|
1283 | | - if (intel_pt_skip_event(pt)) |
---|
| 1532 | + if (intel_pt_skip_cbr_event(pt)) |
---|
1284 | 1533 | return 0; |
---|
| 1534 | + |
---|
| 1535 | + ptq->cbr_seen = ptq->state->cbr; |
---|
1285 | 1536 | |
---|
1286 | 1537 | intel_pt_prep_p_sample(pt, ptq, event, &sample); |
---|
1287 | 1538 | |
---|
.. | .. |
---|
1296 | 1547 | sample.raw_size = perf_synth__raw_size(raw); |
---|
1297 | 1548 | sample.raw_data = perf_synth__raw_data(&raw); |
---|
1298 | 1549 | |
---|
1299 | | - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, |
---|
| 1550 | + return intel_pt_deliver_synth_event(pt, event, &sample, |
---|
1300 | 1551 | pt->pwr_events_sample_type); |
---|
1301 | 1552 | } |
---|
1302 | 1553 | |
---|
.. | .. |
---|
1321 | 1572 | sample.raw_size = perf_synth__raw_size(raw); |
---|
1322 | 1573 | sample.raw_data = perf_synth__raw_data(&raw); |
---|
1323 | 1574 | |
---|
1324 | | - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, |
---|
| 1575 | + return intel_pt_deliver_synth_event(pt, event, &sample, |
---|
1325 | 1576 | pt->pwr_events_sample_type); |
---|
1326 | 1577 | } |
---|
1327 | 1578 | |
---|
.. | .. |
---|
1346 | 1597 | sample.raw_size = perf_synth__raw_size(raw); |
---|
1347 | 1598 | sample.raw_data = perf_synth__raw_data(&raw); |
---|
1348 | 1599 | |
---|
1349 | | - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, |
---|
| 1600 | + return intel_pt_deliver_synth_event(pt, event, &sample, |
---|
1350 | 1601 | pt->pwr_events_sample_type); |
---|
1351 | 1602 | } |
---|
1352 | 1603 | |
---|
.. | .. |
---|
1371 | 1622 | sample.raw_size = perf_synth__raw_size(raw); |
---|
1372 | 1623 | sample.raw_data = perf_synth__raw_data(&raw); |
---|
1373 | 1624 | |
---|
1374 | | - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, |
---|
| 1625 | + return intel_pt_deliver_synth_event(pt, event, &sample, |
---|
1375 | 1626 | pt->pwr_events_sample_type); |
---|
1376 | 1627 | } |
---|
1377 | 1628 | |
---|
.. | .. |
---|
1396 | 1647 | sample.raw_size = perf_synth__raw_size(raw); |
---|
1397 | 1648 | sample.raw_data = perf_synth__raw_data(&raw); |
---|
1398 | 1649 | |
---|
1399 | | - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, |
---|
| 1650 | + return intel_pt_deliver_synth_event(pt, event, &sample, |
---|
1400 | 1651 | pt->pwr_events_sample_type); |
---|
1401 | 1652 | } |
---|
1402 | 1653 | |
---|
| 1654 | +/* |
---|
| 1655 | + * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer |
---|
| 1656 | + * intel_pt_add_gp_regs(). |
---|
| 1657 | + */ |
---|
| 1658 | +static const int pebs_gp_regs[] = { |
---|
| 1659 | + [PERF_REG_X86_FLAGS] = 1, |
---|
| 1660 | + [PERF_REG_X86_IP] = 2, |
---|
| 1661 | + [PERF_REG_X86_AX] = 3, |
---|
| 1662 | + [PERF_REG_X86_CX] = 4, |
---|
| 1663 | + [PERF_REG_X86_DX] = 5, |
---|
| 1664 | + [PERF_REG_X86_BX] = 6, |
---|
| 1665 | + [PERF_REG_X86_SP] = 7, |
---|
| 1666 | + [PERF_REG_X86_BP] = 8, |
---|
| 1667 | + [PERF_REG_X86_SI] = 9, |
---|
| 1668 | + [PERF_REG_X86_DI] = 10, |
---|
| 1669 | + [PERF_REG_X86_R8] = 11, |
---|
| 1670 | + [PERF_REG_X86_R9] = 12, |
---|
| 1671 | + [PERF_REG_X86_R10] = 13, |
---|
| 1672 | + [PERF_REG_X86_R11] = 14, |
---|
| 1673 | + [PERF_REG_X86_R12] = 15, |
---|
| 1674 | + [PERF_REG_X86_R13] = 16, |
---|
| 1675 | + [PERF_REG_X86_R14] = 17, |
---|
| 1676 | + [PERF_REG_X86_R15] = 18, |
---|
| 1677 | +}; |
---|
| 1678 | + |
---|
| 1679 | +static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos, |
---|
| 1680 | + const struct intel_pt_blk_items *items, |
---|
| 1681 | + u64 regs_mask) |
---|
| 1682 | +{ |
---|
| 1683 | + const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS]; |
---|
| 1684 | + u32 mask = items->mask[INTEL_PT_GP_REGS_POS]; |
---|
| 1685 | + u32 bit; |
---|
| 1686 | + int i; |
---|
| 1687 | + |
---|
| 1688 | + for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) { |
---|
| 1689 | + /* Get the PEBS gp_regs array index */ |
---|
| 1690 | + int n = pebs_gp_regs[i] - 1; |
---|
| 1691 | + |
---|
| 1692 | + if (n < 0) |
---|
| 1693 | + continue; |
---|
| 1694 | + /* |
---|
| 1695 | + * Add only registers that were requested (i.e. 'regs_mask') and |
---|
| 1696 | + * that were provided (i.e. 'mask'), and update the resulting |
---|
| 1697 | + * mask (i.e. 'intr_regs->mask') accordingly. |
---|
| 1698 | + */ |
---|
| 1699 | + if (mask & 1 << n && regs_mask & bit) { |
---|
| 1700 | + intr_regs->mask |= bit; |
---|
| 1701 | + *pos++ = gp_regs[n]; |
---|
| 1702 | + } |
---|
| 1703 | + } |
---|
| 1704 | + |
---|
| 1705 | + return pos; |
---|
| 1706 | +} |
---|
| 1707 | + |
---|
| 1708 | +#ifndef PERF_REG_X86_XMM0 |
---|
| 1709 | +#define PERF_REG_X86_XMM0 32 |
---|
| 1710 | +#endif |
---|
| 1711 | + |
---|
| 1712 | +static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos, |
---|
| 1713 | + const struct intel_pt_blk_items *items, |
---|
| 1714 | + u64 regs_mask) |
---|
| 1715 | +{ |
---|
| 1716 | + u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0); |
---|
| 1717 | + const u64 *xmm = items->xmm; |
---|
| 1718 | + |
---|
| 1719 | + /* |
---|
| 1720 | + * If there are any XMM registers, then there should be all of them. |
---|
| 1721 | + * Nevertheless, follow the logic to add only registers that were |
---|
| 1722 | + * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'), |
---|
| 1723 | + * and update the resulting mask (i.e. 'intr_regs->mask') accordingly. |
---|
| 1724 | + */ |
---|
| 1725 | + intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0; |
---|
| 1726 | + |
---|
| 1727 | + for (; mask; mask >>= 1, xmm++) { |
---|
| 1728 | + if (mask & 1) |
---|
| 1729 | + *pos++ = *xmm; |
---|
| 1730 | + } |
---|
| 1731 | +} |
---|
| 1732 | + |
---|
| 1733 | +#define LBR_INFO_MISPRED (1ULL << 63) |
---|
| 1734 | +#define LBR_INFO_IN_TX (1ULL << 62) |
---|
| 1735 | +#define LBR_INFO_ABORT (1ULL << 61) |
---|
| 1736 | +#define LBR_INFO_CYCLES 0xffff |
---|
| 1737 | + |
---|
| 1738 | +/* Refer kernel's intel_pmu_store_pebs_lbrs() */ |
---|
| 1739 | +static u64 intel_pt_lbr_flags(u64 info) |
---|
| 1740 | +{ |
---|
| 1741 | + union { |
---|
| 1742 | + struct branch_flags flags; |
---|
| 1743 | + u64 result; |
---|
| 1744 | + } u; |
---|
| 1745 | + |
---|
| 1746 | + u.result = 0; |
---|
| 1747 | + u.flags.mispred = !!(info & LBR_INFO_MISPRED); |
---|
| 1748 | + u.flags.predicted = !(info & LBR_INFO_MISPRED); |
---|
| 1749 | + u.flags.in_tx = !!(info & LBR_INFO_IN_TX); |
---|
| 1750 | + u.flags.abort = !!(info & LBR_INFO_ABORT); |
---|
| 1751 | + u.flags.cycles = info & LBR_INFO_CYCLES; |
---|
| 1752 | + |
---|
| 1753 | + return u.result; |
---|
| 1754 | +} |
---|
| 1755 | + |
---|
| 1756 | +static void intel_pt_add_lbrs(struct branch_stack *br_stack, |
---|
| 1757 | + const struct intel_pt_blk_items *items) |
---|
| 1758 | +{ |
---|
| 1759 | + u64 *to; |
---|
| 1760 | + int i; |
---|
| 1761 | + |
---|
| 1762 | + br_stack->nr = 0; |
---|
| 1763 | + |
---|
| 1764 | + to = &br_stack->entries[0].from; |
---|
| 1765 | + |
---|
| 1766 | + for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) { |
---|
| 1767 | + u32 mask = items->mask[i]; |
---|
| 1768 | + const u64 *from = items->val[i]; |
---|
| 1769 | + |
---|
| 1770 | + for (; mask; mask >>= 3, from += 3) { |
---|
| 1771 | + if ((mask & 7) == 7) { |
---|
| 1772 | + *to++ = from[0]; |
---|
| 1773 | + *to++ = from[1]; |
---|
| 1774 | + *to++ = intel_pt_lbr_flags(from[2]); |
---|
| 1775 | + br_stack->nr += 1; |
---|
| 1776 | + } |
---|
| 1777 | + } |
---|
| 1778 | + } |
---|
| 1779 | +} |
---|
| 1780 | + |
---|
| 1781 | +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) |
---|
| 1782 | +{ |
---|
| 1783 | + const struct intel_pt_blk_items *items = &ptq->state->items; |
---|
| 1784 | + struct perf_sample sample = { .ip = 0, }; |
---|
| 1785 | + union perf_event *event = ptq->event_buf; |
---|
| 1786 | + struct intel_pt *pt = ptq->pt; |
---|
| 1787 | + struct evsel *evsel = pt->pebs_evsel; |
---|
| 1788 | + u64 sample_type = evsel->core.attr.sample_type; |
---|
| 1789 | + u64 id = evsel->core.id[0]; |
---|
| 1790 | + u8 cpumode; |
---|
| 1791 | + u64 regs[8 * sizeof(sample.intr_regs.mask)]; |
---|
| 1792 | + |
---|
| 1793 | + if (intel_pt_skip_event(pt)) |
---|
| 1794 | + return 0; |
---|
| 1795 | + |
---|
| 1796 | + intel_pt_prep_a_sample(ptq, event, &sample); |
---|
| 1797 | + |
---|
| 1798 | + sample.id = id; |
---|
| 1799 | + sample.stream_id = id; |
---|
| 1800 | + |
---|
| 1801 | + if (!evsel->core.attr.freq) |
---|
| 1802 | + sample.period = evsel->core.attr.sample_period; |
---|
| 1803 | + |
---|
| 1804 | + /* No support for non-zero CS base */ |
---|
| 1805 | + if (items->has_ip) |
---|
| 1806 | + sample.ip = items->ip; |
---|
| 1807 | + else if (items->has_rip) |
---|
| 1808 | + sample.ip = items->rip; |
---|
| 1809 | + else |
---|
| 1810 | + sample.ip = ptq->state->from_ip; |
---|
| 1811 | + |
---|
| 1812 | + /* No support for guest mode at this time */ |
---|
| 1813 | + cpumode = sample.ip < ptq->pt->kernel_start ? |
---|
| 1814 | + PERF_RECORD_MISC_USER : |
---|
| 1815 | + PERF_RECORD_MISC_KERNEL; |
---|
| 1816 | + |
---|
| 1817 | + event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP; |
---|
| 1818 | + |
---|
| 1819 | + sample.cpumode = cpumode; |
---|
| 1820 | + |
---|
| 1821 | + if (sample_type & PERF_SAMPLE_TIME) { |
---|
| 1822 | + u64 timestamp = 0; |
---|
| 1823 | + |
---|
| 1824 | + if (items->has_timestamp) |
---|
| 1825 | + timestamp = items->timestamp; |
---|
| 1826 | + else if (!pt->timeless_decoding) |
---|
| 1827 | + timestamp = ptq->timestamp; |
---|
| 1828 | + if (timestamp) |
---|
| 1829 | + sample.time = tsc_to_perf_time(timestamp, &pt->tc); |
---|
| 1830 | + } |
---|
| 1831 | + |
---|
| 1832 | + if (sample_type & PERF_SAMPLE_CALLCHAIN && |
---|
| 1833 | + pt->synth_opts.callchain) { |
---|
| 1834 | + thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, |
---|
| 1835 | + pt->synth_opts.callchain_sz, sample.ip, |
---|
| 1836 | + pt->kernel_start); |
---|
| 1837 | + sample.callchain = ptq->chain; |
---|
| 1838 | + } |
---|
| 1839 | + |
---|
| 1840 | + if (sample_type & PERF_SAMPLE_REGS_INTR && |
---|
| 1841 | + (items->mask[INTEL_PT_GP_REGS_POS] || |
---|
| 1842 | + items->mask[INTEL_PT_XMM_POS])) { |
---|
| 1843 | + u64 regs_mask = evsel->core.attr.sample_regs_intr; |
---|
| 1844 | + u64 *pos; |
---|
| 1845 | + |
---|
| 1846 | + sample.intr_regs.abi = items->is_32_bit ? |
---|
| 1847 | + PERF_SAMPLE_REGS_ABI_32 : |
---|
| 1848 | + PERF_SAMPLE_REGS_ABI_64; |
---|
| 1849 | + sample.intr_regs.regs = regs; |
---|
| 1850 | + |
---|
| 1851 | + pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); |
---|
| 1852 | + |
---|
| 1853 | + intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); |
---|
| 1854 | + } |
---|
| 1855 | + |
---|
| 1856 | + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { |
---|
| 1857 | + if (items->mask[INTEL_PT_LBR_0_POS] || |
---|
| 1858 | + items->mask[INTEL_PT_LBR_1_POS] || |
---|
| 1859 | + items->mask[INTEL_PT_LBR_2_POS]) { |
---|
| 1860 | + intel_pt_add_lbrs(ptq->last_branch, items); |
---|
| 1861 | + } else if (pt->synth_opts.last_branch) { |
---|
| 1862 | + thread_stack__br_sample(ptq->thread, ptq->cpu, |
---|
| 1863 | + ptq->last_branch, |
---|
| 1864 | + pt->br_stack_sz); |
---|
| 1865 | + } else { |
---|
| 1866 | + ptq->last_branch->nr = 0; |
---|
| 1867 | + } |
---|
| 1868 | + sample.branch_stack = ptq->last_branch; |
---|
| 1869 | + } |
---|
| 1870 | + |
---|
| 1871 | + if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) |
---|
| 1872 | + sample.addr = items->mem_access_address; |
---|
| 1873 | + |
---|
| 1874 | + if (sample_type & PERF_SAMPLE_WEIGHT) { |
---|
| 1875 | + /* |
---|
| 1876 | + * Refer kernel's setup_pebs_adaptive_sample_data() and |
---|
| 1877 | + * intel_hsw_weight(). |
---|
| 1878 | + */ |
---|
| 1879 | + if (items->has_mem_access_latency) |
---|
| 1880 | + sample.weight = items->mem_access_latency; |
---|
| 1881 | + if (!sample.weight && items->has_tsx_aux_info) { |
---|
| 1882 | + /* Cycles last block */ |
---|
| 1883 | + sample.weight = (u32)items->tsx_aux_info; |
---|
| 1884 | + } |
---|
| 1885 | + } |
---|
| 1886 | + |
---|
| 1887 | + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { |
---|
| 1888 | + u64 ax = items->has_rax ? items->rax : 0; |
---|
| 1889 | + /* Refer kernel's intel_hsw_transaction() */ |
---|
| 1890 | + u64 txn = (u8)(items->tsx_aux_info >> 32); |
---|
| 1891 | + |
---|
| 1892 | + /* For RTM XABORTs also log the abort code from AX */ |
---|
| 1893 | + if (txn & PERF_TXN_TRANSACTION && ax & 1) |
---|
| 1894 | + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; |
---|
| 1895 | + sample.transaction = txn; |
---|
| 1896 | + } |
---|
| 1897 | + |
---|
| 1898 | + return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); |
---|
| 1899 | +} |
---|
| 1900 | + |
---|
1403 | 1901 | static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, |
---|
1404 | | - pid_t pid, pid_t tid, u64 ip) |
---|
| 1902 | + pid_t pid, pid_t tid, u64 ip, u64 timestamp) |
---|
1405 | 1903 | { |
---|
1406 | 1904 | union perf_event event; |
---|
1407 | 1905 | char msg[MAX_AUXTRACE_ERROR_MSG]; |
---|
1408 | 1906 | int err; |
---|
1409 | 1907 | |
---|
| 1908 | + if (pt->synth_opts.error_minus_flags) { |
---|
| 1909 | + if (code == INTEL_PT_ERR_OVR && |
---|
| 1910 | + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW) |
---|
| 1911 | + return 0; |
---|
| 1912 | + if (code == INTEL_PT_ERR_LOST && |
---|
| 1913 | + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST) |
---|
| 1914 | + return 0; |
---|
| 1915 | + } |
---|
| 1916 | + |
---|
1410 | 1917 | intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); |
---|
1411 | 1918 | |
---|
1412 | 1919 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, |
---|
1413 | | - code, cpu, pid, tid, ip, msg); |
---|
| 1920 | + code, cpu, pid, tid, ip, msg, timestamp); |
---|
1414 | 1921 | |
---|
1415 | 1922 | err = perf_session__deliver_synth_event(pt->session, &event, NULL); |
---|
1416 | 1923 | if (err) |
---|
.. | .. |
---|
1418 | 1925 | err); |
---|
1419 | 1926 | |
---|
1420 | 1927 | return err; |
---|
| 1928 | +} |
---|
| 1929 | + |
---|
| 1930 | +static int intel_ptq_synth_error(struct intel_pt_queue *ptq, |
---|
| 1931 | + const struct intel_pt_state *state) |
---|
| 1932 | +{ |
---|
| 1933 | + struct intel_pt *pt = ptq->pt; |
---|
| 1934 | + u64 tm = ptq->timestamp; |
---|
| 1935 | + |
---|
| 1936 | + tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc); |
---|
| 1937 | + |
---|
| 1938 | + return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid, |
---|
| 1939 | + ptq->tid, state->from_ip, tm); |
---|
1421 | 1940 | } |
---|
1422 | 1941 | |
---|
1423 | 1942 | static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) |
---|
.. | .. |
---|
1452 | 1971 | } |
---|
1453 | 1972 | |
---|
1454 | 1973 | #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ |
---|
1455 | | - INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ |
---|
1456 | | - INTEL_PT_CBR_CHG) |
---|
| 1974 | + INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT) |
---|
1457 | 1975 | |
---|
1458 | 1976 | static int intel_pt_sample(struct intel_pt_queue *ptq) |
---|
1459 | 1977 | { |
---|
.. | .. |
---|
1466 | 1984 | |
---|
1467 | 1985 | ptq->have_sample = false; |
---|
1468 | 1986 | |
---|
1469 | | - if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { |
---|
1470 | | - if (state->type & INTEL_PT_CBR_CHG) { |
---|
| 1987 | + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; |
---|
| 1988 | + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; |
---|
| 1989 | + |
---|
| 1990 | + /* |
---|
| 1991 | + * Do PEBS first to allow for the possibility that the PEBS timestamp |
---|
| 1992 | + * precedes the current timestamp. |
---|
| 1993 | + */ |
---|
| 1994 | + if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) { |
---|
| 1995 | + err = intel_pt_synth_pebs_sample(ptq); |
---|
| 1996 | + if (err) |
---|
| 1997 | + return err; |
---|
| 1998 | + } |
---|
| 1999 | + |
---|
| 2000 | + if (pt->sample_pwr_events) { |
---|
| 2001 | + if (ptq->state->cbr != ptq->cbr_seen) { |
---|
1471 | 2002 | err = intel_pt_synth_cbr_sample(ptq); |
---|
1472 | 2003 | if (err) |
---|
1473 | 2004 | return err; |
---|
1474 | 2005 | } |
---|
1475 | | - if (state->type & INTEL_PT_MWAIT_OP) { |
---|
1476 | | - err = intel_pt_synth_mwait_sample(ptq); |
---|
1477 | | - if (err) |
---|
1478 | | - return err; |
---|
1479 | | - } |
---|
1480 | | - if (state->type & INTEL_PT_PWR_ENTRY) { |
---|
1481 | | - err = intel_pt_synth_pwre_sample(ptq); |
---|
1482 | | - if (err) |
---|
1483 | | - return err; |
---|
1484 | | - } |
---|
1485 | | - if (state->type & INTEL_PT_EX_STOP) { |
---|
1486 | | - err = intel_pt_synth_exstop_sample(ptq); |
---|
1487 | | - if (err) |
---|
1488 | | - return err; |
---|
1489 | | - } |
---|
1490 | | - if (state->type & INTEL_PT_PWR_EXIT) { |
---|
1491 | | - err = intel_pt_synth_pwrx_sample(ptq); |
---|
1492 | | - if (err) |
---|
1493 | | - return err; |
---|
| 2006 | + if (state->type & INTEL_PT_PWR_EVT) { |
---|
| 2007 | + if (state->type & INTEL_PT_MWAIT_OP) { |
---|
| 2008 | + err = intel_pt_synth_mwait_sample(ptq); |
---|
| 2009 | + if (err) |
---|
| 2010 | + return err; |
---|
| 2011 | + } |
---|
| 2012 | + if (state->type & INTEL_PT_PWR_ENTRY) { |
---|
| 2013 | + err = intel_pt_synth_pwre_sample(ptq); |
---|
| 2014 | + if (err) |
---|
| 2015 | + return err; |
---|
| 2016 | + } |
---|
| 2017 | + if (state->type & INTEL_PT_EX_STOP) { |
---|
| 2018 | + err = intel_pt_synth_exstop_sample(ptq); |
---|
| 2019 | + if (err) |
---|
| 2020 | + return err; |
---|
| 2021 | + } |
---|
| 2022 | + if (state->type & INTEL_PT_PWR_EXIT) { |
---|
| 2023 | + err = intel_pt_synth_pwrx_sample(ptq); |
---|
| 2024 | + if (err) |
---|
| 2025 | + return err; |
---|
| 2026 | + } |
---|
1494 | 2027 | } |
---|
1495 | 2028 | } |
---|
1496 | 2029 | |
---|
.. | .. |
---|
1515 | 2048 | if (!(state->type & INTEL_PT_BRANCH)) |
---|
1516 | 2049 | return 0; |
---|
1517 | 2050 | |
---|
1518 | | - if (pt->synth_opts.callchain || pt->synth_opts.thread_stack) |
---|
1519 | | - thread_stack__event(ptq->thread, ptq->flags, state->from_ip, |
---|
1520 | | - state->to_ip, ptq->insn_len, |
---|
1521 | | - state->trace_nr); |
---|
1522 | | - else |
---|
1523 | | - thread_stack__set_trace_nr(ptq->thread, state->trace_nr); |
---|
| 2051 | + if (pt->use_thread_stack) { |
---|
| 2052 | + thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, |
---|
| 2053 | + state->from_ip, state->to_ip, ptq->insn_len, |
---|
| 2054 | + state->trace_nr, pt->callstack, |
---|
| 2055 | + pt->br_stack_sz_plus, |
---|
| 2056 | + pt->mispred_all); |
---|
| 2057 | + } else { |
---|
| 2058 | + thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr); |
---|
| 2059 | + } |
---|
1524 | 2060 | |
---|
1525 | 2061 | if (pt->sample_branches) { |
---|
1526 | 2062 | err = intel_pt_synth_branch_sample(ptq); |
---|
1527 | 2063 | if (err) |
---|
1528 | 2064 | return err; |
---|
1529 | 2065 | } |
---|
1530 | | - |
---|
1531 | | - if (pt->synth_opts.last_branch) |
---|
1532 | | - intel_pt_update_last_branch_rb(ptq); |
---|
1533 | 2066 | |
---|
1534 | 2067 | if (!ptq->sync_switch) |
---|
1535 | 2068 | return 0; |
---|
.. | .. |
---|
1628 | 2161 | } |
---|
1629 | 2162 | } |
---|
1630 | 2163 | |
---|
| 2164 | +/* |
---|
| 2165 | + * To filter against time ranges, it is only necessary to look at the next start |
---|
| 2166 | + * or end time. |
---|
| 2167 | + */ |
---|
| 2168 | +static bool intel_pt_next_time(struct intel_pt_queue *ptq) |
---|
| 2169 | +{ |
---|
| 2170 | + struct intel_pt *pt = ptq->pt; |
---|
| 2171 | + |
---|
| 2172 | + if (ptq->sel_start) { |
---|
| 2173 | + /* Next time is an end time */ |
---|
| 2174 | + ptq->sel_start = false; |
---|
| 2175 | + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end; |
---|
| 2176 | + return true; |
---|
| 2177 | + } else if (ptq->sel_idx + 1 < pt->range_cnt) { |
---|
| 2178 | + /* Next time is a start time */ |
---|
| 2179 | + ptq->sel_start = true; |
---|
| 2180 | + ptq->sel_idx += 1; |
---|
| 2181 | + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start; |
---|
| 2182 | + return true; |
---|
| 2183 | + } |
---|
| 2184 | + |
---|
| 2185 | + /* No next time */ |
---|
| 2186 | + return false; |
---|
| 2187 | +} |
---|
| 2188 | + |
---|
| 2189 | +static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp) |
---|
| 2190 | +{ |
---|
| 2191 | + int err; |
---|
| 2192 | + |
---|
| 2193 | + while (1) { |
---|
| 2194 | + if (ptq->sel_start) { |
---|
| 2195 | + if (ptq->timestamp >= ptq->sel_timestamp) { |
---|
| 2196 | + /* After start time, so consider next time */ |
---|
| 2197 | + intel_pt_next_time(ptq); |
---|
| 2198 | + if (!ptq->sel_timestamp) { |
---|
| 2199 | + /* No end time */ |
---|
| 2200 | + return 0; |
---|
| 2201 | + } |
---|
| 2202 | + /* Check against end time */ |
---|
| 2203 | + continue; |
---|
| 2204 | + } |
---|
| 2205 | + /* Before start time, so fast forward */ |
---|
| 2206 | + ptq->have_sample = false; |
---|
| 2207 | + if (ptq->sel_timestamp > *ff_timestamp) { |
---|
| 2208 | + if (ptq->sync_switch) { |
---|
| 2209 | + intel_pt_next_tid(ptq->pt, ptq); |
---|
| 2210 | + ptq->switch_state = INTEL_PT_SS_UNKNOWN; |
---|
| 2211 | + } |
---|
| 2212 | + *ff_timestamp = ptq->sel_timestamp; |
---|
| 2213 | + err = intel_pt_fast_forward(ptq->decoder, |
---|
| 2214 | + ptq->sel_timestamp); |
---|
| 2215 | + if (err) |
---|
| 2216 | + return err; |
---|
| 2217 | + } |
---|
| 2218 | + return 0; |
---|
| 2219 | + } else if (ptq->timestamp > ptq->sel_timestamp) { |
---|
| 2220 | + /* After end time, so consider next time */ |
---|
| 2221 | + if (!intel_pt_next_time(ptq)) { |
---|
| 2222 | + /* No next time range, so stop decoding */ |
---|
| 2223 | + ptq->have_sample = false; |
---|
| 2224 | + ptq->switch_state = INTEL_PT_SS_NOT_TRACING; |
---|
| 2225 | + return 1; |
---|
| 2226 | + } |
---|
| 2227 | + /* Check against next start time */ |
---|
| 2228 | + continue; |
---|
| 2229 | + } else { |
---|
| 2230 | + /* Before end time */ |
---|
| 2231 | + return 0; |
---|
| 2232 | + } |
---|
| 2233 | + } |
---|
| 2234 | +} |
---|
| 2235 | + |
---|
1631 | 2236 | static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) |
---|
1632 | 2237 | { |
---|
1633 | 2238 | const struct intel_pt_state *state = ptq->state; |
---|
1634 | 2239 | struct intel_pt *pt = ptq->pt; |
---|
| 2240 | + u64 ff_timestamp = 0; |
---|
1635 | 2241 | int err; |
---|
1636 | 2242 | |
---|
1637 | 2243 | if (!pt->kernel_start) { |
---|
.. | .. |
---|
1665 | 2271 | ptq->sync_switch = false; |
---|
1666 | 2272 | intel_pt_next_tid(pt, ptq); |
---|
1667 | 2273 | } |
---|
| 2274 | + ptq->timestamp = state->est_timestamp; |
---|
1668 | 2275 | if (pt->synth_opts.errors) { |
---|
1669 | | - err = intel_pt_synth_error(pt, state->err, |
---|
1670 | | - ptq->cpu, ptq->pid, |
---|
1671 | | - ptq->tid, |
---|
1672 | | - state->from_ip); |
---|
| 2276 | + err = intel_ptq_synth_error(ptq, state); |
---|
1673 | 2277 | if (err) |
---|
1674 | 2278 | return err; |
---|
1675 | 2279 | } |
---|
.. | .. |
---|
1697 | 2301 | ptq->timestamp = state->est_timestamp; |
---|
1698 | 2302 | } else if (state->timestamp > ptq->timestamp) { |
---|
1699 | 2303 | ptq->timestamp = state->timestamp; |
---|
| 2304 | + } |
---|
| 2305 | + |
---|
| 2306 | + if (ptq->sel_timestamp) { |
---|
| 2307 | + err = intel_pt_time_filter(ptq, &ff_timestamp); |
---|
| 2308 | + if (err) |
---|
| 2309 | + return err; |
---|
1700 | 2310 | } |
---|
1701 | 2311 | |
---|
1702 | 2312 | if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { |
---|
.. | .. |
---|
1791 | 2401 | return 0; |
---|
1792 | 2402 | } |
---|
1793 | 2403 | |
---|
| 2404 | +static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq, |
---|
| 2405 | + struct auxtrace_queue *queue, |
---|
| 2406 | + struct perf_sample *sample) |
---|
| 2407 | +{ |
---|
| 2408 | + struct machine *m = ptq->pt->machine; |
---|
| 2409 | + |
---|
| 2410 | + ptq->pid = sample->pid; |
---|
| 2411 | + ptq->tid = sample->tid; |
---|
| 2412 | + ptq->cpu = queue->cpu; |
---|
| 2413 | + |
---|
| 2414 | + intel_pt_log("queue %u cpu %d pid %d tid %d\n", |
---|
| 2415 | + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); |
---|
| 2416 | + |
---|
| 2417 | + thread__zput(ptq->thread); |
---|
| 2418 | + |
---|
| 2419 | + if (ptq->tid == -1) |
---|
| 2420 | + return; |
---|
| 2421 | + |
---|
| 2422 | + if (ptq->pid == -1) { |
---|
| 2423 | + ptq->thread = machine__find_thread(m, -1, ptq->tid); |
---|
| 2424 | + if (ptq->thread) |
---|
| 2425 | + ptq->pid = ptq->thread->pid_; |
---|
| 2426 | + return; |
---|
| 2427 | + } |
---|
| 2428 | + |
---|
| 2429 | + ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid); |
---|
| 2430 | +} |
---|
| 2431 | + |
---|
| 2432 | +static int intel_pt_process_timeless_sample(struct intel_pt *pt, |
---|
| 2433 | + struct perf_sample *sample) |
---|
| 2434 | +{ |
---|
| 2435 | + struct auxtrace_queue *queue; |
---|
| 2436 | + struct intel_pt_queue *ptq; |
---|
| 2437 | + u64 ts = 0; |
---|
| 2438 | + |
---|
| 2439 | + queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); |
---|
| 2440 | + if (!queue) |
---|
| 2441 | + return -EINVAL; |
---|
| 2442 | + |
---|
| 2443 | + ptq = queue->priv; |
---|
| 2444 | + if (!ptq) |
---|
| 2445 | + return 0; |
---|
| 2446 | + |
---|
| 2447 | + ptq->stop = false; |
---|
| 2448 | + ptq->time = sample->time; |
---|
| 2449 | + intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample); |
---|
| 2450 | + intel_pt_run_decoder(ptq, &ts); |
---|
| 2451 | + return 0; |
---|
| 2452 | +} |
---|
| 2453 | + |
---|
1794 | 2454 | static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) |
---|
1795 | 2455 | { |
---|
1796 | 2456 | return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, |
---|
1797 | | - sample->pid, sample->tid, 0); |
---|
| 2457 | + sample->pid, sample->tid, 0, sample->time); |
---|
1798 | 2458 | } |
---|
1799 | 2459 | |
---|
1800 | 2460 | static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) |
---|
.. | .. |
---|
1840 | 2500 | |
---|
1841 | 2501 | switch (ptq->switch_state) { |
---|
1842 | 2502 | case INTEL_PT_SS_NOT_TRACING: |
---|
1843 | | - ptq->next_tid = -1; |
---|
1844 | 2503 | break; |
---|
1845 | 2504 | case INTEL_PT_SS_UNKNOWN: |
---|
1846 | 2505 | case INTEL_PT_SS_TRACING: |
---|
.. | .. |
---|
1860 | 2519 | ptq->switch_state = INTEL_PT_SS_TRACING; |
---|
1861 | 2520 | break; |
---|
1862 | 2521 | case INTEL_PT_SS_EXPECTING_SWITCH_IP: |
---|
1863 | | - ptq->next_tid = tid; |
---|
1864 | 2522 | intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); |
---|
1865 | 2523 | break; |
---|
1866 | 2524 | default: |
---|
1867 | 2525 | break; |
---|
1868 | 2526 | } |
---|
| 2527 | + |
---|
| 2528 | + ptq->next_tid = -1; |
---|
1869 | 2529 | |
---|
1870 | 2530 | return 1; |
---|
1871 | 2531 | } |
---|
.. | .. |
---|
1873 | 2533 | static int intel_pt_process_switch(struct intel_pt *pt, |
---|
1874 | 2534 | struct perf_sample *sample) |
---|
1875 | 2535 | { |
---|
1876 | | - struct perf_evsel *evsel; |
---|
| 2536 | + struct evsel *evsel; |
---|
1877 | 2537 | pid_t tid; |
---|
1878 | 2538 | int cpu, ret; |
---|
1879 | 2539 | |
---|
.. | .. |
---|
1881 | 2541 | if (evsel != pt->switch_evsel) |
---|
1882 | 2542 | return 0; |
---|
1883 | 2543 | |
---|
1884 | | - tid = perf_evsel__intval(evsel, sample, "next_pid"); |
---|
| 2544 | + tid = evsel__intval(evsel, sample, "next_pid"); |
---|
1885 | 2545 | cpu = sample->cpu; |
---|
1886 | 2546 | |
---|
1887 | 2547 | intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", |
---|
.. | .. |
---|
1895 | 2555 | return machine__set_current_tid(pt->machine, cpu, -1, tid); |
---|
1896 | 2556 | } |
---|
1897 | 2557 | |
---|
| 2558 | +static int intel_pt_context_switch_in(struct intel_pt *pt, |
---|
| 2559 | + struct perf_sample *sample) |
---|
| 2560 | +{ |
---|
| 2561 | + pid_t pid = sample->pid; |
---|
| 2562 | + pid_t tid = sample->tid; |
---|
| 2563 | + int cpu = sample->cpu; |
---|
| 2564 | + |
---|
| 2565 | + if (pt->sync_switch) { |
---|
| 2566 | + struct intel_pt_queue *ptq; |
---|
| 2567 | + |
---|
| 2568 | + ptq = intel_pt_cpu_to_ptq(pt, cpu); |
---|
| 2569 | + if (ptq && ptq->sync_switch) { |
---|
| 2570 | + ptq->next_tid = -1; |
---|
| 2571 | + switch (ptq->switch_state) { |
---|
| 2572 | + case INTEL_PT_SS_NOT_TRACING: |
---|
| 2573 | + case INTEL_PT_SS_UNKNOWN: |
---|
| 2574 | + case INTEL_PT_SS_TRACING: |
---|
| 2575 | + break; |
---|
| 2576 | + case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: |
---|
| 2577 | + case INTEL_PT_SS_EXPECTING_SWITCH_IP: |
---|
| 2578 | + ptq->switch_state = INTEL_PT_SS_TRACING; |
---|
| 2579 | + break; |
---|
| 2580 | + default: |
---|
| 2581 | + break; |
---|
| 2582 | + } |
---|
| 2583 | + } |
---|
| 2584 | + } |
---|
| 2585 | + |
---|
| 2586 | + /* |
---|
| 2587 | + * If the current tid has not been updated yet, ensure it is now that |
---|
| 2588 | + * a "switch in" event has occurred. |
---|
| 2589 | + */ |
---|
| 2590 | + if (machine__get_current_tid(pt->machine, cpu) == tid) |
---|
| 2591 | + return 0; |
---|
| 2592 | + |
---|
| 2593 | + return machine__set_current_tid(pt->machine, cpu, pid, tid); |
---|
| 2594 | +} |
---|
| 2595 | + |
---|
1898 | 2596 | static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, |
---|
1899 | 2597 | struct perf_sample *sample) |
---|
1900 | 2598 | { |
---|
.. | .. |
---|
1906 | 2604 | |
---|
1907 | 2605 | if (pt->have_sched_switch == 3) { |
---|
1908 | 2606 | if (!out) |
---|
1909 | | - return 0; |
---|
| 2607 | + return intel_pt_context_switch_in(pt, sample); |
---|
1910 | 2608 | if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { |
---|
1911 | 2609 | pr_err("Expecting CPU-wide context switch event\n"); |
---|
1912 | 2610 | return -EINVAL; |
---|
.. | .. |
---|
1922 | 2620 | |
---|
1923 | 2621 | if (tid == -1) |
---|
1924 | 2622 | intel_pt_log("context_switch event has no tid\n"); |
---|
1925 | | - |
---|
1926 | | - intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", |
---|
1927 | | - cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, |
---|
1928 | | - &pt->tc)); |
---|
1929 | 2623 | |
---|
1930 | 2624 | ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); |
---|
1931 | 2625 | if (ret <= 0) |
---|
.. | .. |
---|
1949 | 2643 | return machine__set_current_tid(pt->machine, sample->cpu, |
---|
1950 | 2644 | event->itrace_start.pid, |
---|
1951 | 2645 | event->itrace_start.tid); |
---|
| 2646 | +} |
---|
| 2647 | + |
---|
| 2648 | +static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, |
---|
| 2649 | + struct addr_location *al) |
---|
| 2650 | +{ |
---|
| 2651 | + if (!al->map || addr < al->map->start || addr >= al->map->end) { |
---|
| 2652 | + if (!thread__find_map(thread, cpumode, addr, al)) |
---|
| 2653 | + return -1; |
---|
| 2654 | + } |
---|
| 2655 | + |
---|
| 2656 | + return 0; |
---|
| 2657 | +} |
---|
| 2658 | + |
---|
| 2659 | +/* Invalidate all instruction cache entries that overlap the text poke */ |
---|
| 2660 | +static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) |
---|
| 2661 | +{ |
---|
| 2662 | + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; |
---|
| 2663 | + u64 addr = event->text_poke.addr + event->text_poke.new_len - 1; |
---|
| 2664 | + /* Assume text poke begins in a basic block no more than 4096 bytes */ |
---|
| 2665 | + int cnt = 4096 + event->text_poke.new_len; |
---|
| 2666 | + struct thread *thread = pt->unknown_thread; |
---|
| 2667 | + struct addr_location al = { .map = NULL }; |
---|
| 2668 | + struct machine *machine = pt->machine; |
---|
| 2669 | + struct intel_pt_cache_entry *e; |
---|
| 2670 | + u64 offset; |
---|
| 2671 | + |
---|
| 2672 | + if (!event->text_poke.new_len) |
---|
| 2673 | + return 0; |
---|
| 2674 | + |
---|
| 2675 | + for (; cnt; cnt--, addr--) { |
---|
| 2676 | + if (intel_pt_find_map(thread, cpumode, addr, &al)) { |
---|
| 2677 | + if (addr < event->text_poke.addr) |
---|
| 2678 | + return 0; |
---|
| 2679 | + continue; |
---|
| 2680 | + } |
---|
| 2681 | + |
---|
| 2682 | + if (!al.map->dso || !al.map->dso->auxtrace_cache) |
---|
| 2683 | + continue; |
---|
| 2684 | + |
---|
| 2685 | + offset = al.map->map_ip(al.map, addr); |
---|
| 2686 | + |
---|
| 2687 | + e = intel_pt_cache_lookup(al.map->dso, machine, offset); |
---|
| 2688 | + if (!e) |
---|
| 2689 | + continue; |
---|
| 2690 | + |
---|
| 2691 | + if (addr + e->byte_cnt + e->length <= event->text_poke.addr) { |
---|
| 2692 | + /* |
---|
| 2693 | + * No overlap. Working backwards there cannot be another |
---|
| 2694 | + * basic block that overlaps the text poke if there is a |
---|
| 2695 | + * branch instruction before the text poke address. |
---|
| 2696 | + */ |
---|
| 2697 | + if (e->branch != INTEL_PT_BR_NO_BRANCH) |
---|
| 2698 | + return 0; |
---|
| 2699 | + } else { |
---|
| 2700 | + intel_pt_cache_invalidate(al.map->dso, machine, offset); |
---|
| 2701 | + intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", |
---|
| 2702 | + al.map->dso->long_name, addr); |
---|
| 2703 | + } |
---|
| 2704 | + } |
---|
| 2705 | + |
---|
| 2706 | + return 0; |
---|
1952 | 2707 | } |
---|
1953 | 2708 | |
---|
1954 | 2709 | static int intel_pt_process_event(struct perf_session *session, |
---|
.. | .. |
---|
1981 | 2736 | } |
---|
1982 | 2737 | |
---|
1983 | 2738 | if (pt->timeless_decoding) { |
---|
1984 | | - if (event->header.type == PERF_RECORD_EXIT) { |
---|
| 2739 | + if (pt->sampling_mode) { |
---|
| 2740 | + if (sample->aux_sample.size) |
---|
| 2741 | + err = intel_pt_process_timeless_sample(pt, |
---|
| 2742 | + sample); |
---|
| 2743 | + } else if (event->header.type == PERF_RECORD_EXIT) { |
---|
1985 | 2744 | err = intel_pt_process_timeless_queues(pt, |
---|
1986 | 2745 | event->fork.tid, |
---|
1987 | 2746 | sample->time); |
---|
.. | .. |
---|
1991 | 2750 | } |
---|
1992 | 2751 | if (err) |
---|
1993 | 2752 | return err; |
---|
| 2753 | + |
---|
| 2754 | + if (event->header.type == PERF_RECORD_SAMPLE) { |
---|
| 2755 | + if (pt->synth_opts.add_callchain && !sample->callchain) |
---|
| 2756 | + intel_pt_add_callchain(pt, sample); |
---|
| 2757 | + if (pt->synth_opts.add_last_branch && !sample->branch_stack) |
---|
| 2758 | + intel_pt_add_br_stack(pt, sample); |
---|
| 2759 | + } |
---|
1994 | 2760 | |
---|
1995 | 2761 | if (event->header.type == PERF_RECORD_AUX && |
---|
1996 | 2762 | (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && |
---|
.. | .. |
---|
2008 | 2774 | event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) |
---|
2009 | 2775 | err = intel_pt_context_switch(pt, event, sample); |
---|
2010 | 2776 | |
---|
2011 | | - intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", |
---|
2012 | | - perf_event__name(event->header.type), event->header.type, |
---|
2013 | | - sample->cpu, sample->time, timestamp); |
---|
| 2777 | + if (!err && event->header.type == PERF_RECORD_TEXT_POKE) |
---|
| 2778 | + err = intel_pt_text_poke(pt, event); |
---|
| 2779 | + |
---|
| 2780 | + if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) { |
---|
| 2781 | + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", |
---|
| 2782 | + event->header.type, sample->cpu, sample->time, timestamp); |
---|
| 2783 | + intel_pt_log_event(event); |
---|
| 2784 | + } |
---|
2014 | 2785 | |
---|
2015 | 2786 | return err; |
---|
2016 | 2787 | } |
---|
.. | .. |
---|
2063 | 2834 | session->auxtrace = NULL; |
---|
2064 | 2835 | thread__put(pt->unknown_thread); |
---|
2065 | 2836 | addr_filters__exit(&pt->filts); |
---|
| 2837 | + zfree(&pt->chain); |
---|
2066 | 2838 | zfree(&pt->filter); |
---|
| 2839 | + zfree(&pt->time_ranges); |
---|
2067 | 2840 | free(pt); |
---|
| 2841 | +} |
---|
| 2842 | + |
---|
| 2843 | +static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, |
---|
| 2844 | + struct evsel *evsel) |
---|
| 2845 | +{ |
---|
| 2846 | + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, |
---|
| 2847 | + auxtrace); |
---|
| 2848 | + |
---|
| 2849 | + return evsel->core.attr.type == pt->pmu_type; |
---|
2068 | 2850 | } |
---|
2069 | 2851 | |
---|
2070 | 2852 | static int intel_pt_process_auxtrace_event(struct perf_session *session, |
---|
.. | .. |
---|
2106 | 2888 | return 0; |
---|
2107 | 2889 | } |
---|
2108 | 2890 | |
---|
| 2891 | +static int intel_pt_queue_data(struct perf_session *session, |
---|
| 2892 | + struct perf_sample *sample, |
---|
| 2893 | + union perf_event *event, u64 data_offset) |
---|
| 2894 | +{ |
---|
| 2895 | + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, |
---|
| 2896 | + auxtrace); |
---|
| 2897 | + u64 timestamp; |
---|
| 2898 | + |
---|
| 2899 | + if (event) { |
---|
| 2900 | + return auxtrace_queues__add_event(&pt->queues, session, event, |
---|
| 2901 | + data_offset, NULL); |
---|
| 2902 | + } |
---|
| 2903 | + |
---|
| 2904 | + if (sample->time && sample->time != (u64)-1) |
---|
| 2905 | + timestamp = perf_time_to_tsc(sample->time, &pt->tc); |
---|
| 2906 | + else |
---|
| 2907 | + timestamp = 0; |
---|
| 2908 | + |
---|
| 2909 | + return auxtrace_queues__add_sample(&pt->queues, session, sample, |
---|
| 2910 | + data_offset, timestamp); |
---|
| 2911 | +} |
---|
| 2912 | + |
---|
2109 | 2913 | struct intel_pt_synth { |
---|
2110 | 2914 | struct perf_tool dummy_tool; |
---|
2111 | 2915 | struct perf_session *session; |
---|
.. | .. |
---|
2144 | 2948 | return err; |
---|
2145 | 2949 | } |
---|
2146 | 2950 | |
---|
2147 | | -static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id, |
---|
| 2951 | +static void intel_pt_set_event_name(struct evlist *evlist, u64 id, |
---|
2148 | 2952 | const char *name) |
---|
2149 | 2953 | { |
---|
2150 | | - struct perf_evsel *evsel; |
---|
| 2954 | + struct evsel *evsel; |
---|
2151 | 2955 | |
---|
2152 | 2956 | evlist__for_each_entry(evlist, evsel) { |
---|
2153 | | - if (evsel->id && evsel->id[0] == id) { |
---|
| 2957 | + if (evsel->core.id && evsel->core.id[0] == id) { |
---|
2154 | 2958 | if (evsel->name) |
---|
2155 | 2959 | zfree(&evsel->name); |
---|
2156 | 2960 | evsel->name = strdup(name); |
---|
.. | .. |
---|
2159 | 2963 | } |
---|
2160 | 2964 | } |
---|
2161 | 2965 | |
---|
2162 | | -static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt, |
---|
2163 | | - struct perf_evlist *evlist) |
---|
| 2966 | +static struct evsel *intel_pt_evsel(struct intel_pt *pt, |
---|
| 2967 | + struct evlist *evlist) |
---|
2164 | 2968 | { |
---|
2165 | | - struct perf_evsel *evsel; |
---|
| 2969 | + struct evsel *evsel; |
---|
2166 | 2970 | |
---|
2167 | 2971 | evlist__for_each_entry(evlist, evsel) { |
---|
2168 | | - if (evsel->attr.type == pt->pmu_type && evsel->ids) |
---|
| 2972 | + if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids) |
---|
2169 | 2973 | return evsel; |
---|
2170 | 2974 | } |
---|
2171 | 2975 | |
---|
.. | .. |
---|
2175 | 2979 | static int intel_pt_synth_events(struct intel_pt *pt, |
---|
2176 | 2980 | struct perf_session *session) |
---|
2177 | 2981 | { |
---|
2178 | | - struct perf_evlist *evlist = session->evlist; |
---|
2179 | | - struct perf_evsel *evsel = intel_pt_evsel(pt, evlist); |
---|
| 2982 | + struct evlist *evlist = session->evlist; |
---|
| 2983 | + struct evsel *evsel = intel_pt_evsel(pt, evlist); |
---|
2180 | 2984 | struct perf_event_attr attr; |
---|
2181 | 2985 | u64 id; |
---|
2182 | 2986 | int err; |
---|
.. | .. |
---|
2189 | 2993 | memset(&attr, 0, sizeof(struct perf_event_attr)); |
---|
2190 | 2994 | attr.size = sizeof(struct perf_event_attr); |
---|
2191 | 2995 | attr.type = PERF_TYPE_HARDWARE; |
---|
2192 | | - attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; |
---|
| 2996 | + attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; |
---|
2193 | 2997 | attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | |
---|
2194 | 2998 | PERF_SAMPLE_PERIOD; |
---|
2195 | 2999 | if (pt->timeless_decoding) |
---|
.. | .. |
---|
2198 | 3002 | attr.sample_type |= PERF_SAMPLE_TIME; |
---|
2199 | 3003 | if (!pt->per_cpu_mmaps) |
---|
2200 | 3004 | attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; |
---|
2201 | | - attr.exclude_user = evsel->attr.exclude_user; |
---|
2202 | | - attr.exclude_kernel = evsel->attr.exclude_kernel; |
---|
2203 | | - attr.exclude_hv = evsel->attr.exclude_hv; |
---|
2204 | | - attr.exclude_host = evsel->attr.exclude_host; |
---|
2205 | | - attr.exclude_guest = evsel->attr.exclude_guest; |
---|
2206 | | - attr.sample_id_all = evsel->attr.sample_id_all; |
---|
2207 | | - attr.read_format = evsel->attr.read_format; |
---|
| 3005 | + attr.exclude_user = evsel->core.attr.exclude_user; |
---|
| 3006 | + attr.exclude_kernel = evsel->core.attr.exclude_kernel; |
---|
| 3007 | + attr.exclude_hv = evsel->core.attr.exclude_hv; |
---|
| 3008 | + attr.exclude_host = evsel->core.attr.exclude_host; |
---|
| 3009 | + attr.exclude_guest = evsel->core.attr.exclude_guest; |
---|
| 3010 | + attr.sample_id_all = evsel->core.attr.sample_id_all; |
---|
| 3011 | + attr.read_format = evsel->core.attr.read_format; |
---|
2208 | 3012 | |
---|
2209 | | - id = evsel->id[0] + 1000000000; |
---|
| 3013 | + id = evsel->core.id[0] + 1000000000; |
---|
2210 | 3014 | if (!id) |
---|
2211 | 3015 | id = 1; |
---|
2212 | 3016 | |
---|
.. | .. |
---|
2226 | 3030 | |
---|
2227 | 3031 | if (pt->synth_opts.callchain) |
---|
2228 | 3032 | attr.sample_type |= PERF_SAMPLE_CALLCHAIN; |
---|
2229 | | - if (pt->synth_opts.last_branch) |
---|
| 3033 | + if (pt->synth_opts.last_branch) { |
---|
2230 | 3034 | attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; |
---|
| 3035 | + /* |
---|
| 3036 | + * We don't use the hardware index, but the sample generation |
---|
| 3037 | + * code uses the new format branch_stack with this field, |
---|
| 3038 | + * so the event attributes must indicate that it's present. |
---|
| 3039 | + */ |
---|
| 3040 | + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; |
---|
| 3041 | + } |
---|
2231 | 3042 | |
---|
2232 | 3043 | if (pt->synth_opts.instructions) { |
---|
2233 | 3044 | attr.config = PERF_COUNT_HW_INSTRUCTIONS; |
---|
.. | .. |
---|
2288 | 3099 | id += 1; |
---|
2289 | 3100 | } |
---|
2290 | 3101 | |
---|
2291 | | - if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) { |
---|
| 3102 | + if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) { |
---|
2292 | 3103 | attr.config = PERF_SYNTH_INTEL_MWAIT; |
---|
2293 | 3104 | err = intel_pt_synth_event(session, "mwait", &attr, id); |
---|
2294 | 3105 | if (err) |
---|
.. | .. |
---|
2325 | 3136 | return 0; |
---|
2326 | 3137 | } |
---|
2327 | 3138 | |
---|
2328 | | -static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) |
---|
| 3139 | +static void intel_pt_setup_pebs_events(struct intel_pt *pt) |
---|
2329 | 3140 | { |
---|
2330 | | - struct perf_evsel *evsel; |
---|
| 3141 | + struct evsel *evsel; |
---|
| 3142 | + |
---|
| 3143 | + if (!pt->synth_opts.other_events) |
---|
| 3144 | + return; |
---|
| 3145 | + |
---|
| 3146 | + evlist__for_each_entry(pt->session->evlist, evsel) { |
---|
| 3147 | + if (evsel->core.attr.aux_output && evsel->core.id) { |
---|
| 3148 | + pt->sample_pebs = true; |
---|
| 3149 | + pt->pebs_evsel = evsel; |
---|
| 3150 | + return; |
---|
| 3151 | + } |
---|
| 3152 | + } |
---|
| 3153 | +} |
---|
| 3154 | + |
---|
| 3155 | +static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist) |
---|
| 3156 | +{ |
---|
| 3157 | + struct evsel *evsel; |
---|
2331 | 3158 | |
---|
2332 | 3159 | evlist__for_each_entry_reverse(evlist, evsel) { |
---|
2333 | | - const char *name = perf_evsel__name(evsel); |
---|
| 3160 | + const char *name = evsel__name(evsel); |
---|
2334 | 3161 | |
---|
2335 | 3162 | if (!strcmp(name, "sched:sched_switch")) |
---|
2336 | 3163 | return evsel; |
---|
.. | .. |
---|
2339 | 3166 | return NULL; |
---|
2340 | 3167 | } |
---|
2341 | 3168 | |
---|
2342 | | -static bool intel_pt_find_switch(struct perf_evlist *evlist) |
---|
| 3169 | +static bool intel_pt_find_switch(struct evlist *evlist) |
---|
2343 | 3170 | { |
---|
2344 | | - struct perf_evsel *evsel; |
---|
| 3171 | + struct evsel *evsel; |
---|
2345 | 3172 | |
---|
2346 | 3173 | evlist__for_each_entry(evlist, evsel) { |
---|
2347 | | - if (evsel->attr.context_switch) |
---|
| 3174 | + if (evsel->core.attr.context_switch) |
---|
2348 | 3175 | return true; |
---|
2349 | 3176 | } |
---|
2350 | 3177 | |
---|
.. | .. |
---|
2361 | 3188 | return 0; |
---|
2362 | 3189 | } |
---|
2363 | 3190 | |
---|
| 3191 | +/* Find least TSC which converts to ns or later */ |
---|
| 3192 | +static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt) |
---|
| 3193 | +{ |
---|
| 3194 | + u64 tsc, tm; |
---|
| 3195 | + |
---|
| 3196 | + tsc = perf_time_to_tsc(ns, &pt->tc); |
---|
| 3197 | + |
---|
| 3198 | + while (1) { |
---|
| 3199 | + tm = tsc_to_perf_time(tsc, &pt->tc); |
---|
| 3200 | + if (tm < ns) |
---|
| 3201 | + break; |
---|
| 3202 | + tsc -= 1; |
---|
| 3203 | + } |
---|
| 3204 | + |
---|
| 3205 | + while (tm < ns) |
---|
| 3206 | + tm = tsc_to_perf_time(++tsc, &pt->tc); |
---|
| 3207 | + |
---|
| 3208 | + return tsc; |
---|
| 3209 | +} |
---|
| 3210 | + |
---|
| 3211 | +/* Find greatest TSC which converts to ns or earlier */ |
---|
| 3212 | +static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt) |
---|
| 3213 | +{ |
---|
| 3214 | + u64 tsc, tm; |
---|
| 3215 | + |
---|
| 3216 | + tsc = perf_time_to_tsc(ns, &pt->tc); |
---|
| 3217 | + |
---|
| 3218 | + while (1) { |
---|
| 3219 | + tm = tsc_to_perf_time(tsc, &pt->tc); |
---|
| 3220 | + if (tm > ns) |
---|
| 3221 | + break; |
---|
| 3222 | + tsc += 1; |
---|
| 3223 | + } |
---|
| 3224 | + |
---|
| 3225 | + while (tm > ns) |
---|
| 3226 | + tm = tsc_to_perf_time(--tsc, &pt->tc); |
---|
| 3227 | + |
---|
| 3228 | + return tsc; |
---|
| 3229 | +} |
---|
| 3230 | + |
---|
| 3231 | +static int intel_pt_setup_time_ranges(struct intel_pt *pt, |
---|
| 3232 | + struct itrace_synth_opts *opts) |
---|
| 3233 | +{ |
---|
| 3234 | + struct perf_time_interval *p = opts->ptime_range; |
---|
| 3235 | + int n = opts->range_num; |
---|
| 3236 | + int i; |
---|
| 3237 | + |
---|
| 3238 | + if (!n || !p || pt->timeless_decoding) |
---|
| 3239 | + return 0; |
---|
| 3240 | + |
---|
| 3241 | + pt->time_ranges = calloc(n, sizeof(struct range)); |
---|
| 3242 | + if (!pt->time_ranges) |
---|
| 3243 | + return -ENOMEM; |
---|
| 3244 | + |
---|
| 3245 | + pt->range_cnt = n; |
---|
| 3246 | + |
---|
| 3247 | + intel_pt_log("%s: %u range(s)\n", __func__, n); |
---|
| 3248 | + |
---|
| 3249 | + for (i = 0; i < n; i++) { |
---|
| 3250 | + struct range *r = &pt->time_ranges[i]; |
---|
| 3251 | + u64 ts = p[i].start; |
---|
| 3252 | + u64 te = p[i].end; |
---|
| 3253 | + |
---|
| 3254 | + /* |
---|
| 3255 | + * Take care to ensure the TSC range matches the perf-time range |
---|
| 3256 | + * when converted back to perf-time. |
---|
| 3257 | + */ |
---|
| 3258 | + r->start = ts ? intel_pt_tsc_start(ts, pt) : 0; |
---|
| 3259 | + r->end = te ? intel_pt_tsc_end(te, pt) : 0; |
---|
| 3260 | + |
---|
| 3261 | + intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n", |
---|
| 3262 | + i, ts, te); |
---|
| 3263 | + intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n", |
---|
| 3264 | + i, r->start, r->end); |
---|
| 3265 | + } |
---|
| 3266 | + |
---|
| 3267 | + return 0; |
---|
| 3268 | +} |
---|
| 3269 | + |
---|
2364 | 3270 | static const char * const intel_pt_info_fmts[] = { |
---|
2365 | 3271 | [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", |
---|
2366 | 3272 | [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", |
---|
.. | .. |
---|
2373 | 3279 | [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", |
---|
2374 | 3280 | [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", |
---|
2375 | 3281 | [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", |
---|
| 3282 | + [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n", |
---|
2376 | 3283 | [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", |
---|
2377 | 3284 | [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", |
---|
2378 | 3285 | [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", |
---|
.. | .. |
---|
2380 | 3287 | [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n", |
---|
2381 | 3288 | }; |
---|
2382 | 3289 | |
---|
2383 | | -static void intel_pt_print_info(u64 *arr, int start, int finish) |
---|
| 3290 | +static void intel_pt_print_info(__u64 *arr, int start, int finish) |
---|
2384 | 3291 | { |
---|
2385 | 3292 | int i; |
---|
2386 | 3293 | |
---|
2387 | 3294 | if (!dump_trace) |
---|
2388 | 3295 | return; |
---|
2389 | 3296 | |
---|
2390 | | - for (i = start; i <= finish; i++) |
---|
2391 | | - fprintf(stdout, intel_pt_info_fmts[i], arr[i]); |
---|
| 3297 | + for (i = start; i <= finish; i++) { |
---|
| 3298 | + const char *fmt = intel_pt_info_fmts[i]; |
---|
| 3299 | + |
---|
| 3300 | + if (fmt) |
---|
| 3301 | + fprintf(stdout, fmt, arr[i]); |
---|
| 3302 | + } |
---|
2392 | 3303 | } |
---|
2393 | 3304 | |
---|
2394 | 3305 | static void intel_pt_print_info_str(const char *name, const char *str) |
---|
.. | .. |
---|
2399 | 3310 | fprintf(stdout, " %-20s%s\n", name, str ? str : ""); |
---|
2400 | 3311 | } |
---|
2401 | 3312 | |
---|
2402 | | -static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos) |
---|
| 3313 | +static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos) |
---|
2403 | 3314 | { |
---|
2404 | 3315 | return auxtrace_info->header.size >= |
---|
2405 | | - sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1)); |
---|
| 3316 | + sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1)); |
---|
2406 | 3317 | } |
---|
2407 | 3318 | |
---|
2408 | 3319 | int intel_pt_process_auxtrace_info(union perf_event *event, |
---|
2409 | 3320 | struct perf_session *session) |
---|
2410 | 3321 | { |
---|
2411 | | - struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; |
---|
| 3322 | + struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; |
---|
2412 | 3323 | size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS; |
---|
2413 | 3324 | struct intel_pt *pt; |
---|
2414 | 3325 | void *info_end; |
---|
2415 | | - u64 *info; |
---|
| 3326 | + __u64 *info; |
---|
2416 | 3327 | int err; |
---|
2417 | 3328 | |
---|
2418 | | - if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + |
---|
| 3329 | + if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + |
---|
2419 | 3330 | min_sz) |
---|
2420 | 3331 | return -EINVAL; |
---|
2421 | 3332 | |
---|
.. | .. |
---|
2513 | 3424 | if (pt->timeless_decoding && !pt->tc.time_mult) |
---|
2514 | 3425 | pt->tc.time_mult = 1; |
---|
2515 | 3426 | pt->have_tsc = intel_pt_have_tsc(pt); |
---|
2516 | | - pt->sampling_mode = false; |
---|
| 3427 | + pt->sampling_mode = intel_pt_sampling_mode(pt); |
---|
2517 | 3428 | pt->est_tsc = !pt->timeless_decoding; |
---|
2518 | 3429 | |
---|
2519 | 3430 | pt->unknown_thread = thread__new(999999999, 999999999); |
---|
.. | .. |
---|
2533 | 3444 | err = thread__set_comm(pt->unknown_thread, "unknown", 0); |
---|
2534 | 3445 | if (err) |
---|
2535 | 3446 | goto err_delete_thread; |
---|
2536 | | - if (thread__init_map_groups(pt->unknown_thread, pt->machine)) { |
---|
| 3447 | + if (thread__init_maps(pt->unknown_thread, pt->machine)) { |
---|
2537 | 3448 | err = -ENOMEM; |
---|
2538 | 3449 | goto err_delete_thread; |
---|
2539 | 3450 | } |
---|
2540 | 3451 | |
---|
2541 | 3452 | pt->auxtrace.process_event = intel_pt_process_event; |
---|
2542 | 3453 | pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; |
---|
| 3454 | + pt->auxtrace.queue_data = intel_pt_queue_data; |
---|
| 3455 | + pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample; |
---|
2543 | 3456 | pt->auxtrace.flush_events = intel_pt_flush; |
---|
2544 | 3457 | pt->auxtrace.free_events = intel_pt_free_events; |
---|
2545 | 3458 | pt->auxtrace.free = intel_pt_free; |
---|
| 3459 | + pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace; |
---|
2546 | 3460 | session->auxtrace = &pt->auxtrace; |
---|
2547 | 3461 | |
---|
2548 | 3462 | if (dump_trace) |
---|
.. | .. |
---|
2562 | 3476 | goto err_delete_thread; |
---|
2563 | 3477 | } |
---|
2564 | 3478 | |
---|
2565 | | - if (session->itrace_synth_opts && session->itrace_synth_opts->set) { |
---|
| 3479 | + if (session->itrace_synth_opts->set) { |
---|
2566 | 3480 | pt->synth_opts = *session->itrace_synth_opts; |
---|
2567 | 3481 | } else { |
---|
2568 | | - itrace_synth_opts__set_default(&pt->synth_opts); |
---|
2569 | | - if (use_browser != -1) { |
---|
| 3482 | + itrace_synth_opts__set_default(&pt->synth_opts, |
---|
| 3483 | + session->itrace_synth_opts->default_no_sample); |
---|
| 3484 | + if (!session->itrace_synth_opts->default_no_sample && |
---|
| 3485 | + !session->itrace_synth_opts->inject) { |
---|
2570 | 3486 | pt->synth_opts.branches = false; |
---|
2571 | 3487 | pt->synth_opts.callchain = true; |
---|
| 3488 | + pt->synth_opts.add_callchain = true; |
---|
2572 | 3489 | } |
---|
2573 | | - if (session->itrace_synth_opts) |
---|
2574 | | - pt->synth_opts.thread_stack = |
---|
| 3490 | + pt->synth_opts.thread_stack = |
---|
2575 | 3491 | session->itrace_synth_opts->thread_stack; |
---|
2576 | 3492 | } |
---|
2577 | 3493 | |
---|
.. | .. |
---|
2591 | 3507 | pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; |
---|
2592 | 3508 | } |
---|
2593 | 3509 | |
---|
| 3510 | + err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts); |
---|
| 3511 | + if (err) |
---|
| 3512 | + goto err_delete_thread; |
---|
| 3513 | + |
---|
2594 | 3514 | if (pt->synth_opts.calls) |
---|
2595 | 3515 | pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | |
---|
2596 | 3516 | PERF_IP_FLAG_TRACE_END; |
---|
.. | .. |
---|
2598 | 3518 | pt->branches_filter |= PERF_IP_FLAG_RETURN | |
---|
2599 | 3519 | PERF_IP_FLAG_TRACE_BEGIN; |
---|
2600 | 3520 | |
---|
2601 | | - if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { |
---|
| 3521 | + if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) && |
---|
| 3522 | + !symbol_conf.use_callchain) { |
---|
2602 | 3523 | symbol_conf.use_callchain = true; |
---|
2603 | 3524 | if (callchain_register_param(&callchain_param) < 0) { |
---|
2604 | 3525 | symbol_conf.use_callchain = false; |
---|
2605 | 3526 | pt->synth_opts.callchain = false; |
---|
| 3527 | + pt->synth_opts.add_callchain = false; |
---|
2606 | 3528 | } |
---|
2607 | 3529 | } |
---|
| 3530 | + |
---|
| 3531 | + if (pt->synth_opts.add_callchain) { |
---|
| 3532 | + err = intel_pt_callchain_init(pt); |
---|
| 3533 | + if (err) |
---|
| 3534 | + goto err_delete_thread; |
---|
| 3535 | + } |
---|
| 3536 | + |
---|
| 3537 | + if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) { |
---|
| 3538 | + pt->br_stack_sz = pt->synth_opts.last_branch_sz; |
---|
| 3539 | + pt->br_stack_sz_plus = pt->br_stack_sz; |
---|
| 3540 | + } |
---|
| 3541 | + |
---|
| 3542 | + if (pt->synth_opts.add_last_branch) { |
---|
| 3543 | + err = intel_pt_br_stack_init(pt); |
---|
| 3544 | + if (err) |
---|
| 3545 | + goto err_delete_thread; |
---|
| 3546 | + /* |
---|
| 3547 | + * Additional branch stack size to cater for tracing from the |
---|
| 3548 | + * actual sample ip to where the sample time is recorded. |
---|
| 3549 | + * Measured at about 200 branches, but generously set to 1024. |
---|
| 3550 | + * If kernel space is not being traced, then add just 1 for the |
---|
| 3551 | + * branch to kernel space. |
---|
| 3552 | + */ |
---|
| 3553 | + if (intel_pt_tracing_kernel(pt)) |
---|
| 3554 | + pt->br_stack_sz_plus += 1024; |
---|
| 3555 | + else |
---|
| 3556 | + pt->br_stack_sz_plus += 1; |
---|
| 3557 | + } |
---|
| 3558 | + |
---|
| 3559 | + pt->use_thread_stack = pt->synth_opts.callchain || |
---|
| 3560 | + pt->synth_opts.add_callchain || |
---|
| 3561 | + pt->synth_opts.thread_stack || |
---|
| 3562 | + pt->synth_opts.last_branch || |
---|
| 3563 | + pt->synth_opts.add_last_branch; |
---|
| 3564 | + |
---|
| 3565 | + pt->callstack = pt->synth_opts.callchain || |
---|
| 3566 | + pt->synth_opts.add_callchain || |
---|
| 3567 | + pt->synth_opts.thread_stack; |
---|
2608 | 3568 | |
---|
2609 | 3569 | err = intel_pt_synth_events(pt, session); |
---|
2610 | 3570 | if (err) |
---|
2611 | 3571 | goto err_delete_thread; |
---|
2612 | 3572 | |
---|
2613 | | - err = auxtrace_queues__process_index(&pt->queues, session); |
---|
| 3573 | + intel_pt_setup_pebs_events(pt); |
---|
| 3574 | + |
---|
| 3575 | + if (pt->sampling_mode || list_empty(&session->auxtrace_index)) |
---|
| 3576 | + err = auxtrace_queue_data(session, true, true); |
---|
| 3577 | + else |
---|
| 3578 | + err = auxtrace_queues__process_index(&pt->queues, session); |
---|
2614 | 3579 | if (err) |
---|
2615 | 3580 | goto err_delete_thread; |
---|
2616 | 3581 | |
---|
.. | .. |
---|
2623 | 3588 | return 0; |
---|
2624 | 3589 | |
---|
2625 | 3590 | err_delete_thread: |
---|
| 3591 | + zfree(&pt->chain); |
---|
2626 | 3592 | thread__zput(pt->unknown_thread); |
---|
2627 | 3593 | err_free_queues: |
---|
2628 | 3594 | intel_pt_log_disable(); |
---|
.. | .. |
---|
2631 | 3597 | err_free: |
---|
2632 | 3598 | addr_filters__exit(&pt->filts); |
---|
2633 | 3599 | zfree(&pt->filter); |
---|
| 3600 | + zfree(&pt->time_ranges); |
---|
2634 | 3601 | free(pt); |
---|
2635 | 3602 | return err; |
---|
2636 | 3603 | } |
---|