2024-05-10 37f49e37ab4cb5d0bc4c60eb5c6d4dd57db767bb
kernel/tools/perf/util/intel-pt.c
....@@ -1,16 +1,7 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * intel_pt.c: Intel Processor Trace support
34 * Copyright (c) 2013-2015, Intel Corporation.
4
- *
5
- * This program is free software; you can redistribute it and/or modify it
6
- * under the terms and conditions of the GNU General Public License,
7
- * version 2, as published by the Free Software Foundation.
8
- *
9
- * This program is distributed in the hope it will be useful, but WITHOUT
10
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12
- * more details.
13
- *
145 */
156
167 #include <inttypes.h>
....@@ -18,9 +9,10 @@
189 #include <stdbool.h>
1910 #include <errno.h>
2011 #include <linux/kernel.h>
12
+#include <linux/string.h>
2113 #include <linux/types.h>
14
+#include <linux/zalloc.h>
2215
23
-#include "../perf.h"
2416 #include "session.h"
2517 #include "machine.h"
2618 #include "memswap.h"
....@@ -31,7 +23,6 @@
3123 #include "evsel.h"
3224 #include "map.h"
3325 #include "color.h"
34
-#include "util.h"
3526 #include "thread.h"
3627 #include "thread-stack.h"
3728 #include "symbol.h"
....@@ -42,6 +33,11 @@
4233 #include "tsc.h"
4334 #include "intel-pt.h"
4435 #include "config.h"
36
+#include "util/perf_api_probe.h"
37
+#include "util/synthetic-events.h"
38
+#include "time-utils.h"
39
+
40
+#include "../arch/x86/include/uapi/asm/perf_regs.h"
4541
4642 #include "intel-pt-decoder/intel-pt-log.h"
4743 #include "intel-pt-decoder/intel-pt-decoder.h"
....@@ -50,6 +46,11 @@
5046
5147 #define MAX_TIMESTAMP (~0ULL)
5248
49
+struct range {
50
+ u64 start;
51
+ u64 end;
52
+};
53
+
5354 struct intel_pt {
5455 struct auxtrace auxtrace;
5556 struct auxtrace_queues queues;
....@@ -57,7 +58,7 @@
5758 u32 auxtrace_type;
5859 struct perf_session *session;
5960 struct machine *machine;
60
- struct perf_evsel *switch_evsel;
61
+ struct evsel *switch_evsel;
6162 struct thread *unknown_thread;
6263 bool timeless_decoding;
6364 bool sampling_mode;
....@@ -68,6 +69,10 @@
6869 bool est_tsc;
6970 bool sync_switch;
7071 bool mispred_all;
72
+ bool use_thread_stack;
73
+ bool callstack;
74
+ unsigned int br_stack_sz;
75
+ unsigned int br_stack_sz_plus;
7176 int have_sched_switch;
7277 u32 pmu_type;
7378 u64 kernel_start;
....@@ -104,6 +109,9 @@
104109 u64 pwrx_id;
105110 u64 cbr_id;
106111
112
+ bool sample_pebs;
113
+ struct evsel *pebs_evsel;
114
+
107115 u64 tsc_bit;
108116 u64 mtc_bit;
109117 u64 mtc_freq_bits;
....@@ -118,6 +126,12 @@
118126
119127 char *filter;
120128 struct addr_filters filts;
129
+
130
+ struct range *time_ranges;
131
+ unsigned int range_cnt;
132
+
133
+ struct ip_callchain *chain;
134
+ struct branch_stack *br_stack;
121135 };
122136
123137 enum switch_state {
....@@ -137,8 +151,6 @@
137151 const struct intel_pt_state *state;
138152 struct ip_callchain *chain;
139153 struct branch_stack *last_branch;
140
- struct branch_stack *last_branch_rb;
141
- size_t last_branch_pos;
142154 union perf_event *event_buf;
143155 bool on_heap;
144156 bool stop;
....@@ -154,9 +166,19 @@
154166 bool have_sample;
155167 u64 time;
156168 u64 timestamp;
169
+ u64 sel_timestamp;
170
+ bool sel_start;
171
+ unsigned int sel_idx;
157172 u32 flags;
158173 u16 insn_len;
159174 u64 last_insn_cnt;
175
+ u64 ipc_insn_cnt;
176
+ u64 ipc_cyc_cnt;
177
+ u64 last_in_insn_cnt;
178
+ u64 last_in_cyc_cnt;
179
+ u64 last_br_insn_cnt;
180
+ u64 last_br_cyc_cnt;
181
+ unsigned int cbr_seen;
160182 char insn[INTEL_PT_INSN_BUF_SZ];
161183 };
162184
....@@ -168,13 +190,14 @@
168190 int ret, pkt_len, i;
169191 char desc[INTEL_PT_PKT_DESC_MAX];
170192 const char *color = PERF_COLOR_BLUE;
193
+ enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
171194
172195 color_fprintf(stdout, color,
173196 ". ... Intel Processor Trace data: size %zu bytes\n",
174197 len);
175198
176199 while (len) {
177
- ret = intel_pt_get_packet(buf, len, &packet);
200
+ ret = intel_pt_get_packet(buf, len, &packet, &ctx);
178201 if (ret > 0)
179202 pkt_len = ret;
180203 else
....@@ -206,6 +229,44 @@
206229 intel_pt_dump(pt, buf, len);
207230 }
208231
232
+static void intel_pt_log_event(union perf_event *event)
233
+{
234
+ FILE *f = intel_pt_log_fp();
235
+
236
+ if (!intel_pt_enable_logging || !f)
237
+ return;
238
+
239
+ perf_event__fprintf(event, NULL, f);
240
+}
241
+
242
+static void intel_pt_dump_sample(struct perf_session *session,
243
+ struct perf_sample *sample)
244
+{
245
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
246
+ auxtrace);
247
+
248
+ printf("\n");
249
+ intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
250
+}
251
+
252
+static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
253
+{
254
+ struct perf_time_interval *range = pt->synth_opts.ptime_range;
255
+ int n = pt->synth_opts.range_num;
256
+
257
+ if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
258
+ return true;
259
+
260
+ if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
261
+ return false;
262
+
263
+ /* perf_time__ranges_skip_sample does not work if time is zero */
264
+ if (!tm)
265
+ tm = 1;
266
+
267
+ return !n || !perf_time__ranges_skip_sample(range, n, tm);
268
+}
269
+
209270 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
210271 struct auxtrace_buffer *b)
211272 {
....@@ -223,31 +284,12 @@
223284 return 0;
224285 }
225286
226
-/* This function assumes data is processed sequentially only */
227
-static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
287
+static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
288
+ struct auxtrace_buffer *buffer,
289
+ struct auxtrace_buffer *old_buffer,
290
+ struct intel_pt_buffer *b)
228291 {
229
- struct intel_pt_queue *ptq = data;
230
- struct auxtrace_buffer *buffer = ptq->buffer;
231
- struct auxtrace_buffer *old_buffer = ptq->old_buffer;
232
- struct auxtrace_queue *queue;
233292 bool might_overlap;
234
-
235
- if (ptq->stop) {
236
- b->len = 0;
237
- return 0;
238
- }
239
-
240
- queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
241
-
242
- buffer = auxtrace_buffer__next(queue, buffer);
243
- if (!buffer) {
244
- if (old_buffer)
245
- auxtrace_buffer__drop_data(old_buffer);
246
- b->len = 0;
247
- return 0;
248
- }
249
-
250
- ptq->buffer = buffer;
251293
252294 if (!buffer->data) {
253295 int fd = perf_data__fd(ptq->pt->session->data);
....@@ -277,6 +319,95 @@
277319 } else {
278320 b->consecutive = true;
279321 }
322
+
323
+ return 0;
324
+}
325
+
326
+/* Do not drop buffers with references - refer intel_pt_get_trace() */
327
+static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
328
+ struct auxtrace_buffer *buffer)
329
+{
330
+ if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
331
+ return;
332
+
333
+ auxtrace_buffer__drop_data(buffer);
334
+}
335
+
336
+/* Must be serialized with respect to intel_pt_get_trace() */
337
+static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
338
+ void *cb_data)
339
+{
340
+ struct intel_pt_queue *ptq = data;
341
+ struct auxtrace_buffer *buffer = ptq->buffer;
342
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
343
+ struct auxtrace_queue *queue;
344
+ int err = 0;
345
+
346
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
347
+
348
+ while (1) {
349
+ struct intel_pt_buffer b = { .len = 0 };
350
+
351
+ buffer = auxtrace_buffer__next(queue, buffer);
352
+ if (!buffer)
353
+ break;
354
+
355
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
356
+ if (err)
357
+ break;
358
+
359
+ if (b.len) {
360
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer);
361
+ old_buffer = buffer;
362
+ } else {
363
+ intel_pt_lookahead_drop_buffer(ptq, buffer);
364
+ continue;
365
+ }
366
+
367
+ err = cb(&b, cb_data);
368
+ if (err)
369
+ break;
370
+ }
371
+
372
+ if (buffer != old_buffer)
373
+ intel_pt_lookahead_drop_buffer(ptq, buffer);
374
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer);
375
+
376
+ return err;
377
+}
378
+
379
+/*
380
+ * This function assumes data is processed sequentially only.
381
+ * Must be serialized with respect to intel_pt_lookahead()
382
+ */
383
+static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
384
+{
385
+ struct intel_pt_queue *ptq = data;
386
+ struct auxtrace_buffer *buffer = ptq->buffer;
387
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
388
+ struct auxtrace_queue *queue;
389
+ int err;
390
+
391
+ if (ptq->stop) {
392
+ b->len = 0;
393
+ return 0;
394
+ }
395
+
396
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
397
+
398
+ buffer = auxtrace_buffer__next(queue, buffer);
399
+ if (!buffer) {
400
+ if (old_buffer)
401
+ auxtrace_buffer__drop_data(old_buffer);
402
+ b->len = 0;
403
+ return 0;
404
+ }
405
+
406
+ ptq->buffer = buffer;
407
+
408
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
409
+ if (err)
410
+ return err;
280411
281412 if (ptq->step_through_buffers)
282413 ptq->stop = true;
....@@ -405,6 +536,17 @@
405536 return NULL;
406537
407538 return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
539
+}
540
+
541
+static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
542
+ u64 offset)
543
+{
544
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
545
+
546
+ if (!c)
547
+ return;
548
+
549
+ auxtrace_cache__remove(dso->auxtrace_cache, offset);
408550 }
409551
410552 static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
....@@ -629,11 +771,11 @@
629771
630772 static bool intel_pt_exclude_kernel(struct intel_pt *pt)
631773 {
632
- struct perf_evsel *evsel;
774
+ struct evsel *evsel;
633775
634776 evlist__for_each_entry(pt->session->evlist, evsel) {
635
- if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
636
- !evsel->attr.exclude_kernel)
777
+ if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
778
+ !evsel->core.attr.exclude_kernel)
637779 return false;
638780 }
639781 return true;
....@@ -641,14 +783,14 @@
641783
642784 static bool intel_pt_return_compression(struct intel_pt *pt)
643785 {
644
- struct perf_evsel *evsel;
786
+ struct evsel *evsel;
645787 u64 config;
646788
647789 if (!pt->noretcomp_bit)
648790 return true;
649791
650792 evlist__for_each_entry(pt->session->evlist, evsel) {
651
- if (intel_pt_get_config(pt, &evsel->attr, &config) &&
793
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
652794 (config & pt->noretcomp_bit))
653795 return false;
654796 }
....@@ -657,11 +799,11 @@
657799
658800 static bool intel_pt_branch_enable(struct intel_pt *pt)
659801 {
660
- struct perf_evsel *evsel;
802
+ struct evsel *evsel;
661803 u64 config;
662804
663805 evlist__for_each_entry(pt->session->evlist, evsel) {
664
- if (intel_pt_get_config(pt, &evsel->attr, &config) &&
806
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
665807 (config & 1) && !(config & 0x2000))
666808 return false;
667809 }
....@@ -670,7 +812,7 @@
670812
671813 static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
672814 {
673
- struct perf_evsel *evsel;
815
+ struct evsel *evsel;
674816 unsigned int shift;
675817 u64 config;
676818
....@@ -681,7 +823,7 @@
681823 config >>= 1;
682824
683825 evlist__for_each_entry(pt->session->evlist, evsel) {
684
- if (intel_pt_get_config(pt, &evsel->attr, &config))
826
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config))
685827 return (config & pt->mtc_freq_bits) >> shift;
686828 }
687829 return 0;
....@@ -689,7 +831,7 @@
689831
690832 static bool intel_pt_timeless_decoding(struct intel_pt *pt)
691833 {
692
- struct perf_evsel *evsel;
834
+ struct evsel *evsel;
693835 bool timeless_decoding = true;
694836 u64 config;
695837
....@@ -697,9 +839,9 @@
697839 return true;
698840
699841 evlist__for_each_entry(pt->session->evlist, evsel) {
700
- if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
842
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
701843 return true;
702
- if (intel_pt_get_config(pt, &evsel->attr, &config)) {
844
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
703845 if (config & pt->tsc_bit)
704846 timeless_decoding = false;
705847 else
....@@ -711,11 +853,11 @@
711853
712854 static bool intel_pt_tracing_kernel(struct intel_pt *pt)
713855 {
714
- struct perf_evsel *evsel;
856
+ struct evsel *evsel;
715857
716858 evlist__for_each_entry(pt->session->evlist, evsel) {
717
- if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
718
- !evsel->attr.exclude_kernel)
859
+ if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
860
+ !evsel->core.attr.exclude_kernel)
719861 return true;
720862 }
721863 return false;
....@@ -723,7 +865,7 @@
723865
724866 static bool intel_pt_have_tsc(struct intel_pt *pt)
725867 {
726
- struct perf_evsel *evsel;
868
+ struct evsel *evsel;
727869 bool have_tsc = false;
728870 u64 config;
729871
....@@ -731,7 +873,7 @@
731873 return false;
732874
733875 evlist__for_each_entry(pt->session->evlist, evsel) {
734
- if (intel_pt_get_config(pt, &evsel->attr, &config)) {
876
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
735877 if (config & pt->tsc_bit)
736878 have_tsc = true;
737879 else
....@@ -739,6 +881,30 @@
739881 }
740882 }
741883 return have_tsc;
884
+}
885
+
886
+static bool intel_pt_sampling_mode(struct intel_pt *pt)
887
+{
888
+ struct evsel *evsel;
889
+
890
+ evlist__for_each_entry(pt->session->evlist, evsel) {
891
+ if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) &&
892
+ evsel->core.attr.aux_sample_size)
893
+ return true;
894
+ }
895
+ return false;
896
+}
897
+
898
+static u64 intel_pt_ctl(struct intel_pt *pt)
899
+{
900
+ struct evsel *evsel;
901
+ u64 config;
902
+
903
+ evlist__for_each_entry(pt->session->evlist, evsel) {
904
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config))
905
+ return config;
906
+ }
907
+ return 0;
742908 }
743909
744910 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
....@@ -750,6 +916,86 @@
750916 return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
751917 pt->tc.time_mult;
752918 }
919
+
920
+static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
921
+{
922
+ size_t sz = sizeof(struct ip_callchain);
923
+
924
+ /* Add 1 to callchain_sz for callchain context */
925
+ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
926
+ return zalloc(sz);
927
+}
928
+
929
+static int intel_pt_callchain_init(struct intel_pt *pt)
930
+{
931
+ struct evsel *evsel;
932
+
933
+ evlist__for_each_entry(pt->session->evlist, evsel) {
934
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
935
+ evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
936
+ }
937
+
938
+ pt->chain = intel_pt_alloc_chain(pt);
939
+ if (!pt->chain)
940
+ return -ENOMEM;
941
+
942
+ return 0;
943
+}
944
+
945
+static void intel_pt_add_callchain(struct intel_pt *pt,
946
+ struct perf_sample *sample)
947
+{
948
+ struct thread *thread = machine__findnew_thread(pt->machine,
949
+ sample->pid,
950
+ sample->tid);
951
+
952
+ thread_stack__sample_late(thread, sample->cpu, pt->chain,
953
+ pt->synth_opts.callchain_sz + 1, sample->ip,
954
+ pt->kernel_start);
955
+
956
+ sample->callchain = pt->chain;
957
+}
958
+
959
+static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
960
+{
961
+ size_t sz = sizeof(struct branch_stack);
962
+
963
+ sz += entry_cnt * sizeof(struct branch_entry);
964
+ return zalloc(sz);
965
+}
966
+
967
+static int intel_pt_br_stack_init(struct intel_pt *pt)
968
+{
969
+ struct evsel *evsel;
970
+
971
+ evlist__for_each_entry(pt->session->evlist, evsel) {
972
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
973
+ evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
974
+ }
975
+
976
+ pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);
977
+ if (!pt->br_stack)
978
+ return -ENOMEM;
979
+
980
+ return 0;
981
+}
982
+
983
+static void intel_pt_add_br_stack(struct intel_pt *pt,
984
+ struct perf_sample *sample)
985
+{
986
+ struct thread *thread = machine__findnew_thread(pt->machine,
987
+ sample->pid,
988
+ sample->tid);
989
+
990
+ thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
991
+ pt->br_stack_sz, sample->ip,
992
+ pt->kernel_start);
993
+
994
+ sample->branch_stack = pt->br_stack;
995
+}
996
+
997
+/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
998
+#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
753999
7541000 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
7551001 unsigned int queue_nr)
....@@ -763,25 +1009,16 @@
7631009 return NULL;
7641010
7651011 if (pt->synth_opts.callchain) {
766
- size_t sz = sizeof(struct ip_callchain);
767
-
768
- /* Add 1 to callchain_sz for callchain context */
769
- sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
770
- ptq->chain = zalloc(sz);
1012
+ ptq->chain = intel_pt_alloc_chain(pt);
7711013 if (!ptq->chain)
7721014 goto out_free;
7731015 }
7741016
775
- if (pt->synth_opts.last_branch) {
776
- size_t sz = sizeof(struct branch_stack);
1017
+ if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
1018
+ unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);
7771019
778
- sz += pt->synth_opts.last_branch_sz *
779
- sizeof(struct branch_entry);
780
- ptq->last_branch = zalloc(sz);
1020
+ ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
7811021 if (!ptq->last_branch)
782
- goto out_free;
783
- ptq->last_branch_rb = zalloc(sz);
784
- if (!ptq->last_branch_rb)
7851022 goto out_free;
7861023 }
7871024
....@@ -799,13 +1036,16 @@
7991036
8001037 params.get_trace = intel_pt_get_trace;
8011038 params.walk_insn = intel_pt_walk_next_insn;
1039
+ params.lookahead = intel_pt_lookahead;
8021040 params.data = ptq;
8031041 params.return_compression = intel_pt_return_compression(pt);
8041042 params.branch_enable = intel_pt_branch_enable(pt);
1043
+ params.ctl = intel_pt_ctl(pt);
8051044 params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
8061045 params.mtc_period = intel_pt_mtc_period(pt);
8071046 params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
8081047 params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
1048
+ params.quick = pt->synth_opts.quick;
8091049
8101050 if (pt->filts.cnt > 0)
8111051 params.pgd_ip = intel_pt_pgd_ip;
....@@ -850,7 +1090,6 @@
8501090 out_free:
8511091 zfree(&ptq->event_buf);
8521092 zfree(&ptq->last_branch);
853
- zfree(&ptq->last_branch_rb);
8541093 zfree(&ptq->chain);
8551094 free(ptq);
8561095 return NULL;
....@@ -866,7 +1105,6 @@
8661105 intel_pt_decoder_free(ptq->decoder);
8671106 zfree(&ptq->event_buf);
8681107 zfree(&ptq->last_branch);
869
- zfree(&ptq->last_branch_rb);
8701108 zfree(&ptq->chain);
8711109 free(ptq);
8721110 }
....@@ -918,6 +1156,28 @@
9181156 ptq->insn_len = ptq->state->insn_len;
9191157 memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
9201158 }
1159
+
1160
+ if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
1161
+ ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
1162
+ if (ptq->state->type & INTEL_PT_TRACE_END)
1163
+ ptq->flags |= PERF_IP_FLAG_TRACE_END;
1164
+}
1165
+
1166
+static void intel_pt_setup_time_range(struct intel_pt *pt,
1167
+ struct intel_pt_queue *ptq)
1168
+{
1169
+ if (!pt->range_cnt)
1170
+ return;
1171
+
1172
+ ptq->sel_timestamp = pt->time_ranges[0].start;
1173
+ ptq->sel_idx = 0;
1174
+
1175
+ if (ptq->sel_timestamp) {
1176
+ ptq->sel_start = true;
1177
+ } else {
1178
+ ptq->sel_timestamp = pt->time_ranges[0].end;
1179
+ ptq->sel_start = false;
1180
+ }
9211181 }
9221182
9231183 static int intel_pt_setup_queue(struct intel_pt *pt,
....@@ -939,11 +1199,15 @@
9391199 ptq->cpu = queue->cpu;
9401200 ptq->tid = queue->tid;
9411201
1202
+ ptq->cbr_seen = UINT_MAX;
1203
+
9421204 if (pt->sampling_mode && !pt->snapshot_mode &&
9431205 pt->timeless_decoding)
9441206 ptq->step_through_buffers = true;
9451207
9461208 ptq->sync_switch = pt->sync_switch;
1209
+
1210
+ intel_pt_setup_time_range(pt, ptq);
9471211 }
9481212
9491213 if (!ptq->on_heap &&
....@@ -958,6 +1222,14 @@
9581222 intel_pt_log("queue %u getting timestamp\n", queue_nr);
9591223 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
9601224 queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1225
+
1226
+ if (ptq->sel_start && ptq->sel_timestamp) {
1227
+ ret = intel_pt_fast_forward(ptq->decoder,
1228
+ ptq->sel_timestamp);
1229
+ if (ret)
1230
+ return ret;
1231
+ }
1232
+
9611233 while (1) {
9621234 state = intel_pt_decode(ptq->decoder);
9631235 if (state->err) {
....@@ -977,6 +1249,9 @@
9771249 queue_nr, ptq->timestamp);
9781250 ptq->state = state;
9791251 ptq->have_sample = true;
1252
+ if (ptq->sel_start && ptq->sel_timestamp &&
1253
+ ptq->timestamp < ptq->sel_timestamp)
1254
+ ptq->have_sample = false;
9801255 intel_pt_sample_flags(ptq);
9811256 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
9821257 if (ret)
....@@ -1000,62 +1275,35 @@
10001275 return 0;
10011276 }
10021277
1003
-static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
1004
-{
1005
- struct branch_stack *bs_src = ptq->last_branch_rb;
1006
- struct branch_stack *bs_dst = ptq->last_branch;
1007
- size_t nr = 0;
1008
-
1009
- bs_dst->nr = bs_src->nr;
1010
-
1011
- if (!bs_src->nr)
1012
- return;
1013
-
1014
- nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
1015
- memcpy(&bs_dst->entries[0],
1016
- &bs_src->entries[ptq->last_branch_pos],
1017
- sizeof(struct branch_entry) * nr);
1018
-
1019
- if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
1020
- memcpy(&bs_dst->entries[nr],
1021
- &bs_src->entries[0],
1022
- sizeof(struct branch_entry) * ptq->last_branch_pos);
1023
- }
1024
-}
1025
-
1026
-static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
1027
-{
1028
- ptq->last_branch_pos = 0;
1029
- ptq->last_branch_rb->nr = 0;
1030
-}
1031
-
1032
-static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
1033
-{
1034
- const struct intel_pt_state *state = ptq->state;
1035
- struct branch_stack *bs = ptq->last_branch_rb;
1036
- struct branch_entry *be;
1037
-
1038
- if (!ptq->last_branch_pos)
1039
- ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
1040
-
1041
- ptq->last_branch_pos -= 1;
1042
-
1043
- be = &bs->entries[ptq->last_branch_pos];
1044
- be->from = state->from_ip;
1045
- be->to = state->to_ip;
1046
- be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
1047
- be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
1048
- /* No support for mispredict */
1049
- be->flags.mispred = ptq->pt->mispred_all;
1050
-
1051
- if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
1052
- bs->nr += 1;
1053
-}
1054
-
10551278 static inline bool intel_pt_skip_event(struct intel_pt *pt)
10561279 {
10571280 return pt->synth_opts.initial_skip &&
10581281 pt->num_events++ < pt->synth_opts.initial_skip;
1282
+}
1283
+
1284
+/*
1285
+ * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
1286
+ * Also ensure CBR is first non-skipped event by allowing for 4 more samples
1287
+ * from this decoder state.
1288
+ */
1289
+static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
1290
+{
1291
+ return pt->synth_opts.initial_skip &&
1292
+ pt->num_events + 4 < pt->synth_opts.initial_skip;
1293
+}
1294
+
1295
+static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
1296
+ union perf_event *event,
1297
+ struct perf_sample *sample)
1298
+{
1299
+ event->sample.header.type = PERF_RECORD_SAMPLE;
1300
+ event->sample.header.size = sizeof(struct perf_event_header);
1301
+
1302
+ sample->pid = ptq->pid;
1303
+ sample->tid = ptq->tid;
1304
+ sample->cpu = ptq->cpu;
1305
+ sample->insn_len = ptq->insn_len;
1306
+ memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
10591307 }
10601308
10611309 static void intel_pt_prep_b_sample(struct intel_pt *pt,
....@@ -1063,23 +1311,18 @@
10631311 union perf_event *event,
10641312 struct perf_sample *sample)
10651313 {
1314
+ intel_pt_prep_a_sample(ptq, event, sample);
1315
+
10661316 if (!pt->timeless_decoding)
10671317 sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
10681318
10691319 sample->ip = ptq->state->from_ip;
10701320 sample->cpumode = intel_pt_cpumode(pt, sample->ip);
1071
- sample->pid = ptq->pid;
1072
- sample->tid = ptq->tid;
10731321 sample->addr = ptq->state->to_ip;
10741322 sample->period = 1;
1075
- sample->cpu = ptq->cpu;
10761323 sample->flags = ptq->flags;
1077
- sample->insn_len = ptq->insn_len;
1078
- memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
10791324
1080
- event->sample.header.type = PERF_RECORD_SAMPLE;
10811325 event->sample.header.misc = sample->cpumode;
1082
- event->sample.header.size = sizeof(struct perf_event_header);
10831326 }
10841327
10851328 static int intel_pt_inject_event(union perf_event *event,
....@@ -1099,9 +1342,9 @@
10991342 return intel_pt_inject_event(event, sample, type);
11001343 }
11011344
1102
-static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
1103
- union perf_event *event,
1104
- struct perf_sample *sample, u64 type)
1345
+static int intel_pt_deliver_synth_event(struct intel_pt *pt,
1346
+ union perf_event *event,
1347
+ struct perf_sample *sample, u64 type)
11051348 {
11061349 int ret;
11071350
....@@ -1123,6 +1366,7 @@
11231366 struct perf_sample sample = { .ip = 0, };
11241367 struct dummy_branch_stack {
11251368 u64 nr;
1369
+ u64 hw_idx;
11261370 struct branch_entry entries;
11271371 } dummy_bs;
11281372
....@@ -1144,6 +1388,7 @@
11441388 if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
11451389 dummy_bs = (struct dummy_branch_stack){
11461390 .nr = 1,
1391
+ .hw_idx = -1ULL,
11471392 .entries = {
11481393 .from = sample.ip,
11491394 .to = sample.addr,
....@@ -1152,8 +1397,16 @@
11521397 sample.branch_stack = (struct branch_stack *)&dummy_bs;
11531398 }
11541399
1155
- return intel_pt_deliver_synth_b_event(pt, event, &sample,
1156
- pt->branches_sample_type);
1400
+ if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
1401
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
1402
+ if (sample.cyc_cnt) {
1403
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
1404
+ ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
1405
+ ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
1406
+ }
1407
+
1408
+ return intel_pt_deliver_synth_event(pt, event, &sample,
1409
+ pt->branches_sample_type);
11571410 }
11581411
11591412 static void intel_pt_prep_sample(struct intel_pt *pt,
....@@ -1164,32 +1417,17 @@
11641417 intel_pt_prep_b_sample(pt, ptq, event, sample);
11651418
11661419 if (pt->synth_opts.callchain) {
1167
- thread_stack__sample(ptq->thread, ptq->chain,
1420
+ thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
11681421 pt->synth_opts.callchain_sz + 1,
11691422 sample->ip, pt->kernel_start);
11701423 sample->callchain = ptq->chain;
11711424 }
11721425
11731426 if (pt->synth_opts.last_branch) {
1174
- intel_pt_copy_last_branch_rb(ptq);
1427
+ thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
1428
+ pt->br_stack_sz);
11751429 sample->branch_stack = ptq->last_branch;
11761430 }
1177
-}
1178
-
1179
-static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
1180
- struct intel_pt_queue *ptq,
1181
- union perf_event *event,
1182
- struct perf_sample *sample,
1183
- u64 type)
1184
-{
1185
- int ret;
1186
-
1187
- ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
1188
-
1189
- if (pt->synth_opts.last_branch)
1190
- intel_pt_reset_last_branch_rb(ptq);
1191
-
1192
- return ret;
11931431 }
11941432
11951433 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
....@@ -1205,11 +1443,22 @@
12051443
12061444 sample.id = ptq->pt->instructions_id;
12071445 sample.stream_id = ptq->pt->instructions_id;
1208
- sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1446
+ if (pt->synth_opts.quick)
1447
+ sample.period = 1;
1448
+ else
1449
+ sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1450
+
1451
+ if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
1452
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
1453
+ if (sample.cyc_cnt) {
1454
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
1455
+ ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
1456
+ ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
1457
+ }
12091458
12101459 ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
12111460
1212
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1461
+ return intel_pt_deliver_synth_event(pt, event, &sample,
12131462 pt->instructions_sample_type);
12141463 }
12151464
....@@ -1227,7 +1476,7 @@
12271476 sample.id = ptq->pt->transactions_id;
12281477 sample.stream_id = ptq->pt->transactions_id;
12291478
1230
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1479
+ return intel_pt_deliver_synth_event(pt, event, &sample,
12311480 pt->transactions_sample_type);
12321481 }
12331482
....@@ -1268,7 +1517,7 @@
12681517 sample.raw_size = perf_synth__raw_size(raw);
12691518 sample.raw_data = perf_synth__raw_data(&raw);
12701519
1271
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1520
+ return intel_pt_deliver_synth_event(pt, event, &sample,
12721521 pt->ptwrites_sample_type);
12731522 }
12741523
....@@ -1280,8 +1529,10 @@
12801529 struct perf_synth_intel_cbr raw;
12811530 u32 flags;
12821531
1283
- if (intel_pt_skip_event(pt))
1532
+ if (intel_pt_skip_cbr_event(pt))
12841533 return 0;
1534
+
1535
+ ptq->cbr_seen = ptq->state->cbr;
12851536
12861537 intel_pt_prep_p_sample(pt, ptq, event, &sample);
12871538
....@@ -1296,7 +1547,7 @@
12961547 sample.raw_size = perf_synth__raw_size(raw);
12971548 sample.raw_data = perf_synth__raw_data(&raw);
12981549
1299
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1550
+ return intel_pt_deliver_synth_event(pt, event, &sample,
13001551 pt->pwr_events_sample_type);
13011552 }
13021553
....@@ -1321,7 +1572,7 @@
13211572 sample.raw_size = perf_synth__raw_size(raw);
13221573 sample.raw_data = perf_synth__raw_data(&raw);
13231574
1324
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1575
+ return intel_pt_deliver_synth_event(pt, event, &sample,
13251576 pt->pwr_events_sample_type);
13261577 }
13271578
....@@ -1346,7 +1597,7 @@
13461597 sample.raw_size = perf_synth__raw_size(raw);
13471598 sample.raw_data = perf_synth__raw_data(&raw);
13481599
1349
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1600
+ return intel_pt_deliver_synth_event(pt, event, &sample,
13501601 pt->pwr_events_sample_type);
13511602 }
13521603
....@@ -1371,7 +1622,7 @@
13711622 sample.raw_size = perf_synth__raw_size(raw);
13721623 sample.raw_data = perf_synth__raw_data(&raw);
13731624
1374
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1625
+ return intel_pt_deliver_synth_event(pt, event, &sample,
13751626 pt->pwr_events_sample_type);
13761627 }
13771628
....@@ -1396,21 +1647,277 @@
13961647 sample.raw_size = perf_synth__raw_size(raw);
13971648 sample.raw_data = perf_synth__raw_data(&raw);
13981649
1399
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1650
+ return intel_pt_deliver_synth_event(pt, event, &sample,
14001651 pt->pwr_events_sample_type);
14011652 }
14021653
1654
+/*
1655
+ * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
1656
+ * intel_pt_add_gp_regs().
1657
+ */
1658
+static const int pebs_gp_regs[] = {
1659
+ [PERF_REG_X86_FLAGS] = 1,
1660
+ [PERF_REG_X86_IP] = 2,
1661
+ [PERF_REG_X86_AX] = 3,
1662
+ [PERF_REG_X86_CX] = 4,
1663
+ [PERF_REG_X86_DX] = 5,
1664
+ [PERF_REG_X86_BX] = 6,
1665
+ [PERF_REG_X86_SP] = 7,
1666
+ [PERF_REG_X86_BP] = 8,
1667
+ [PERF_REG_X86_SI] = 9,
1668
+ [PERF_REG_X86_DI] = 10,
1669
+ [PERF_REG_X86_R8] = 11,
1670
+ [PERF_REG_X86_R9] = 12,
1671
+ [PERF_REG_X86_R10] = 13,
1672
+ [PERF_REG_X86_R11] = 14,
1673
+ [PERF_REG_X86_R12] = 15,
1674
+ [PERF_REG_X86_R13] = 16,
1675
+ [PERF_REG_X86_R14] = 17,
1676
+ [PERF_REG_X86_R15] = 18,
1677
+};
1678
+
1679
+static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
1680
+ const struct intel_pt_blk_items *items,
1681
+ u64 regs_mask)
1682
+{
1683
+ const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
1684
+ u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
1685
+ u32 bit;
1686
+ int i;
1687
+
1688
+ for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
1689
+ /* Get the PEBS gp_regs array index */
1690
+ int n = pebs_gp_regs[i] - 1;
1691
+
1692
+ if (n < 0)
1693
+ continue;
1694
+ /*
1695
+ * Add only registers that were requested (i.e. 'regs_mask') and
1696
+ * that were provided (i.e. 'mask'), and update the resulting
1697
+ * mask (i.e. 'intr_regs->mask') accordingly.
1698
+ */
1699
+ if (mask & 1 << n && regs_mask & bit) {
1700
+ intr_regs->mask |= bit;
1701
+ *pos++ = gp_regs[n];
1702
+ }
1703
+ }
1704
+
1705
+ return pos;
1706
+}
1707
+
1708
+#ifndef PERF_REG_X86_XMM0
1709
+#define PERF_REG_X86_XMM0 32
1710
+#endif
1711
+
1712
+static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
1713
+ const struct intel_pt_blk_items *items,
1714
+ u64 regs_mask)
1715
+{
1716
+ u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
1717
+ const u64 *xmm = items->xmm;
1718
+
1719
+ /*
1720
+ * If there are any XMM registers, then there should be all of them.
1721
+ * Nevertheless, follow the logic to add only registers that were
1722
+ * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
1723
+ * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
1724
+ */
1725
+ intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;
1726
+
1727
+ for (; mask; mask >>= 1, xmm++) {
1728
+ if (mask & 1)
1729
+ *pos++ = *xmm;
1730
+ }
1731
+}
1732
+
1733
+#define LBR_INFO_MISPRED (1ULL << 63)
1734
+#define LBR_INFO_IN_TX (1ULL << 62)
1735
+#define LBR_INFO_ABORT (1ULL << 61)
1736
+#define LBR_INFO_CYCLES 0xffff
1737
+
1738
+/* Refer kernel's intel_pmu_store_pebs_lbrs() */
1739
+static u64 intel_pt_lbr_flags(u64 info)
1740
+{
1741
+ union {
1742
+ struct branch_flags flags;
1743
+ u64 result;
1744
+ } u;
1745
+
1746
+ u.result = 0;
1747
+ u.flags.mispred = !!(info & LBR_INFO_MISPRED);
1748
+ u.flags.predicted = !(info & LBR_INFO_MISPRED);
1749
+ u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
1750
+ u.flags.abort = !!(info & LBR_INFO_ABORT);
1751
+ u.flags.cycles = info & LBR_INFO_CYCLES;
1752
+
1753
+ return u.result;
1754
+}
1755
+
1756
+static void intel_pt_add_lbrs(struct branch_stack *br_stack,
1757
+ const struct intel_pt_blk_items *items)
1758
+{
1759
+ u64 *to;
1760
+ int i;
1761
+
1762
+ br_stack->nr = 0;
1763
+
1764
+ to = &br_stack->entries[0].from;
1765
+
1766
+ for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
1767
+ u32 mask = items->mask[i];
1768
+ const u64 *from = items->val[i];
1769
+
1770
+ for (; mask; mask >>= 3, from += 3) {
1771
+ if ((mask & 7) == 7) {
1772
+ *to++ = from[0];
1773
+ *to++ = from[1];
1774
+ *to++ = intel_pt_lbr_flags(from[2]);
1775
+ br_stack->nr += 1;
1776
+ }
1777
+ }
1778
+ }
1779
+}
1780
+
1781
+static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
1782
+{
1783
+ const struct intel_pt_blk_items *items = &ptq->state->items;
1784
+ struct perf_sample sample = { .ip = 0, };
1785
+ union perf_event *event = ptq->event_buf;
1786
+ struct intel_pt *pt = ptq->pt;
1787
+ struct evsel *evsel = pt->pebs_evsel;
1788
+ u64 sample_type = evsel->core.attr.sample_type;
1789
+ u64 id = evsel->core.id[0];
1790
+ u8 cpumode;
1791
+ u64 regs[8 * sizeof(sample.intr_regs.mask)];
1792
+
1793
+ if (intel_pt_skip_event(pt))
1794
+ return 0;
1795
+
1796
+ intel_pt_prep_a_sample(ptq, event, &sample);
1797
+
1798
+ sample.id = id;
1799
+ sample.stream_id = id;
1800
+
1801
+ if (!evsel->core.attr.freq)
1802
+ sample.period = evsel->core.attr.sample_period;
1803
+
1804
+ /* No support for non-zero CS base */
1805
+ if (items->has_ip)
1806
+ sample.ip = items->ip;
1807
+ else if (items->has_rip)
1808
+ sample.ip = items->rip;
1809
+ else
1810
+ sample.ip = ptq->state->from_ip;
1811
+
1812
+ /* No support for guest mode at this time */
1813
+ cpumode = sample.ip < ptq->pt->kernel_start ?
1814
+ PERF_RECORD_MISC_USER :
1815
+ PERF_RECORD_MISC_KERNEL;
1816
+
1817
+ event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
1818
+
1819
+ sample.cpumode = cpumode;
1820
+
1821
+ if (sample_type & PERF_SAMPLE_TIME) {
1822
+ u64 timestamp = 0;
1823
+
1824
+ if (items->has_timestamp)
1825
+ timestamp = items->timestamp;
1826
+ else if (!pt->timeless_decoding)
1827
+ timestamp = ptq->timestamp;
1828
+ if (timestamp)
1829
+ sample.time = tsc_to_perf_time(timestamp, &pt->tc);
1830
+ }
1831
+
1832
+ if (sample_type & PERF_SAMPLE_CALLCHAIN &&
1833
+ pt->synth_opts.callchain) {
1834
+ thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
1835
+ pt->synth_opts.callchain_sz, sample.ip,
1836
+ pt->kernel_start);
1837
+ sample.callchain = ptq->chain;
1838
+ }
1839
+
1840
+ if (sample_type & PERF_SAMPLE_REGS_INTR &&
1841
+ (items->mask[INTEL_PT_GP_REGS_POS] ||
1842
+ items->mask[INTEL_PT_XMM_POS])) {
1843
+ u64 regs_mask = evsel->core.attr.sample_regs_intr;
1844
+ u64 *pos;
1845
+
1846
+ sample.intr_regs.abi = items->is_32_bit ?
1847
+ PERF_SAMPLE_REGS_ABI_32 :
1848
+ PERF_SAMPLE_REGS_ABI_64;
1849
+ sample.intr_regs.regs = regs;
1850
+
1851
+ pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
1852
+
1853
+ intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
1854
+ }
1855
+
1856
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1857
+ if (items->mask[INTEL_PT_LBR_0_POS] ||
1858
+ items->mask[INTEL_PT_LBR_1_POS] ||
1859
+ items->mask[INTEL_PT_LBR_2_POS]) {
1860
+ intel_pt_add_lbrs(ptq->last_branch, items);
1861
+ } else if (pt->synth_opts.last_branch) {
1862
+ thread_stack__br_sample(ptq->thread, ptq->cpu,
1863
+ ptq->last_branch,
1864
+ pt->br_stack_sz);
1865
+ } else {
1866
+ ptq->last_branch->nr = 0;
1867
+ }
1868
+ sample.branch_stack = ptq->last_branch;
1869
+ }
1870
+
1871
+ if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
1872
+ sample.addr = items->mem_access_address;
1873
+
1874
+ if (sample_type & PERF_SAMPLE_WEIGHT) {
1875
+ /*
1876
+ * Refer kernel's setup_pebs_adaptive_sample_data() and
1877
+ * intel_hsw_weight().
1878
+ */
1879
+ if (items->has_mem_access_latency)
1880
+ sample.weight = items->mem_access_latency;
1881
+ if (!sample.weight && items->has_tsx_aux_info) {
1882
+ /* Cycles last block */
1883
+ sample.weight = (u32)items->tsx_aux_info;
1884
+ }
1885
+ }
1886
+
1887
+ if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
1888
+ u64 ax = items->has_rax ? items->rax : 0;
1889
+ /* Refer kernel's intel_hsw_transaction() */
1890
+ u64 txn = (u8)(items->tsx_aux_info >> 32);
1891
+
1892
+ /* For RTM XABORTs also log the abort code from AX */
1893
+ if (txn & PERF_TXN_TRANSACTION && ax & 1)
1894
+ txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
1895
+ sample.transaction = txn;
1896
+ }
1897
+
1898
+ return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
1899
+}
1900
+
14031901 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1404
- pid_t pid, pid_t tid, u64 ip)
1902
+ pid_t pid, pid_t tid, u64 ip, u64 timestamp)
14051903 {
14061904 union perf_event event;
14071905 char msg[MAX_AUXTRACE_ERROR_MSG];
14081906 int err;
14091907
1908
+ if (pt->synth_opts.error_minus_flags) {
1909
+ if (code == INTEL_PT_ERR_OVR &&
1910
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
1911
+ return 0;
1912
+ if (code == INTEL_PT_ERR_LOST &&
1913
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)
1914
+ return 0;
1915
+ }
1916
+
14101917 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
14111918
14121919 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1413
- code, cpu, pid, tid, ip, msg);
1920
+ code, cpu, pid, tid, ip, msg, timestamp);
14141921
14151922 err = perf_session__deliver_synth_event(pt->session, &event, NULL);
14161923 if (err)
....@@ -1418,6 +1925,18 @@
14181925 err);
14191926
14201927 return err;
1928
+}
1929
+
1930
+static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
1931
+ const struct intel_pt_state *state)
1932
+{
1933
+ struct intel_pt *pt = ptq->pt;
1934
+ u64 tm = ptq->timestamp;
1935
+
1936
+ tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
1937
+
1938
+ return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
1939
+ ptq->tid, state->from_ip, tm);
14211940 }
14221941
14231942 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
....@@ -1452,8 +1971,7 @@
14521971 }
14531972
14541973 #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
1455
- INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \
1456
- INTEL_PT_CBR_CHG)
1974
+ INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT)
14571975
14581976 static int intel_pt_sample(struct intel_pt_queue *ptq)
14591977 {
....@@ -1466,31 +1984,46 @@
14661984
14671985 ptq->have_sample = false;
14681986
1469
- if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
1470
- if (state->type & INTEL_PT_CBR_CHG) {
1987
+ ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
1988
+ ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
1989
+
1990
+ /*
1991
+ * Do PEBS first to allow for the possibility that the PEBS timestamp
1992
+ * precedes the current timestamp.
1993
+ */
1994
+ if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
1995
+ err = intel_pt_synth_pebs_sample(ptq);
1996
+ if (err)
1997
+ return err;
1998
+ }
1999
+
2000
+ if (pt->sample_pwr_events) {
2001
+ if (ptq->state->cbr != ptq->cbr_seen) {
14712002 err = intel_pt_synth_cbr_sample(ptq);
14722003 if (err)
14732004 return err;
14742005 }
1475
- if (state->type & INTEL_PT_MWAIT_OP) {
1476
- err = intel_pt_synth_mwait_sample(ptq);
1477
- if (err)
1478
- return err;
1479
- }
1480
- if (state->type & INTEL_PT_PWR_ENTRY) {
1481
- err = intel_pt_synth_pwre_sample(ptq);
1482
- if (err)
1483
- return err;
1484
- }
1485
- if (state->type & INTEL_PT_EX_STOP) {
1486
- err = intel_pt_synth_exstop_sample(ptq);
1487
- if (err)
1488
- return err;
1489
- }
1490
- if (state->type & INTEL_PT_PWR_EXIT) {
1491
- err = intel_pt_synth_pwrx_sample(ptq);
1492
- if (err)
1493
- return err;
2006
+ if (state->type & INTEL_PT_PWR_EVT) {
2007
+ if (state->type & INTEL_PT_MWAIT_OP) {
2008
+ err = intel_pt_synth_mwait_sample(ptq);
2009
+ if (err)
2010
+ return err;
2011
+ }
2012
+ if (state->type & INTEL_PT_PWR_ENTRY) {
2013
+ err = intel_pt_synth_pwre_sample(ptq);
2014
+ if (err)
2015
+ return err;
2016
+ }
2017
+ if (state->type & INTEL_PT_EX_STOP) {
2018
+ err = intel_pt_synth_exstop_sample(ptq);
2019
+ if (err)
2020
+ return err;
2021
+ }
2022
+ if (state->type & INTEL_PT_PWR_EXIT) {
2023
+ err = intel_pt_synth_pwrx_sample(ptq);
2024
+ if (err)
2025
+ return err;
2026
+ }
14942027 }
14952028 }
14962029
....@@ -1515,21 +2048,21 @@
15152048 if (!(state->type & INTEL_PT_BRANCH))
15162049 return 0;
15172050
1518
- if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
1519
- thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
1520
- state->to_ip, ptq->insn_len,
1521
- state->trace_nr);
1522
- else
1523
- thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
2051
+ if (pt->use_thread_stack) {
2052
+ thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
2053
+ state->from_ip, state->to_ip, ptq->insn_len,
2054
+ state->trace_nr, pt->callstack,
2055
+ pt->br_stack_sz_plus,
2056
+ pt->mispred_all);
2057
+ } else {
2058
+ thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
2059
+ }
15242060
15252061 if (pt->sample_branches) {
15262062 err = intel_pt_synth_branch_sample(ptq);
15272063 if (err)
15282064 return err;
15292065 }
1530
-
1531
- if (pt->synth_opts.last_branch)
1532
- intel_pt_update_last_branch_rb(ptq);
15332066
15342067 if (!ptq->sync_switch)
15352068 return 0;
....@@ -1628,10 +2161,83 @@
16282161 }
16292162 }
16302163
2164
+/*
2165
+ * To filter against time ranges, it is only necessary to look at the next start
2166
+ * or end time.
2167
+ */
2168
+static bool intel_pt_next_time(struct intel_pt_queue *ptq)
2169
+{
2170
+ struct intel_pt *pt = ptq->pt;
2171
+
2172
+ if (ptq->sel_start) {
2173
+ /* Next time is an end time */
2174
+ ptq->sel_start = false;
2175
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
2176
+ return true;
2177
+ } else if (ptq->sel_idx + 1 < pt->range_cnt) {
2178
+ /* Next time is a start time */
2179
+ ptq->sel_start = true;
2180
+ ptq->sel_idx += 1;
2181
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
2182
+ return true;
2183
+ }
2184
+
2185
+ /* No next time */
2186
+ return false;
2187
+}
2188
+
2189
+static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
2190
+{
2191
+ int err;
2192
+
2193
+ while (1) {
2194
+ if (ptq->sel_start) {
2195
+ if (ptq->timestamp >= ptq->sel_timestamp) {
2196
+ /* After start time, so consider next time */
2197
+ intel_pt_next_time(ptq);
2198
+ if (!ptq->sel_timestamp) {
2199
+ /* No end time */
2200
+ return 0;
2201
+ }
2202
+ /* Check against end time */
2203
+ continue;
2204
+ }
2205
+ /* Before start time, so fast forward */
2206
+ ptq->have_sample = false;
2207
+ if (ptq->sel_timestamp > *ff_timestamp) {
2208
+ if (ptq->sync_switch) {
2209
+ intel_pt_next_tid(ptq->pt, ptq);
2210
+ ptq->switch_state = INTEL_PT_SS_UNKNOWN;
2211
+ }
2212
+ *ff_timestamp = ptq->sel_timestamp;
2213
+ err = intel_pt_fast_forward(ptq->decoder,
2214
+ ptq->sel_timestamp);
2215
+ if (err)
2216
+ return err;
2217
+ }
2218
+ return 0;
2219
+ } else if (ptq->timestamp > ptq->sel_timestamp) {
2220
+ /* After end time, so consider next time */
2221
+ if (!intel_pt_next_time(ptq)) {
2222
+ /* No next time range, so stop decoding */
2223
+ ptq->have_sample = false;
2224
+ ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
2225
+ return 1;
2226
+ }
2227
+ /* Check against next start time */
2228
+ continue;
2229
+ } else {
2230
+ /* Before end time */
2231
+ return 0;
2232
+ }
2233
+ }
2234
+}
2235
+
16312236 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
16322237 {
16332238 const struct intel_pt_state *state = ptq->state;
16342239 struct intel_pt *pt = ptq->pt;
2240
+ u64 ff_timestamp = 0;
16352241 int err;
16362242
16372243 if (!pt->kernel_start) {
....@@ -1665,11 +2271,9 @@
16652271 ptq->sync_switch = false;
16662272 intel_pt_next_tid(pt, ptq);
16672273 }
2274
+ ptq->timestamp = state->est_timestamp;
16682275 if (pt->synth_opts.errors) {
1669
- err = intel_pt_synth_error(pt, state->err,
1670
- ptq->cpu, ptq->pid,
1671
- ptq->tid,
1672
- state->from_ip);
2276
+ err = intel_ptq_synth_error(ptq, state);
16732277 if (err)
16742278 return err;
16752279 }
....@@ -1697,6 +2301,12 @@
16972301 ptq->timestamp = state->est_timestamp;
16982302 } else if (state->timestamp > ptq->timestamp) {
16992303 ptq->timestamp = state->timestamp;
2304
+ }
2305
+
2306
+ if (ptq->sel_timestamp) {
2307
+ err = intel_pt_time_filter(ptq, &ff_timestamp);
2308
+ if (err)
2309
+ return err;
17002310 }
17012311
17022312 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
....@@ -1791,10 +2401,60 @@
17912401 return 0;
17922402 }
17932403
2404
+static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq,
2405
+ struct auxtrace_queue *queue,
2406
+ struct perf_sample *sample)
2407
+{
2408
+ struct machine *m = ptq->pt->machine;
2409
+
2410
+ ptq->pid = sample->pid;
2411
+ ptq->tid = sample->tid;
2412
+ ptq->cpu = queue->cpu;
2413
+
2414
+ intel_pt_log("queue %u cpu %d pid %d tid %d\n",
2415
+ ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
2416
+
2417
+ thread__zput(ptq->thread);
2418
+
2419
+ if (ptq->tid == -1)
2420
+ return;
2421
+
2422
+ if (ptq->pid == -1) {
2423
+ ptq->thread = machine__find_thread(m, -1, ptq->tid);
2424
+ if (ptq->thread)
2425
+ ptq->pid = ptq->thread->pid_;
2426
+ return;
2427
+ }
2428
+
2429
+ ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid);
2430
+}
2431
+
2432
+static int intel_pt_process_timeless_sample(struct intel_pt *pt,
2433
+ struct perf_sample *sample)
2434
+{
2435
+ struct auxtrace_queue *queue;
2436
+ struct intel_pt_queue *ptq;
2437
+ u64 ts = 0;
2438
+
2439
+ queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
2440
+ if (!queue)
2441
+ return -EINVAL;
2442
+
2443
+ ptq = queue->priv;
2444
+ if (!ptq)
2445
+ return 0;
2446
+
2447
+ ptq->stop = false;
2448
+ ptq->time = sample->time;
2449
+ intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample);
2450
+ intel_pt_run_decoder(ptq, &ts);
2451
+ return 0;
2452
+}
2453
+
17942454 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
17952455 {
17962456 return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1797
- sample->pid, sample->tid, 0);
2457
+ sample->pid, sample->tid, 0, sample->time);
17982458 }
17992459
18002460 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
....@@ -1840,7 +2500,6 @@
18402500
18412501 switch (ptq->switch_state) {
18422502 case INTEL_PT_SS_NOT_TRACING:
1843
- ptq->next_tid = -1;
18442503 break;
18452504 case INTEL_PT_SS_UNKNOWN:
18462505 case INTEL_PT_SS_TRACING:
....@@ -1860,12 +2519,13 @@
18602519 ptq->switch_state = INTEL_PT_SS_TRACING;
18612520 break;
18622521 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1863
- ptq->next_tid = tid;
18642522 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
18652523 break;
18662524 default:
18672525 break;
18682526 }
2527
+
2528
+ ptq->next_tid = -1;
18692529
18702530 return 1;
18712531 }
....@@ -1873,7 +2533,7 @@
18732533 static int intel_pt_process_switch(struct intel_pt *pt,
18742534 struct perf_sample *sample)
18752535 {
1876
- struct perf_evsel *evsel;
2536
+ struct evsel *evsel;
18772537 pid_t tid;
18782538 int cpu, ret;
18792539
....@@ -1881,7 +2541,7 @@
18812541 if (evsel != pt->switch_evsel)
18822542 return 0;
18832543
1884
- tid = perf_evsel__intval(evsel, sample, "next_pid");
2544
+ tid = evsel__intval(evsel, sample, "next_pid");
18852545 cpu = sample->cpu;
18862546
18872547 intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
....@@ -1895,6 +2555,44 @@
18952555 return machine__set_current_tid(pt->machine, cpu, -1, tid);
18962556 }
18972557
2558
+static int intel_pt_context_switch_in(struct intel_pt *pt,
2559
+ struct perf_sample *sample)
2560
+{
2561
+ pid_t pid = sample->pid;
2562
+ pid_t tid = sample->tid;
2563
+ int cpu = sample->cpu;
2564
+
2565
+ if (pt->sync_switch) {
2566
+ struct intel_pt_queue *ptq;
2567
+
2568
+ ptq = intel_pt_cpu_to_ptq(pt, cpu);
2569
+ if (ptq && ptq->sync_switch) {
2570
+ ptq->next_tid = -1;
2571
+ switch (ptq->switch_state) {
2572
+ case INTEL_PT_SS_NOT_TRACING:
2573
+ case INTEL_PT_SS_UNKNOWN:
2574
+ case INTEL_PT_SS_TRACING:
2575
+ break;
2576
+ case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
2577
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
2578
+ ptq->switch_state = INTEL_PT_SS_TRACING;
2579
+ break;
2580
+ default:
2581
+ break;
2582
+ }
2583
+ }
2584
+ }
2585
+
2586
+ /*
2587
+ * If the current tid has not been updated yet, ensure it is now that
2588
+ * a "switch in" event has occurred.
2589
+ */
2590
+ if (machine__get_current_tid(pt->machine, cpu) == tid)
2591
+ return 0;
2592
+
2593
+ return machine__set_current_tid(pt->machine, cpu, pid, tid);
2594
+}
2595
+
18982596 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
18992597 struct perf_sample *sample)
19002598 {
....@@ -1906,7 +2604,7 @@
19062604
19072605 if (pt->have_sched_switch == 3) {
19082606 if (!out)
1909
- return 0;
2607
+ return intel_pt_context_switch_in(pt, sample);
19102608 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
19112609 pr_err("Expecting CPU-wide context switch event\n");
19122610 return -EINVAL;
....@@ -1922,10 +2620,6 @@
19222620
19232621 if (tid == -1)
19242622 intel_pt_log("context_switch event has no tid\n");
1925
-
1926
- intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1927
- cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
1928
- &pt->tc));
19292623
19302624 ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
19312625 if (ret <= 0)
....@@ -1949,6 +2643,67 @@
19492643 return machine__set_current_tid(pt->machine, sample->cpu,
19502644 event->itrace_start.pid,
19512645 event->itrace_start.tid);
2646
+}
2647
+
2648
+static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
2649
+ struct addr_location *al)
2650
+{
2651
+ if (!al->map || addr < al->map->start || addr >= al->map->end) {
2652
+ if (!thread__find_map(thread, cpumode, addr, al))
2653
+ return -1;
2654
+ }
2655
+
2656
+ return 0;
2657
+}
2658
+
2659
+/* Invalidate all instruction cache entries that overlap the text poke */
2660
+static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
2661
+{
2662
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2663
+ u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
2664
+ /* Assume text poke begins in a basic block no more than 4096 bytes */
2665
+ int cnt = 4096 + event->text_poke.new_len;
2666
+ struct thread *thread = pt->unknown_thread;
2667
+ struct addr_location al = { .map = NULL };
2668
+ struct machine *machine = pt->machine;
2669
+ struct intel_pt_cache_entry *e;
2670
+ u64 offset;
2671
+
2672
+ if (!event->text_poke.new_len)
2673
+ return 0;
2674
+
2675
+ for (; cnt; cnt--, addr--) {
2676
+ if (intel_pt_find_map(thread, cpumode, addr, &al)) {
2677
+ if (addr < event->text_poke.addr)
2678
+ return 0;
2679
+ continue;
2680
+ }
2681
+
2682
+ if (!al.map->dso || !al.map->dso->auxtrace_cache)
2683
+ continue;
2684
+
2685
+ offset = al.map->map_ip(al.map, addr);
2686
+
2687
+ e = intel_pt_cache_lookup(al.map->dso, machine, offset);
2688
+ if (!e)
2689
+ continue;
2690
+
2691
+ if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
2692
+ /*
2693
+ * No overlap. Working backwards there cannot be another
2694
+ * basic block that overlaps the text poke if there is a
2695
+ * branch instruction before the text poke address.
2696
+ */
2697
+ if (e->branch != INTEL_PT_BR_NO_BRANCH)
2698
+ return 0;
2699
+ } else {
2700
+ intel_pt_cache_invalidate(al.map->dso, machine, offset);
2701
+ intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
2702
+ al.map->dso->long_name, addr);
2703
+ }
2704
+ }
2705
+
2706
+ return 0;
19522707 }
19532708
19542709 static int intel_pt_process_event(struct perf_session *session,
....@@ -1981,7 +2736,11 @@
19812736 }
19822737
19832738 if (pt->timeless_decoding) {
1984
- if (event->header.type == PERF_RECORD_EXIT) {
2739
+ if (pt->sampling_mode) {
2740
+ if (sample->aux_sample.size)
2741
+ err = intel_pt_process_timeless_sample(pt,
2742
+ sample);
2743
+ } else if (event->header.type == PERF_RECORD_EXIT) {
19852744 err = intel_pt_process_timeless_queues(pt,
19862745 event->fork.tid,
19872746 sample->time);
....@@ -1991,6 +2750,13 @@
19912750 }
19922751 if (err)
19932752 return err;
2753
+
2754
+ if (event->header.type == PERF_RECORD_SAMPLE) {
2755
+ if (pt->synth_opts.add_callchain && !sample->callchain)
2756
+ intel_pt_add_callchain(pt, sample);
2757
+ if (pt->synth_opts.add_last_branch && !sample->branch_stack)
2758
+ intel_pt_add_br_stack(pt, sample);
2759
+ }
19942760
19952761 if (event->header.type == PERF_RECORD_AUX &&
19962762 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
....@@ -2008,9 +2774,14 @@
20082774 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
20092775 err = intel_pt_context_switch(pt, event, sample);
20102776
2011
- intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
2012
- perf_event__name(event->header.type), event->header.type,
2013
- sample->cpu, sample->time, timestamp);
2777
+ if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
2778
+ err = intel_pt_text_poke(pt, event);
2779
+
2780
+ if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
2781
+ intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
2782
+ event->header.type, sample->cpu, sample->time, timestamp);
2783
+ intel_pt_log_event(event);
2784
+ }
20142785
20152786 return err;
20162787 }
....@@ -2063,8 +2834,19 @@
20632834 session->auxtrace = NULL;
20642835 thread__put(pt->unknown_thread);
20652836 addr_filters__exit(&pt->filts);
2837
+ zfree(&pt->chain);
20662838 zfree(&pt->filter);
2839
+ zfree(&pt->time_ranges);
20672840 free(pt);
2841
+}
2842
+
2843
+static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
2844
+ struct evsel *evsel)
2845
+{
2846
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2847
+ auxtrace);
2848
+
2849
+ return evsel->core.attr.type == pt->pmu_type;
20682850 }
20692851
20702852 static int intel_pt_process_auxtrace_event(struct perf_session *session,
....@@ -2106,6 +2888,28 @@
21062888 return 0;
21072889 }
21082890
2891
+static int intel_pt_queue_data(struct perf_session *session,
2892
+ struct perf_sample *sample,
2893
+ union perf_event *event, u64 data_offset)
2894
+{
2895
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2896
+ auxtrace);
2897
+ u64 timestamp;
2898
+
2899
+ if (event) {
2900
+ return auxtrace_queues__add_event(&pt->queues, session, event,
2901
+ data_offset, NULL);
2902
+ }
2903
+
2904
+ if (sample->time && sample->time != (u64)-1)
2905
+ timestamp = perf_time_to_tsc(sample->time, &pt->tc);
2906
+ else
2907
+ timestamp = 0;
2908
+
2909
+ return auxtrace_queues__add_sample(&pt->queues, session, sample,
2910
+ data_offset, timestamp);
2911
+}
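
intel_pt_queue_data() becomes the auxtrace queue_data callback: whole AUX events are queued by their file offset, while individual AUX-area samples are keyed by a TSC derived from the sample's perf time, with 0 meaning "no usable time". A small sketch of that timestamp choice; the mult/shift constants are made up for illustration and stand in for perf_time_to_tsc():

#include <stdint.h>
#include <stdio.h>

/* Assumed calibration values, for illustration only. */
#define TC_MULT  838ULL
#define TC_SHIFT 11

static uint64_t ns_to_tsc(uint64_t ns)
{
	return (ns << TC_SHIFT) / TC_MULT;	/* rough inverse of ns = (tsc * mult) >> shift */
}

/* Mirrors the choice above: 0 and (u64)-1 both mean the sample carries no
 * usable time, so it is queued without a timestamp. */
static uint64_t sample_queue_timestamp(uint64_t sample_time)
{
	if (sample_time && sample_time != (uint64_t)-1)
		return ns_to_tsc(sample_time);
	return 0;
}

int main(void)
{
	printf("%llu %llu\n",
	       (unsigned long long)sample_queue_timestamp(0),
	       (unsigned long long)sample_queue_timestamp(1000000));
	return 0;
}
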
2912
+
21092913 struct intel_pt_synth {
21102914 struct perf_tool dummy_tool;
21112915 struct perf_session *session;
....@@ -2144,13 +2948,13 @@
21442948 return err;
21452949 }
21462950
2147
-static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
2951
+static void intel_pt_set_event_name(struct evlist *evlist, u64 id,
21482952 const char *name)
21492953 {
2150
- struct perf_evsel *evsel;
2954
+ struct evsel *evsel;
21512955
21522956 evlist__for_each_entry(evlist, evsel) {
2153
- if (evsel->id && evsel->id[0] == id) {
2957
+ if (evsel->core.id && evsel->core.id[0] == id) {
21542958 if (evsel->name)
21552959 zfree(&evsel->name);
21562960 evsel->name = strdup(name);
....@@ -2159,13 +2963,13 @@
21592963 }
21602964 }
21612965
2162
-static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
2163
- struct perf_evlist *evlist)
2966
+static struct evsel *intel_pt_evsel(struct intel_pt *pt,
2967
+ struct evlist *evlist)
21642968 {
2165
- struct perf_evsel *evsel;
2969
+ struct evsel *evsel;
21662970
21672971 evlist__for_each_entry(evlist, evsel) {
2168
- if (evsel->attr.type == pt->pmu_type && evsel->ids)
2972
+ if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids)
21692973 return evsel;
21702974 }
21712975
....@@ -2175,8 +2979,8 @@
21752979 static int intel_pt_synth_events(struct intel_pt *pt,
21762980 struct perf_session *session)
21772981 {
2178
- struct perf_evlist *evlist = session->evlist;
2179
- struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
2982
+ struct evlist *evlist = session->evlist;
2983
+ struct evsel *evsel = intel_pt_evsel(pt, evlist);
21802984 struct perf_event_attr attr;
21812985 u64 id;
21822986 int err;
....@@ -2189,7 +2993,7 @@
21892993 memset(&attr, 0, sizeof(struct perf_event_attr));
21902994 attr.size = sizeof(struct perf_event_attr);
21912995 attr.type = PERF_TYPE_HARDWARE;
2192
- attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
2996
+ attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
21932997 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
21942998 PERF_SAMPLE_PERIOD;
21952999 if (pt->timeless_decoding)
....@@ -2198,15 +3002,15 @@
21983002 attr.sample_type |= PERF_SAMPLE_TIME;
21993003 if (!pt->per_cpu_mmaps)
22003004 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
2201
- attr.exclude_user = evsel->attr.exclude_user;
2202
- attr.exclude_kernel = evsel->attr.exclude_kernel;
2203
- attr.exclude_hv = evsel->attr.exclude_hv;
2204
- attr.exclude_host = evsel->attr.exclude_host;
2205
- attr.exclude_guest = evsel->attr.exclude_guest;
2206
- attr.sample_id_all = evsel->attr.sample_id_all;
2207
- attr.read_format = evsel->attr.read_format;
3005
+ attr.exclude_user = evsel->core.attr.exclude_user;
3006
+ attr.exclude_kernel = evsel->core.attr.exclude_kernel;
3007
+ attr.exclude_hv = evsel->core.attr.exclude_hv;
3008
+ attr.exclude_host = evsel->core.attr.exclude_host;
3009
+ attr.exclude_guest = evsel->core.attr.exclude_guest;
3010
+ attr.sample_id_all = evsel->core.attr.sample_id_all;
3011
+ attr.read_format = evsel->core.attr.read_format;
22083012
2209
- id = evsel->id[0] + 1000000000;
3013
+ id = evsel->core.id[0] + 1000000000;
22103014 if (!id)
22113015 id = 1;
22123016
....@@ -2226,8 +3030,15 @@
22263030
22273031 if (pt->synth_opts.callchain)
22283032 attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2229
- if (pt->synth_opts.last_branch)
3033
+ if (pt->synth_opts.last_branch) {
22303034 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
3035
+ /*
3036
+ * We don't use the hardware index, but the sample generation
3037
+ * code uses the new format branch_stack with this field,
3038
+ * so the event attributes must indicate that it's present.
3039
+ */
3040
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
3041
+ }
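
"New format" here means the branch stack record that carries a hardware LBR index: with PERF_SAMPLE_BRANCH_HW_INDEX set, a hw_idx word sits between the entry count and the entries, so consumers must be told it is present even though synthesized stacks have no real LBR top-of-stack (perf reports -1 in that case; treat the exact value as an assumption). A sketch of the implied layout, with illustrative struct names:

#include <stdint.h>

struct branch_entry_sketch {
	uint64_t from;
	uint64_t to;
	uint64_t flags;		/* mispredict/predicted/cycles etc., packed bits */
};

/* Layout implied by PERF_SAMPLE_BRANCH_HW_INDEX: hw_idx precedes entries. */
struct branch_stack_sketch {
	uint64_t nr;
	uint64_t hw_idx;	/* -1 when no hardware LBR index is available */
	struct branch_entry_sketch entries[];
};
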
22313042
22323043 if (pt->synth_opts.instructions) {
22333044 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
....@@ -2288,7 +3099,7 @@
22883099 id += 1;
22893100 }
22903101
2291
- if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
3102
+ if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) {
22923103 attr.config = PERF_SYNTH_INTEL_MWAIT;
22933104 err = intel_pt_synth_event(session, "mwait", &attr, id);
22943105 if (err)
....@@ -2325,12 +3136,28 @@
23253136 return 0;
23263137 }
23273138
2328
-static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
3139
+static void intel_pt_setup_pebs_events(struct intel_pt *pt)
23293140 {
2330
- struct perf_evsel *evsel;
3141
+ struct evsel *evsel;
3142
+
3143
+ if (!pt->synth_opts.other_events)
3144
+ return;
3145
+
3146
+ evlist__for_each_entry(pt->session->evlist, evsel) {
3147
+ if (evsel->core.attr.aux_output && evsel->core.id) {
3148
+ pt->sample_pebs = true;
3149
+ pt->pebs_evsel = evsel;
3150
+ return;
3151
+ }
3152
+ }
3153
+}
3154
+
3155
+static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist)
3156
+{
3157
+ struct evsel *evsel;
23313158
23323159 evlist__for_each_entry_reverse(evlist, evsel) {
2333
- const char *name = perf_evsel__name(evsel);
3160
+ const char *name = evsel__name(evsel);
23343161
23353162 if (!strcmp(name, "sched:sched_switch"))
23363163 return evsel;
....@@ -2339,12 +3166,12 @@
23393166 return NULL;
23403167 }
23413168
2342
-static bool intel_pt_find_switch(struct perf_evlist *evlist)
3169
+static bool intel_pt_find_switch(struct evlist *evlist)
23433170 {
2344
- struct perf_evsel *evsel;
3171
+ struct evsel *evsel;
23453172
23463173 evlist__for_each_entry(evlist, evsel) {
2347
- if (evsel->attr.context_switch)
3174
+ if (evsel->core.attr.context_switch)
23483175 return true;
23493176 }
23503177
....@@ -2361,6 +3188,85 @@
23613188 return 0;
23623189 }
23633190
3191
+/* Find least TSC which converts to ns or later */
3192
+static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
3193
+{
3194
+ u64 tsc, tm;
3195
+
3196
+ tsc = perf_time_to_tsc(ns, &pt->tc);
3197
+
3198
+ while (1) {
3199
+ tm = tsc_to_perf_time(tsc, &pt->tc);
3200
+ if (tm < ns)
3201
+ break;
3202
+ tsc -= 1;
3203
+ }
3204
+
3205
+ while (tm < ns)
3206
+ tm = tsc_to_perf_time(++tsc, &pt->tc);
3207
+
3208
+ return tsc;
3209
+}
3210
+
3211
+/* Find greatest TSC which converts to ns or earlier */
3212
+static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
3213
+{
3214
+ u64 tsc, tm;
3215
+
3216
+ tsc = perf_time_to_tsc(ns, &pt->tc);
3217
+
3218
+ while (1) {
3219
+ tm = tsc_to_perf_time(tsc, &pt->tc);
3220
+ if (tm > ns)
3221
+ break;
3222
+ tsc += 1;
3223
+ }
3224
+
3225
+ while (tm > ns)
3226
+ tm = tsc_to_perf_time(--tsc, &pt->tc);
3227
+
3228
+ return tsc;
3229
+}
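
Truncating integer conversion means a nanosecond bound mapped straight to TSC can round-trip to a time just outside the requested window; both helpers therefore nudge the result until the round trip lands on the correct side of ns. A standalone toy example of the intel_pt_tsc_start() search, with made-up mult/shift values rather than real calibration data (assumes ns > 0):

#include <stdint.h>
#include <stdio.h>

#define CONV_MULT  838ULL
#define CONV_SHIFT 11

static uint64_t tsc_to_ns(uint64_t tsc)
{
	return (tsc * CONV_MULT) >> CONV_SHIFT;
}

/* Same search as intel_pt_tsc_start(): back up until the round trip falls
 * below ns, then step forward to the first TSC that lands on or after ns. */
static uint64_t tsc_start(uint64_t ns)
{
	uint64_t tsc = (ns << CONV_SHIFT) / CONV_MULT;	/* rough first guess */
	uint64_t tm;

	while (1) {
		tm = tsc_to_ns(tsc);
		if (tm < ns)
			break;
		tsc -= 1;
	}

	while (tm < ns)
		tm = tsc_to_ns(++tsc);

	return tsc;
}

int main(void)
{
	uint64_t ns = 123456789;
	uint64_t tsc = tsc_start(ns);

	/* Least TSC converting to >= ns; the one below it converts to < ns. */
	printf("tsc=%llu -> %llu ns (prev %llu ns)\n",
	       (unsigned long long)tsc,
	       (unsigned long long)tsc_to_ns(tsc),
	       (unsigned long long)tsc_to_ns(tsc - 1));
	return 0;
}

intel_pt_tsc_end() is the mirror image, stepping until the conversion is less than or equal to ns.
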
3230
+
3231
+static int intel_pt_setup_time_ranges(struct intel_pt *pt,
3232
+ struct itrace_synth_opts *opts)
3233
+{
3234
+ struct perf_time_interval *p = opts->ptime_range;
3235
+ int n = opts->range_num;
3236
+ int i;
3237
+
3238
+ if (!n || !p || pt->timeless_decoding)
3239
+ return 0;
3240
+
3241
+ pt->time_ranges = calloc(n, sizeof(struct range));
3242
+ if (!pt->time_ranges)
3243
+ return -ENOMEM;
3244
+
3245
+ pt->range_cnt = n;
3246
+
3247
+ intel_pt_log("%s: %u range(s)\n", __func__, n);
3248
+
3249
+ for (i = 0; i < n; i++) {
3250
+ struct range *r = &pt->time_ranges[i];
3251
+ u64 ts = p[i].start;
3252
+ u64 te = p[i].end;
3253
+
3254
+ /*
3255
+ * Take care to ensure the TSC range matches the perf-time range
3256
+ * when converted back to perf-time.
3257
+ */
3258
+ r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
3259
+ r->end = te ? intel_pt_tsc_end(te, pt) : 0;
3260
+
3261
+ intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
3262
+ i, ts, te);
3263
+ intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
3264
+ i, r->start, r->end);
3265
+ }
3266
+
3267
+ return 0;
3268
+}
3269
+
23643270 static const char * const intel_pt_info_fmts[] = {
23653271 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
23663272 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
....@@ -2373,6 +3279,7 @@
23733279 [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
23743280 [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
23753281 [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
3282
+ [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n",
23763283 [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
23773284 [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
23783285 [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
....@@ -2380,15 +3287,19 @@
23803287 [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n",
23813288 };
23823289
2383
-static void intel_pt_print_info(u64 *arr, int start, int finish)
3290
+static void intel_pt_print_info(__u64 *arr, int start, int finish)
23843291 {
23853292 int i;
23863293
23873294 if (!dump_trace)
23883295 return;
23893296
2390
- for (i = start; i <= finish; i++)
2391
- fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
3297
+ for (i = start; i <= finish; i++) {
3298
+ const char *fmt = intel_pt_info_fmts[i];
3299
+
3300
+ if (fmt)
3301
+ fprintf(stdout, fmt, arr[i]);
3302
+ }
23923303 }
23933304
23943305 static void intel_pt_print_info_str(const char *name, const char *str)
....@@ -2399,23 +3310,23 @@
23993310 fprintf(stdout, " %-20s%s\n", name, str ? str : "");
24003311 }
24013312
2402
-static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
3313
+static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos)
24033314 {
24043315 return auxtrace_info->header.size >=
2405
- sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
3316
+ sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1));
24063317 }
24073318
24083319 int intel_pt_process_auxtrace_info(union perf_event *event,
24093320 struct perf_session *session)
24103321 {
2411
- struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
3322
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
24123323 size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
24133324 struct intel_pt *pt;
24143325 void *info_end;
2415
- u64 *info;
3326
+ __u64 *info;
24163327 int err;
24173328
2418
- if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
3329
+ if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
24193330 min_sz)
24203331 return -EINVAL;
24213332
....@@ -2513,7 +3424,7 @@
25133424 if (pt->timeless_decoding && !pt->tc.time_mult)
25143425 pt->tc.time_mult = 1;
25153426 pt->have_tsc = intel_pt_have_tsc(pt);
2516
- pt->sampling_mode = false;
3427
+ pt->sampling_mode = intel_pt_sampling_mode(pt);
25173428 pt->est_tsc = !pt->timeless_decoding;
25183429
25193430 pt->unknown_thread = thread__new(999999999, 999999999);
....@@ -2533,16 +3444,19 @@
25333444 err = thread__set_comm(pt->unknown_thread, "unknown", 0);
25343445 if (err)
25353446 goto err_delete_thread;
2536
- if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
3447
+ if (thread__init_maps(pt->unknown_thread, pt->machine)) {
25373448 err = -ENOMEM;
25383449 goto err_delete_thread;
25393450 }
25403451
25413452 pt->auxtrace.process_event = intel_pt_process_event;
25423453 pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
3454
+ pt->auxtrace.queue_data = intel_pt_queue_data;
3455
+ pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample;
25433456 pt->auxtrace.flush_events = intel_pt_flush;
25443457 pt->auxtrace.free_events = intel_pt_free_events;
25453458 pt->auxtrace.free = intel_pt_free;
3459
+ pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
25463460 session->auxtrace = &pt->auxtrace;
25473461
25483462 if (dump_trace)
....@@ -2562,16 +3476,18 @@
25623476 goto err_delete_thread;
25633477 }
25643478
2565
- if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
3479
+ if (session->itrace_synth_opts->set) {
25663480 pt->synth_opts = *session->itrace_synth_opts;
25673481 } else {
2568
- itrace_synth_opts__set_default(&pt->synth_opts);
2569
- if (use_browser != -1) {
3482
+ itrace_synth_opts__set_default(&pt->synth_opts,
3483
+ session->itrace_synth_opts->default_no_sample);
3484
+ if (!session->itrace_synth_opts->default_no_sample &&
3485
+ !session->itrace_synth_opts->inject) {
25703486 pt->synth_opts.branches = false;
25713487 pt->synth_opts.callchain = true;
3488
+ pt->synth_opts.add_callchain = true;
25723489 }
2573
- if (session->itrace_synth_opts)
2574
- pt->synth_opts.thread_stack =
3490
+ pt->synth_opts.thread_stack =
25753491 session->itrace_synth_opts->thread_stack;
25763492 }
25773493
....@@ -2591,6 +3507,10 @@
25913507 pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
25923508 }
25933509
3510
+ err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
3511
+ if (err)
3512
+ goto err_delete_thread;
3513
+
25943514 if (pt->synth_opts.calls)
25953515 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
25963516 PERF_IP_FLAG_TRACE_END;
....@@ -2598,19 +3518,64 @@
25983518 pt->branches_filter |= PERF_IP_FLAG_RETURN |
25993519 PERF_IP_FLAG_TRACE_BEGIN;
26003520
2601
- if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
3521
+ if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
3522
+ !symbol_conf.use_callchain) {
26023523 symbol_conf.use_callchain = true;
26033524 if (callchain_register_param(&callchain_param) < 0) {
26043525 symbol_conf.use_callchain = false;
26053526 pt->synth_opts.callchain = false;
3527
+ pt->synth_opts.add_callchain = false;
26063528 }
26073529 }
3530
+
3531
+ if (pt->synth_opts.add_callchain) {
3532
+ err = intel_pt_callchain_init(pt);
3533
+ if (err)
3534
+ goto err_delete_thread;
3535
+ }
3536
+
3537
+ if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
3538
+ pt->br_stack_sz = pt->synth_opts.last_branch_sz;
3539
+ pt->br_stack_sz_plus = pt->br_stack_sz;
3540
+ }
3541
+
3542
+ if (pt->synth_opts.add_last_branch) {
3543
+ err = intel_pt_br_stack_init(pt);
3544
+ if (err)
3545
+ goto err_delete_thread;
3546
+ /*
3547
+ * Additional branch stack size to cater for tracing from the
3548
+ * actual sample ip to where the sample time is recorded.
3549
+ * Measured at about 200 branches, but generously set to 1024.
3550
+ * If kernel space is not being traced, then add just 1 for the
3551
+ * branch to kernel space.
3552
+ */
3553
+ if (intel_pt_tracing_kernel(pt))
3554
+ pt->br_stack_sz_plus += 1024;
3555
+ else
3556
+ pt->br_stack_sz_plus += 1;
3557
+ }
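
Worked example of the sizing above, assuming a default last-branch size of 64 (an assumption about the itrace defaults, not stated in this hunk): with kernel tracing the internal headroom becomes 64 + 1024 = 1088 entries, while samples still expose at most 64. As a sketch:

#include <stdbool.h>

/* Entries needed internally to track branches between the sample ip and
 * the point where the sample's timestamp was recorded. */
unsigned int br_stack_sz_plus(unsigned int last_branch_sz, bool tracing_kernel)
{
	return last_branch_sz + (tracing_kernel ? 1024 : 1);
}
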
3558
+
3559
+ pt->use_thread_stack = pt->synth_opts.callchain ||
3560
+ pt->synth_opts.add_callchain ||
3561
+ pt->synth_opts.thread_stack ||
3562
+ pt->synth_opts.last_branch ||
3563
+ pt->synth_opts.add_last_branch;
3564
+
3565
+ pt->callstack = pt->synth_opts.callchain ||
3566
+ pt->synth_opts.add_callchain ||
3567
+ pt->synth_opts.thread_stack;
26083568
26093569 err = intel_pt_synth_events(pt, session);
26103570 if (err)
26113571 goto err_delete_thread;
26123572
2613
- err = auxtrace_queues__process_index(&pt->queues, session);
3573
+ intel_pt_setup_pebs_events(pt);
3574
+
3575
+ if (pt->sampling_mode || list_empty(&session->auxtrace_index))
3576
+ err = auxtrace_queue_data(session, true, true);
3577
+ else
3578
+ err = auxtrace_queues__process_index(&pt->queues, session);
26143579 if (err)
26153580 goto err_delete_thread;
26163581
....@@ -2623,6 +3588,7 @@
26233588 return 0;
26243589
26253590 err_delete_thread:
3591
+ zfree(&pt->chain);
26263592 thread__zput(pt->unknown_thread);
26273593 err_free_queues:
26283594 intel_pt_log_disable();
....@@ -2631,6 +3597,7 @@
26313597 err_free:
26323598 addr_filters__exit(&pt->filts);
26333599 zfree(&pt->filter);
3600
+ zfree(&pt->time_ranges);
26343601 free(pt);
26353602 return err;
26363603 }