From b22da3d8526a935aa31e086e63f60ff3246cb61c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Sat, 09 Dec 2023 07:24:11 +0000 Subject: [PATCH] add stmac read mac form eeprom --- kernel/tools/perf/util/intel-pt.c | 1483 +++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 files changed, 1,225 insertions(+), 258 deletions(-) diff --git a/kernel/tools/perf/util/intel-pt.c b/kernel/tools/perf/util/intel-pt.c index 256b475..453773c 100644 --- a/kernel/tools/perf/util/intel-pt.c +++ b/kernel/tools/perf/util/intel-pt.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel_pt.c: Intel Processor Trace support * Copyright (c) 2013-2015, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #include <inttypes.h> @@ -18,9 +9,10 @@ #include <stdbool.h> #include <errno.h> #include <linux/kernel.h> +#include <linux/string.h> #include <linux/types.h> +#include <linux/zalloc.h> -#include "../perf.h" #include "session.h" #include "machine.h" #include "memswap.h" @@ -31,7 +23,6 @@ #include "evsel.h" #include "map.h" #include "color.h" -#include "util.h" #include "thread.h" #include "thread-stack.h" #include "symbol.h" @@ -42,6 +33,11 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "util/perf_api_probe.h" +#include "util/synthetic-events.h" +#include "time-utils.h" + +#include "../arch/x86/include/uapi/asm/perf_regs.h" #include "intel-pt-decoder/intel-pt-log.h" #include "intel-pt-decoder/intel-pt-decoder.h" @@ -50,6 +46,11 @@ #define MAX_TIMESTAMP (~0ULL) +struct range { + u64 start; + u64 end; +}; + struct intel_pt { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -57,7 +58,7 @@ u32 auxtrace_type; struct perf_session *session; struct machine *machine; - struct perf_evsel *switch_evsel; + struct evsel *switch_evsel; struct thread *unknown_thread; bool timeless_decoding; bool sampling_mode; @@ -68,6 +69,10 @@ bool est_tsc; bool sync_switch; bool mispred_all; + bool use_thread_stack; + bool callstack; + unsigned int br_stack_sz; + unsigned int br_stack_sz_plus; int have_sched_switch; u32 pmu_type; u64 kernel_start; @@ -104,6 +109,9 @@ u64 pwrx_id; u64 cbr_id; + bool sample_pebs; + struct evsel *pebs_evsel; + u64 tsc_bit; u64 mtc_bit; u64 mtc_freq_bits; @@ -118,6 +126,12 @@ char *filter; struct addr_filters filts; + + struct range *time_ranges; + unsigned int range_cnt; + + struct ip_callchain *chain; + struct branch_stack *br_stack; }; enum switch_state { @@ -137,8 +151,6 @@ const struct intel_pt_state *state; struct ip_callchain *chain; struct branch_stack *last_branch; - struct branch_stack *last_branch_rb; - size_t last_branch_pos; union perf_event *event_buf; bool on_heap; bool stop; @@ -154,9 +166,19 @@ bool have_sample; u64 time; u64 timestamp; + u64 sel_timestamp; + bool sel_start; + unsigned int sel_idx; u32 flags; u16 insn_len; u64 last_insn_cnt; + u64 ipc_insn_cnt; + u64 ipc_cyc_cnt; + u64 last_in_insn_cnt; + u64 last_in_cyc_cnt; + u64 last_br_insn_cnt; + u64 last_br_cyc_cnt; + unsigned int cbr_seen; char insn[INTEL_PT_INSN_BUF_SZ]; }; @@ -168,13 +190,14 @@ int ret, pkt_len, i; char desc[INTEL_PT_PKT_DESC_MAX]; const char *color = PERF_COLOR_BLUE; + enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX; color_fprintf(stdout, color, ". ... Intel Processor Trace data: size %zu bytes\n", len); while (len) { - ret = intel_pt_get_packet(buf, len, &packet); + ret = intel_pt_get_packet(buf, len, &packet, &ctx); if (ret > 0) pkt_len = ret; else @@ -206,6 +229,44 @@ intel_pt_dump(pt, buf, len); } +static void intel_pt_log_event(union perf_event *event) +{ + FILE *f = intel_pt_log_fp(); + + if (!intel_pt_enable_logging || !f) + return; + + perf_event__fprintf(event, NULL, f); +} + +static void intel_pt_dump_sample(struct perf_session *session, + struct perf_sample *sample) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + printf("\n"); + intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size); +} + +static bool intel_pt_log_events(struct intel_pt *pt, u64 tm) +{ + struct perf_time_interval *range = pt->synth_opts.ptime_range; + int n = pt->synth_opts.range_num; + + if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) + return true; + + if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) + return false; + + /* perf_time__ranges_skip_sample does not work if time is zero */ + if (!tm) + tm = 1; + + return !n || !perf_time__ranges_skip_sample(range, n, tm); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -223,31 +284,12 @@ return 0; } -/* This function assumes data is processed sequentially only */ -static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +static int intel_pt_get_buffer(struct intel_pt_queue *ptq, + struct auxtrace_buffer *buffer, + struct auxtrace_buffer *old_buffer, + struct intel_pt_buffer *b) { - struct intel_pt_queue *ptq = data; - struct auxtrace_buffer *buffer = ptq->buffer; - struct auxtrace_buffer *old_buffer = ptq->old_buffer; - struct auxtrace_queue *queue; bool might_overlap; - - if (ptq->stop) { - b->len = 0; - return 0; - } - - queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; - - buffer = auxtrace_buffer__next(queue, buffer); - if (!buffer) { - if (old_buffer) - auxtrace_buffer__drop_data(old_buffer); - b->len = 0; - return 0; - } - - ptq->buffer = buffer; if (!buffer->data) { int fd = perf_data__fd(ptq->pt->session->data); @@ -277,6 +319,95 @@ } else { b->consecutive = true; } + + return 0; +} + +/* Do not drop buffers with references - refer intel_pt_get_trace() */ +static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq, + struct auxtrace_buffer *buffer) +{ + if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer) + return; + + auxtrace_buffer__drop_data(buffer); +} + +/* Must be serialized with respect to intel_pt_get_trace() */ +static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb, + void *cb_data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer; + struct auxtrace_buffer *old_buffer = ptq->old_buffer; + struct auxtrace_queue *queue; + int err = 0; + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + while (1) { + struct intel_pt_buffer b = { .len = 0 }; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) + break; + + err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b); + if (err) + break; + + if (b.len) { + intel_pt_lookahead_drop_buffer(ptq, old_buffer); + old_buffer = buffer; + } else { + intel_pt_lookahead_drop_buffer(ptq, buffer); + continue; + } + + err = cb(&b, cb_data); + if (err) + break; + } + + if (buffer != old_buffer) + intel_pt_lookahead_drop_buffer(ptq, buffer); + intel_pt_lookahead_drop_buffer(ptq, old_buffer); + + return err; +} + +/* + * This function assumes data is processed sequentially only. + * Must be serialized with respect to intel_pt_lookahead() + */ +static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer; + struct auxtrace_buffer *old_buffer = ptq->old_buffer; + struct auxtrace_queue *queue; + int err; + + if (ptq->stop) { + b->len = 0; + return 0; + } + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + b->len = 0; + return 0; + } + + ptq->buffer = buffer; + + err = intel_pt_get_buffer(ptq, buffer, old_buffer, b); + if (err) + return err; if (ptq->step_through_buffers) ptq->stop = true; @@ -405,6 +536,17 @@ return NULL; return auxtrace_cache__lookup(dso->auxtrace_cache, offset); +} + +static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, + u64 offset) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + + if (!c) + return; + + auxtrace_cache__remove(dso->auxtrace_cache, offset); } static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) @@ -629,11 +771,11 @@ static bool intel_pt_exclude_kernel(struct intel_pt *pt) { - struct perf_evsel *evsel; + struct evsel *evsel; evlist__for_each_entry(pt->session->evlist, evsel) { - if (intel_pt_get_config(pt, &evsel->attr, NULL) && - !evsel->attr.exclude_kernel) + if (intel_pt_get_config(pt, &evsel->core.attr, NULL) && + !evsel->core.attr.exclude_kernel) return false; } return true; @@ -641,14 +783,14 @@ static bool intel_pt_return_compression(struct intel_pt *pt) { - struct perf_evsel *evsel; + struct evsel *evsel; u64 config; if (!pt->noretcomp_bit) return true; evlist__for_each_entry(pt->session->evlist, evsel) { - if (intel_pt_get_config(pt, &evsel->attr, &config) && + if (intel_pt_get_config(pt, &evsel->core.attr, &config) && (config & pt->noretcomp_bit)) return false; } @@ -657,11 +799,11 @@ static bool intel_pt_branch_enable(struct intel_pt *pt) { - struct perf_evsel *evsel; + struct evsel *evsel; u64 config; evlist__for_each_entry(pt->session->evlist, evsel) { - if (intel_pt_get_config(pt, &evsel->attr, &config) && + if (intel_pt_get_config(pt, &evsel->core.attr, &config) && (config & 1) && !(config & 0x2000)) return false; } @@ -670,7 +812,7 @@ static unsigned int intel_pt_mtc_period(struct intel_pt *pt) { - struct perf_evsel *evsel; + struct evsel *evsel; unsigned int shift; u64 config; @@ -681,7 +823,7 @@ config >>= 1; evlist__for_each_entry(pt->session->evlist, evsel) { - if (intel_pt_get_config(pt, &evsel->attr, &config)) + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) return (config & pt->mtc_freq_bits) >> shift; } return 0; @@ -689,7 +831,7 @@ static bool intel_pt_timeless_decoding(struct intel_pt *pt) { - struct perf_evsel *evsel; + struct evsel *evsel; bool timeless_decoding = true; u64 config; @@ -697,9 +839,9 @@ return true; evlist__for_each_entry(pt->session->evlist, evsel) { - if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) return true; - if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) { if (config & pt->tsc_bit) timeless_decoding = false; else @@ -711,11 +853,11 @@ static bool intel_pt_tracing_kernel(struct intel_pt *pt) { - struct perf_evsel *evsel; + struct evsel *evsel; evlist__for_each_entry(pt->session->evlist, evsel) { - if (intel_pt_get_config(pt, &evsel->attr, NULL) && - !evsel->attr.exclude_kernel) + if (intel_pt_get_config(pt, &evsel->core.attr, NULL) && + !evsel->core.attr.exclude_kernel) return true; } return false; @@ -723,7 +865,7 @@ static bool intel_pt_have_tsc(struct intel_pt *pt) { - struct perf_evsel *evsel; + struct evsel *evsel; bool have_tsc = false; u64 config; @@ -731,7 +873,7 @@ return false; evlist__for_each_entry(pt->session->evlist, evsel) { - if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) { if (config & pt->tsc_bit) have_tsc = true; else @@ -739,6 +881,30 @@ } } return have_tsc; +} + +static bool intel_pt_sampling_mode(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) && + evsel->core.attr.aux_sample_size) + return true; + } + return false; +} + +static u64 intel_pt_ctl(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) + return config; + } + return 0; } static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) @@ -750,6 +916,86 @@ return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / pt->tc.time_mult; } + +static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt) +{ + size_t sz = sizeof(struct ip_callchain); + + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); + return zalloc(sz); +} + +static int intel_pt_callchain_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN)) + evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN; + } + + pt->chain = intel_pt_alloc_chain(pt); + if (!pt->chain) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_callchain(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__sample_late(thread, sample->cpu, pt->chain, + pt->synth_opts.callchain_sz + 1, sample->ip, + pt->kernel_start); + + sample->callchain = pt->chain; +} + +static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt) +{ + size_t sz = sizeof(struct branch_stack); + + sz += entry_cnt * sizeof(struct branch_entry); + return zalloc(sz); +} + +static int intel_pt_br_stack_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)) + evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK; + } + + pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz); + if (!pt->br_stack) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_br_stack(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack, + pt->br_stack_sz, sample->ip, + pt->kernel_start); + + sample->branch_stack = pt->br_stack; +} + +/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ +#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U) static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, unsigned int queue_nr) @@ -763,25 +1009,16 @@ return NULL; if (pt->synth_opts.callchain) { - size_t sz = sizeof(struct ip_callchain); - - /* Add 1 to callchain_sz for callchain context */ - sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); - ptq->chain = zalloc(sz); + ptq->chain = intel_pt_alloc_chain(pt); if (!ptq->chain) goto out_free; } - if (pt->synth_opts.last_branch) { - size_t sz = sizeof(struct branch_stack); + if (pt->synth_opts.last_branch || pt->synth_opts.other_events) { + unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz); - sz += pt->synth_opts.last_branch_sz * - sizeof(struct branch_entry); - ptq->last_branch = zalloc(sz); + ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt); if (!ptq->last_branch) - goto out_free; - ptq->last_branch_rb = zalloc(sz); - if (!ptq->last_branch_rb) goto out_free; } @@ -799,13 +1036,16 @@ params.get_trace = intel_pt_get_trace; params.walk_insn = intel_pt_walk_next_insn; + params.lookahead = intel_pt_lookahead; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.branch_enable = intel_pt_branch_enable(pt); + params.ctl = intel_pt_ctl(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; + params.quick = pt->synth_opts.quick; if (pt->filts.cnt > 0) params.pgd_ip = intel_pt_pgd_ip; @@ -850,7 +1090,6 @@ out_free: zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); return NULL; @@ -866,7 +1105,6 @@ intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); } @@ -918,6 +1156,28 @@ ptq->insn_len = ptq->state->insn_len; memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ); } + + if (ptq->state->type & INTEL_PT_TRACE_BEGIN) + ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN; + if (ptq->state->type & INTEL_PT_TRACE_END) + ptq->flags |= PERF_IP_FLAG_TRACE_END; +} + +static void intel_pt_setup_time_range(struct intel_pt *pt, + struct intel_pt_queue *ptq) +{ + if (!pt->range_cnt) + return; + + ptq->sel_timestamp = pt->time_ranges[0].start; + ptq->sel_idx = 0; + + if (ptq->sel_timestamp) { + ptq->sel_start = true; + } else { + ptq->sel_timestamp = pt->time_ranges[0].end; + ptq->sel_start = false; + } } static int intel_pt_setup_queue(struct intel_pt *pt, @@ -939,11 +1199,15 @@ ptq->cpu = queue->cpu; ptq->tid = queue->tid; + ptq->cbr_seen = UINT_MAX; + if (pt->sampling_mode && !pt->snapshot_mode && pt->timeless_decoding) ptq->step_through_buffers = true; ptq->sync_switch = pt->sync_switch; + + intel_pt_setup_time_range(pt, ptq); } if (!ptq->on_heap && @@ -958,6 +1222,14 @@ intel_pt_log("queue %u getting timestamp\n", queue_nr); intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + if (ptq->sel_start && ptq->sel_timestamp) { + ret = intel_pt_fast_forward(ptq->decoder, + ptq->sel_timestamp); + if (ret) + return ret; + } + while (1) { state = intel_pt_decode(ptq->decoder); if (state->err) { @@ -977,6 +1249,9 @@ queue_nr, ptq->timestamp); ptq->state = state; ptq->have_sample = true; + if (ptq->sel_start && ptq->sel_timestamp && + ptq->timestamp < ptq->sel_timestamp) + ptq->have_sample = false; intel_pt_sample_flags(ptq); ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); if (ret) @@ -1000,62 +1275,35 @@ return 0; } -static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) -{ - struct branch_stack *bs_src = ptq->last_branch_rb; - struct branch_stack *bs_dst = ptq->last_branch; - size_t nr = 0; - - bs_dst->nr = bs_src->nr; - - if (!bs_src->nr) - return; - - nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; - memcpy(&bs_dst->entries[0], - &bs_src->entries[ptq->last_branch_pos], - sizeof(struct branch_entry) * nr); - - if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { - memcpy(&bs_dst->entries[nr], - &bs_src->entries[0], - sizeof(struct branch_entry) * ptq->last_branch_pos); - } -} - -static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) -{ - ptq->last_branch_pos = 0; - ptq->last_branch_rb->nr = 0; -} - -static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) -{ - const struct intel_pt_state *state = ptq->state; - struct branch_stack *bs = ptq->last_branch_rb; - struct branch_entry *be; - - if (!ptq->last_branch_pos) - ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; - - ptq->last_branch_pos -= 1; - - be = &bs->entries[ptq->last_branch_pos]; - be->from = state->from_ip; - be->to = state->to_ip; - be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); - be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); - /* No support for mispredict */ - be->flags.mispred = ptq->pt->mispred_all; - - if (bs->nr < ptq->pt->synth_opts.last_branch_sz) - bs->nr += 1; -} - static inline bool intel_pt_skip_event(struct intel_pt *pt) { return pt->synth_opts.initial_skip && pt->num_events++ < pt->synth_opts.initial_skip; +} + +/* + * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen. + * Also ensure CBR is first non-skipped event by allowing for 4 more samples + * from this decoder state. + */ +static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt) +{ + return pt->synth_opts.initial_skip && + pt->num_events + 4 < pt->synth_opts.initial_skip; +} + +static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample) +{ + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.size = sizeof(struct perf_event_header); + + sample->pid = ptq->pid; + sample->tid = ptq->tid; + sample->cpu = ptq->cpu; + sample->insn_len = ptq->insn_len; + memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); } static void intel_pt_prep_b_sample(struct intel_pt *pt, @@ -1063,23 +1311,18 @@ union perf_event *event, struct perf_sample *sample) { + intel_pt_prep_a_sample(ptq, event, sample); + if (!pt->timeless_decoding) sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); sample->ip = ptq->state->from_ip; sample->cpumode = intel_pt_cpumode(pt, sample->ip); - sample->pid = ptq->pid; - sample->tid = ptq->tid; sample->addr = ptq->state->to_ip; sample->period = 1; - sample->cpu = ptq->cpu; sample->flags = ptq->flags; - sample->insn_len = ptq->insn_len; - memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); - event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = sample->cpumode; - event->sample.header.size = sizeof(struct perf_event_header); } static int intel_pt_inject_event(union perf_event *event, @@ -1099,9 +1342,9 @@ return intel_pt_inject_event(event, sample, type); } -static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, - union perf_event *event, - struct perf_sample *sample, u64 type) +static int intel_pt_deliver_synth_event(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample, u64 type) { int ret; @@ -1123,6 +1366,7 @@ struct perf_sample sample = { .ip = 0, }; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; @@ -1144,6 +1388,7 @@ if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, @@ -1152,8 +1397,16 @@ sample.branch_stack = (struct branch_stack *)&dummy_bs; } - return intel_pt_deliver_synth_b_event(pt, event, &sample, - pt->branches_sample_type); + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; + if (sample.cyc_cnt) { + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; + ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; + } + + return intel_pt_deliver_synth_event(pt, event, &sample, + pt->branches_sample_type); } static void intel_pt_prep_sample(struct intel_pt *pt, @@ -1164,32 +1417,17 @@ intel_pt_prep_b_sample(pt, ptq, event, sample); if (pt->synth_opts.callchain) { - thread_stack__sample(ptq->thread, ptq->chain, + thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, pt->synth_opts.callchain_sz + 1, sample->ip, pt->kernel_start); sample->callchain = ptq->chain; } if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); + thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch, + pt->br_stack_sz); sample->branch_stack = ptq->last_branch; } -} - -static inline int intel_pt_deliver_synth_event(struct intel_pt *pt, - struct intel_pt_queue *ptq, - union perf_event *event, - struct perf_sample *sample, - u64 type) -{ - int ret; - - ret = intel_pt_deliver_synth_b_event(pt, event, sample, type); - - if (pt->synth_opts.last_branch) - intel_pt_reset_last_branch_rb(ptq); - - return ret; } static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) @@ -1205,11 +1443,22 @@ sample.id = ptq->pt->instructions_id; sample.stream_id = ptq->pt->instructions_id; - sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + if (pt->synth_opts.quick) + sample.period = 1; + else + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; + if (sample.cyc_cnt) { + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; + ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt; + } ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->instructions_sample_type); } @@ -1227,7 +1476,7 @@ sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->transactions_sample_type); } @@ -1268,7 +1517,7 @@ sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->ptwrites_sample_type); } @@ -1280,8 +1529,10 @@ struct perf_synth_intel_cbr raw; u32 flags; - if (intel_pt_skip_event(pt)) + if (intel_pt_skip_cbr_event(pt)) return 0; + + ptq->cbr_seen = ptq->state->cbr; intel_pt_prep_p_sample(pt, ptq, event, &sample); @@ -1296,7 +1547,7 @@ sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1321,7 +1572,7 @@ sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1346,7 +1597,7 @@ sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1371,7 +1622,7 @@ sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1396,21 +1647,277 @@ sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } +/* + * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer + * intel_pt_add_gp_regs(). + */ +static const int pebs_gp_regs[] = { + [PERF_REG_X86_FLAGS] = 1, + [PERF_REG_X86_IP] = 2, + [PERF_REG_X86_AX] = 3, + [PERF_REG_X86_CX] = 4, + [PERF_REG_X86_DX] = 5, + [PERF_REG_X86_BX] = 6, + [PERF_REG_X86_SP] = 7, + [PERF_REG_X86_BP] = 8, + [PERF_REG_X86_SI] = 9, + [PERF_REG_X86_DI] = 10, + [PERF_REG_X86_R8] = 11, + [PERF_REG_X86_R9] = 12, + [PERF_REG_X86_R10] = 13, + [PERF_REG_X86_R11] = 14, + [PERF_REG_X86_R12] = 15, + [PERF_REG_X86_R13] = 16, + [PERF_REG_X86_R14] = 17, + [PERF_REG_X86_R15] = 18, +}; + +static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos, + const struct intel_pt_blk_items *items, + u64 regs_mask) +{ + const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS]; + u32 mask = items->mask[INTEL_PT_GP_REGS_POS]; + u32 bit; + int i; + + for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) { + /* Get the PEBS gp_regs array index */ + int n = pebs_gp_regs[i] - 1; + + if (n < 0) + continue; + /* + * Add only registers that were requested (i.e. 'regs_mask') and + * that were provided (i.e. 'mask'), and update the resulting + * mask (i.e. 'intr_regs->mask') accordingly. + */ + if (mask & 1 << n && regs_mask & bit) { + intr_regs->mask |= bit; + *pos++ = gp_regs[n]; + } + } + + return pos; +} + +#ifndef PERF_REG_X86_XMM0 +#define PERF_REG_X86_XMM0 32 +#endif + +static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos, + const struct intel_pt_blk_items *items, + u64 regs_mask) +{ + u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0); + const u64 *xmm = items->xmm; + + /* + * If there are any XMM registers, then there should be all of them. + * Nevertheless, follow the logic to add only registers that were + * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'), + * and update the resulting mask (i.e. 'intr_regs->mask') accordingly. + */ + intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0; + + for (; mask; mask >>= 1, xmm++) { + if (mask & 1) + *pos++ = *xmm; + } +} + +#define LBR_INFO_MISPRED (1ULL << 63) +#define LBR_INFO_IN_TX (1ULL << 62) +#define LBR_INFO_ABORT (1ULL << 61) +#define LBR_INFO_CYCLES 0xffff + +/* Refer kernel's intel_pmu_store_pebs_lbrs() */ +static u64 intel_pt_lbr_flags(u64 info) +{ + union { + struct branch_flags flags; + u64 result; + } u; + + u.result = 0; + u.flags.mispred = !!(info & LBR_INFO_MISPRED); + u.flags.predicted = !(info & LBR_INFO_MISPRED); + u.flags.in_tx = !!(info & LBR_INFO_IN_TX); + u.flags.abort = !!(info & LBR_INFO_ABORT); + u.flags.cycles = info & LBR_INFO_CYCLES; + + return u.result; +} + +static void intel_pt_add_lbrs(struct branch_stack *br_stack, + const struct intel_pt_blk_items *items) +{ + u64 *to; + int i; + + br_stack->nr = 0; + + to = &br_stack->entries[0].from; + + for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) { + u32 mask = items->mask[i]; + const u64 *from = items->val[i]; + + for (; mask; mask >>= 3, from += 3) { + if ((mask & 7) == 7) { + *to++ = from[0]; + *to++ = from[1]; + *to++ = intel_pt_lbr_flags(from[2]); + br_stack->nr += 1; + } + } + } +} + +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_blk_items *items = &ptq->state->items; + struct perf_sample sample = { .ip = 0, }; + union perf_event *event = ptq->event_buf; + struct intel_pt *pt = ptq->pt; + struct evsel *evsel = pt->pebs_evsel; + u64 sample_type = evsel->core.attr.sample_type; + u64 id = evsel->core.id[0]; + u8 cpumode; + u64 regs[8 * sizeof(sample.intr_regs.mask)]; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_a_sample(ptq, event, &sample); + + sample.id = id; + sample.stream_id = id; + + if (!evsel->core.attr.freq) + sample.period = evsel->core.attr.sample_period; + + /* No support for non-zero CS base */ + if (items->has_ip) + sample.ip = items->ip; + else if (items->has_rip) + sample.ip = items->rip; + else + sample.ip = ptq->state->from_ip; + + /* No support for guest mode at this time */ + cpumode = sample.ip < ptq->pt->kernel_start ? + PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; + + event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP; + + sample.cpumode = cpumode; + + if (sample_type & PERF_SAMPLE_TIME) { + u64 timestamp = 0; + + if (items->has_timestamp) + timestamp = items->timestamp; + else if (!pt->timeless_decoding) + timestamp = ptq->timestamp; + if (timestamp) + sample.time = tsc_to_perf_time(timestamp, &pt->tc); + } + + if (sample_type & PERF_SAMPLE_CALLCHAIN && + pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, + pt->synth_opts.callchain_sz, sample.ip, + pt->kernel_start); + sample.callchain = ptq->chain; + } + + if (sample_type & PERF_SAMPLE_REGS_INTR && + (items->mask[INTEL_PT_GP_REGS_POS] || + items->mask[INTEL_PT_XMM_POS])) { + u64 regs_mask = evsel->core.attr.sample_regs_intr; + u64 *pos; + + sample.intr_regs.abi = items->is_32_bit ? + PERF_SAMPLE_REGS_ABI_32 : + PERF_SAMPLE_REGS_ABI_64; + sample.intr_regs.regs = regs; + + pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + + intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); + } + + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (items->mask[INTEL_PT_LBR_0_POS] || + items->mask[INTEL_PT_LBR_1_POS] || + items->mask[INTEL_PT_LBR_2_POS]) { + intel_pt_add_lbrs(ptq->last_branch, items); + } else if (pt->synth_opts.last_branch) { + thread_stack__br_sample(ptq->thread, ptq->cpu, + ptq->last_branch, + pt->br_stack_sz); + } else { + ptq->last_branch->nr = 0; + } + sample.branch_stack = ptq->last_branch; + } + + if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) + sample.addr = items->mem_access_address; + + if (sample_type & PERF_SAMPLE_WEIGHT) { + /* + * Refer kernel's setup_pebs_adaptive_sample_data() and + * intel_hsw_weight(). + */ + if (items->has_mem_access_latency) + sample.weight = items->mem_access_latency; + if (!sample.weight && items->has_tsx_aux_info) { + /* Cycles last block */ + sample.weight = (u32)items->tsx_aux_info; + } + } + + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { + u64 ax = items->has_rax ? items->rax : 0; + /* Refer kernel's intel_hsw_transaction() */ + u64 txn = (u8)(items->tsx_aux_info >> 32); + + /* For RTM XABORTs also log the abort code from AX */ + if (txn & PERF_TXN_TRANSACTION && ax & 1) + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + sample.transaction = txn; + } + + return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); +} + static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, - pid_t pid, pid_t tid, u64 ip) + pid_t pid, pid_t tid, u64 ip, u64 timestamp) { union perf_event event; char msg[MAX_AUXTRACE_ERROR_MSG]; int err; + if (pt->synth_opts.error_minus_flags) { + if (code == INTEL_PT_ERR_OVR && + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW) + return 0; + if (code == INTEL_PT_ERR_LOST && + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST) + return 0; + } + intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, - code, cpu, pid, tid, ip, msg); + code, cpu, pid, tid, ip, msg, timestamp); err = perf_session__deliver_synth_event(pt->session, &event, NULL); if (err) @@ -1418,6 +1925,18 @@ err); return err; +} + +static int intel_ptq_synth_error(struct intel_pt_queue *ptq, + const struct intel_pt_state *state) +{ + struct intel_pt *pt = ptq->pt; + u64 tm = ptq->timestamp; + + tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc); + + return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid, + ptq->tid, state->from_ip, tm); } static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) @@ -1452,8 +1971,7 @@ } #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ - INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ - INTEL_PT_CBR_CHG) + INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT) static int intel_pt_sample(struct intel_pt_queue *ptq) { @@ -1466,31 +1984,46 @@ ptq->have_sample = false; - if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { - if (state->type & INTEL_PT_CBR_CHG) { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + + /* + * Do PEBS first to allow for the possibility that the PEBS timestamp + * precedes the current timestamp. + */ + if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) { + err = intel_pt_synth_pebs_sample(ptq); + if (err) + return err; + } + + if (pt->sample_pwr_events) { + if (ptq->state->cbr != ptq->cbr_seen) { err = intel_pt_synth_cbr_sample(ptq); if (err) return err; } - if (state->type & INTEL_PT_MWAIT_OP) { - err = intel_pt_synth_mwait_sample(ptq); - if (err) - return err; - } - if (state->type & INTEL_PT_PWR_ENTRY) { - err = intel_pt_synth_pwre_sample(ptq); - if (err) - return err; - } - if (state->type & INTEL_PT_EX_STOP) { - err = intel_pt_synth_exstop_sample(ptq); - if (err) - return err; - } - if (state->type & INTEL_PT_PWR_EXIT) { - err = intel_pt_synth_pwrx_sample(ptq); - if (err) - return err; + if (state->type & INTEL_PT_PWR_EVT) { + if (state->type & INTEL_PT_MWAIT_OP) { + err = intel_pt_synth_mwait_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_PWR_ENTRY) { + err = intel_pt_synth_pwre_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_EX_STOP) { + err = intel_pt_synth_exstop_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_PWR_EXIT) { + err = intel_pt_synth_pwrx_sample(ptq); + if (err) + return err; + } } } @@ -1515,21 +2048,21 @@ if (!(state->type & INTEL_PT_BRANCH)) return 0; - if (pt->synth_opts.callchain || pt->synth_opts.thread_stack) - thread_stack__event(ptq->thread, ptq->flags, state->from_ip, - state->to_ip, ptq->insn_len, - state->trace_nr); - else - thread_stack__set_trace_nr(ptq->thread, state->trace_nr); + if (pt->use_thread_stack) { + thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, + state->from_ip, state->to_ip, ptq->insn_len, + state->trace_nr, pt->callstack, + pt->br_stack_sz_plus, + pt->mispred_all); + } else { + thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr); + } if (pt->sample_branches) { err = intel_pt_synth_branch_sample(ptq); if (err) return err; } - - if (pt->synth_opts.last_branch) - intel_pt_update_last_branch_rb(ptq); if (!ptq->sync_switch) return 0; @@ -1628,10 +2161,83 @@ } } +/* + * To filter against time ranges, it is only necessary to look at the next start + * or end time. + */ +static bool intel_pt_next_time(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + + if (ptq->sel_start) { + /* Next time is an end time */ + ptq->sel_start = false; + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end; + return true; + } else if (ptq->sel_idx + 1 < pt->range_cnt) { + /* Next time is a start time */ + ptq->sel_start = true; + ptq->sel_idx += 1; + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start; + return true; + } + + /* No next time */ + return false; +} + +static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp) +{ + int err; + + while (1) { + if (ptq->sel_start) { + if (ptq->timestamp >= ptq->sel_timestamp) { + /* After start time, so consider next time */ + intel_pt_next_time(ptq); + if (!ptq->sel_timestamp) { + /* No end time */ + return 0; + } + /* Check against end time */ + continue; + } + /* Before start time, so fast forward */ + ptq->have_sample = false; + if (ptq->sel_timestamp > *ff_timestamp) { + if (ptq->sync_switch) { + intel_pt_next_tid(ptq->pt, ptq); + ptq->switch_state = INTEL_PT_SS_UNKNOWN; + } + *ff_timestamp = ptq->sel_timestamp; + err = intel_pt_fast_forward(ptq->decoder, + ptq->sel_timestamp); + if (err) + return err; + } + return 0; + } else if (ptq->timestamp > ptq->sel_timestamp) { + /* After end time, so consider next time */ + if (!intel_pt_next_time(ptq)) { + /* No next time range, so stop decoding */ + ptq->have_sample = false; + ptq->switch_state = INTEL_PT_SS_NOT_TRACING; + return 1; + } + /* Check against next start time */ + continue; + } else { + /* Before end time */ + return 0; + } + } +} + static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) { const struct intel_pt_state *state = ptq->state; struct intel_pt *pt = ptq->pt; + u64 ff_timestamp = 0; int err; if (!pt->kernel_start) { @@ -1665,11 +2271,9 @@ ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } + ptq->timestamp = state->est_timestamp; if (pt->synth_opts.errors) { - err = intel_pt_synth_error(pt, state->err, - ptq->cpu, ptq->pid, - ptq->tid, - state->from_ip); + err = intel_ptq_synth_error(ptq, state); if (err) return err; } @@ -1697,6 +2301,12 @@ ptq->timestamp = state->est_timestamp; } else if (state->timestamp > ptq->timestamp) { ptq->timestamp = state->timestamp; + } + + if (ptq->sel_timestamp) { + err = intel_pt_time_filter(ptq, &ff_timestamp); + if (err) + return err; } if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { @@ -1791,10 +2401,60 @@ return 0; } +static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq, + struct auxtrace_queue *queue, + struct perf_sample *sample) +{ + struct machine *m = ptq->pt->machine; + + ptq->pid = sample->pid; + ptq->tid = sample->tid; + ptq->cpu = queue->cpu; + + intel_pt_log("queue %u cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + thread__zput(ptq->thread); + + if (ptq->tid == -1) + return; + + if (ptq->pid == -1) { + ptq->thread = machine__find_thread(m, -1, ptq->tid); + if (ptq->thread) + ptq->pid = ptq->thread->pid_; + return; + } + + ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid); +} + +static int intel_pt_process_timeless_sample(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + u64 ts = 0; + + queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); + if (!queue) + return -EINVAL; + + ptq = queue->priv; + if (!ptq) + return 0; + + ptq->stop = false; + ptq->time = sample->time; + intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample); + intel_pt_run_decoder(ptq, &ts); + return 0; +} + static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) { return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, - sample->pid, sample->tid, 0); + sample->pid, sample->tid, 0, sample->time); } static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) @@ -1840,7 +2500,6 @@ switch (ptq->switch_state) { case INTEL_PT_SS_NOT_TRACING: - ptq->next_tid = -1; break; case INTEL_PT_SS_UNKNOWN: case INTEL_PT_SS_TRACING: @@ -1860,12 +2519,13 @@ ptq->switch_state = INTEL_PT_SS_TRACING; break; case INTEL_PT_SS_EXPECTING_SWITCH_IP: - ptq->next_tid = tid; intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); break; default: break; } + + ptq->next_tid = -1; return 1; } @@ -1873,7 +2533,7 @@ static int intel_pt_process_switch(struct intel_pt *pt, struct perf_sample *sample) { - struct perf_evsel *evsel; + struct evsel *evsel; pid_t tid; int cpu, ret; @@ -1881,7 +2541,7 @@ if (evsel != pt->switch_evsel) return 0; - tid = perf_evsel__intval(evsel, sample, "next_pid"); + tid = evsel__intval(evsel, sample, "next_pid"); cpu = sample->cpu; intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", @@ -1895,6 +2555,44 @@ return machine__set_current_tid(pt->machine, cpu, -1, tid); } +static int intel_pt_context_switch_in(struct intel_pt *pt, + struct perf_sample *sample) +{ + pid_t pid = sample->pid; + pid_t tid = sample->tid; + int cpu = sample->cpu; + + if (pt->sync_switch) { + struct intel_pt_queue *ptq; + + ptq = intel_pt_cpu_to_ptq(pt, cpu); + if (ptq && ptq->sync_switch) { + ptq->next_tid = -1; + switch (ptq->switch_state) { + case INTEL_PT_SS_NOT_TRACING: + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_TRACING: + break; + case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + default: + break; + } + } + } + + /* + * If the current tid has not been updated yet, ensure it is now that + * a "switch in" event has occurred. + */ + if (machine__get_current_tid(pt->machine, cpu) == tid) + return 0; + + return machine__set_current_tid(pt->machine, cpu, pid, tid); +} + static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) { @@ -1906,7 +2604,7 @@ if (pt->have_sched_switch == 3) { if (!out) - return 0; + return intel_pt_context_switch_in(pt, sample); if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { pr_err("Expecting CPU-wide context switch event\n"); return -EINVAL; @@ -1922,10 +2620,6 @@ if (tid == -1) intel_pt_log("context_switch event has no tid\n"); - - intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", - cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, - &pt->tc)); ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); if (ret <= 0) @@ -1949,6 +2643,67 @@ return machine__set_current_tid(pt->machine, sample->cpu, event->itrace_start.pid, event->itrace_start.tid); +} + +static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) +{ + if (!al->map || addr < al->map->start || addr >= al->map->end) { + if (!thread__find_map(thread, cpumode, addr, al)) + return -1; + } + + return 0; +} + +/* Invalidate all instruction cache entries that overlap the text poke */ +static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) +{ + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u64 addr = event->text_poke.addr + event->text_poke.new_len - 1; + /* Assume text poke begins in a basic block no more than 4096 bytes */ + int cnt = 4096 + event->text_poke.new_len; + struct thread *thread = pt->unknown_thread; + struct addr_location al = { .map = NULL }; + struct machine *machine = pt->machine; + struct intel_pt_cache_entry *e; + u64 offset; + + if (!event->text_poke.new_len) + return 0; + + for (; cnt; cnt--, addr--) { + if (intel_pt_find_map(thread, cpumode, addr, &al)) { + if (addr < event->text_poke.addr) + return 0; + continue; + } + + if (!al.map->dso || !al.map->dso->auxtrace_cache) + continue; + + offset = al.map->map_ip(al.map, addr); + + e = intel_pt_cache_lookup(al.map->dso, machine, offset); + if (!e) + continue; + + if (addr + e->byte_cnt + e->length <= event->text_poke.addr) { + /* + * No overlap. Working backwards there cannot be another + * basic block that overlaps the text poke if there is a + * branch instruction before the text poke address. + */ + if (e->branch != INTEL_PT_BR_NO_BRANCH) + return 0; + } else { + intel_pt_cache_invalidate(al.map->dso, machine, offset); + intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", + al.map->dso->long_name, addr); + } + } + + return 0; } static int intel_pt_process_event(struct perf_session *session, @@ -1981,7 +2736,11 @@ } if (pt->timeless_decoding) { - if (event->header.type == PERF_RECORD_EXIT) { + if (pt->sampling_mode) { + if (sample->aux_sample.size) + err = intel_pt_process_timeless_sample(pt, + sample); + } else if (event->header.type == PERF_RECORD_EXIT) { err = intel_pt_process_timeless_queues(pt, event->fork.tid, sample->time); @@ -1991,6 +2750,13 @@ } if (err) return err; + + if (event->header.type == PERF_RECORD_SAMPLE) { + if (pt->synth_opts.add_callchain && !sample->callchain) + intel_pt_add_callchain(pt, sample); + if (pt->synth_opts.add_last_branch && !sample->branch_stack) + intel_pt_add_br_stack(pt, sample); + } if (event->header.type == PERF_RECORD_AUX && (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && @@ -2008,9 +2774,14 @@ event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); - intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", - perf_event__name(event->header.type), event->header.type, - sample->cpu, sample->time, timestamp); + if (!err && event->header.type == PERF_RECORD_TEXT_POKE) + err = intel_pt_text_poke(pt, event); + + if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) { + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", + event->header.type, sample->cpu, sample->time, timestamp); + intel_pt_log_event(event); + } return err; } @@ -2063,8 +2834,19 @@ session->auxtrace = NULL; thread__put(pt->unknown_thread); addr_filters__exit(&pt->filts); + zfree(&pt->chain); zfree(&pt->filter); + zfree(&pt->time_ranges); free(pt); +} + +static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + return evsel->core.attr.type == pt->pmu_type; } static int intel_pt_process_auxtrace_event(struct perf_session *session, @@ -2106,6 +2888,28 @@ return 0; } +static int intel_pt_queue_data(struct perf_session *session, + struct perf_sample *sample, + union perf_event *event, u64 data_offset) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + u64 timestamp; + + if (event) { + return auxtrace_queues__add_event(&pt->queues, session, event, + data_offset, NULL); + } + + if (sample->time && sample->time != (u64)-1) + timestamp = perf_time_to_tsc(sample->time, &pt->tc); + else + timestamp = 0; + + return auxtrace_queues__add_sample(&pt->queues, session, sample, + data_offset, timestamp); +} + struct intel_pt_synth { struct perf_tool dummy_tool; struct perf_session *session; @@ -2144,13 +2948,13 @@ return err; } -static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id, +static void intel_pt_set_event_name(struct evlist *evlist, u64 id, const char *name) { - struct perf_evsel *evsel; + struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->id && evsel->id[0] == id) { + if (evsel->core.id && evsel->core.id[0] == id) { if (evsel->name) zfree(&evsel->name); evsel->name = strdup(name); @@ -2159,13 +2963,13 @@ } } -static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt, - struct perf_evlist *evlist) +static struct evsel *intel_pt_evsel(struct intel_pt *pt, + struct evlist *evlist) { - struct perf_evsel *evsel; + struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->attr.type == pt->pmu_type && evsel->ids) + if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids) return evsel; } @@ -2175,8 +2979,8 @@ static int intel_pt_synth_events(struct intel_pt *pt, struct perf_session *session) { - struct perf_evlist *evlist = session->evlist; - struct perf_evsel *evsel = intel_pt_evsel(pt, evlist); + struct evlist *evlist = session->evlist; + struct evsel *evsel = intel_pt_evsel(pt, evlist); struct perf_event_attr attr; u64 id; int err; @@ -2189,7 +2993,7 @@ memset(&attr, 0, sizeof(struct perf_event_attr)); attr.size = sizeof(struct perf_event_attr); attr.type = PERF_TYPE_HARDWARE; - attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD; if (pt->timeless_decoding) @@ -2198,15 +3002,15 @@ attr.sample_type |= PERF_SAMPLE_TIME; if (!pt->per_cpu_mmaps) attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; - attr.exclude_user = evsel->attr.exclude_user; - attr.exclude_kernel = evsel->attr.exclude_kernel; - attr.exclude_hv = evsel->attr.exclude_hv; - attr.exclude_host = evsel->attr.exclude_host; - attr.exclude_guest = evsel->attr.exclude_guest; - attr.sample_id_all = evsel->attr.sample_id_all; - attr.read_format = evsel->attr.read_format; + attr.exclude_user = evsel->core.attr.exclude_user; + attr.exclude_kernel = evsel->core.attr.exclude_kernel; + attr.exclude_hv = evsel->core.attr.exclude_hv; + attr.exclude_host = evsel->core.attr.exclude_host; + attr.exclude_guest = evsel->core.attr.exclude_guest; + attr.sample_id_all = evsel->core.attr.sample_id_all; + attr.read_format = evsel->core.attr.read_format; - id = evsel->id[0] + 1000000000; + id = evsel->core.id[0] + 1000000000; if (!id) id = 1; @@ -2226,8 +3030,15 @@ if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; - if (pt->synth_opts.last_branch) + if (pt->synth_opts.last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. + */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } if (pt->synth_opts.instructions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; @@ -2288,7 +3099,7 @@ id += 1; } - if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) { + if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) { attr.config = PERF_SYNTH_INTEL_MWAIT; err = intel_pt_synth_event(session, "mwait", &attr, id); if (err) @@ -2325,12 +3136,28 @@ return 0; } -static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) +static void intel_pt_setup_pebs_events(struct intel_pt *pt) { - struct perf_evsel *evsel; + struct evsel *evsel; + + if (!pt->synth_opts.other_events) + return; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (evsel->core.attr.aux_output && evsel->core.id) { + pt->sample_pebs = true; + pt->pebs_evsel = evsel; + return; + } + } +} + +static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist) +{ + struct evsel *evsel; evlist__for_each_entry_reverse(evlist, evsel) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); if (!strcmp(name, "sched:sched_switch")) return evsel; @@ -2339,12 +3166,12 @@ return NULL; } -static bool intel_pt_find_switch(struct perf_evlist *evlist) +static bool intel_pt_find_switch(struct evlist *evlist) { - struct perf_evsel *evsel; + struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->attr.context_switch) + if (evsel->core.attr.context_switch) return true; } @@ -2361,6 +3188,85 @@ return 0; } +/* Find least TSC which converts to ns or later */ +static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt) +{ + u64 tsc, tm; + + tsc = perf_time_to_tsc(ns, &pt->tc); + + while (1) { + tm = tsc_to_perf_time(tsc, &pt->tc); + if (tm < ns) + break; + tsc -= 1; + } + + while (tm < ns) + tm = tsc_to_perf_time(++tsc, &pt->tc); + + return tsc; +} + +/* Find greatest TSC which converts to ns or earlier */ +static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt) +{ + u64 tsc, tm; + + tsc = perf_time_to_tsc(ns, &pt->tc); + + while (1) { + tm = tsc_to_perf_time(tsc, &pt->tc); + if (tm > ns) + break; + tsc += 1; + } + + while (tm > ns) + tm = tsc_to_perf_time(--tsc, &pt->tc); + + return tsc; +} + +static int intel_pt_setup_time_ranges(struct intel_pt *pt, + struct itrace_synth_opts *opts) +{ + struct perf_time_interval *p = opts->ptime_range; + int n = opts->range_num; + int i; + + if (!n || !p || pt->timeless_decoding) + return 0; + + pt->time_ranges = calloc(n, sizeof(struct range)); + if (!pt->time_ranges) + return -ENOMEM; + + pt->range_cnt = n; + + intel_pt_log("%s: %u range(s)\n", __func__, n); + + for (i = 0; i < n; i++) { + struct range *r = &pt->time_ranges[i]; + u64 ts = p[i].start; + u64 te = p[i].end; + + /* + * Take care to ensure the TSC range matches the perf-time range + * when converted back to perf-time. + */ + r->start = ts ? intel_pt_tsc_start(ts, pt) : 0; + r->end = te ? intel_pt_tsc_end(te, pt) : 0; + + intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n", + i, ts, te); + intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n", + i, r->start, r->end); + } + + return 0; +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", @@ -2373,6 +3279,7 @@ [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n", [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", @@ -2380,15 +3287,19 @@ [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n", }; -static void intel_pt_print_info(u64 *arr, int start, int finish) +static void intel_pt_print_info(__u64 *arr, int start, int finish) { int i; if (!dump_trace) return; - for (i = start; i <= finish; i++) - fprintf(stdout, intel_pt_info_fmts[i], arr[i]); + for (i = start; i <= finish; i++) { + const char *fmt = intel_pt_info_fmts[i]; + + if (fmt) + fprintf(stdout, fmt, arr[i]); + } } static void intel_pt_print_info_str(const char *name, const char *str) @@ -2399,23 +3310,23 @@ fprintf(stdout, " %-20s%s\n", name, str ? str : ""); } -static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos) +static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos) { return auxtrace_info->header.size >= - sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1)); + sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1)); } int intel_pt_process_auxtrace_info(union perf_event *event, struct perf_session *session) { - struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS; struct intel_pt *pt; void *info_end; - u64 *info; + __u64 *info; int err; - if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + min_sz) return -EINVAL; @@ -2513,7 +3424,7 @@ if (pt->timeless_decoding && !pt->tc.time_mult) pt->tc.time_mult = 1; pt->have_tsc = intel_pt_have_tsc(pt); - pt->sampling_mode = false; + pt->sampling_mode = intel_pt_sampling_mode(pt); pt->est_tsc = !pt->timeless_decoding; pt->unknown_thread = thread__new(999999999, 999999999); @@ -2533,16 +3444,19 @@ err = thread__set_comm(pt->unknown_thread, "unknown", 0); if (err) goto err_delete_thread; - if (thread__init_map_groups(pt->unknown_thread, pt->machine)) { + if (thread__init_maps(pt->unknown_thread, pt->machine)) { err = -ENOMEM; goto err_delete_thread; } pt->auxtrace.process_event = intel_pt_process_event; pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; + pt->auxtrace.queue_data = intel_pt_queue_data; + pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample; pt->auxtrace.flush_events = intel_pt_flush; pt->auxtrace.free_events = intel_pt_free_events; pt->auxtrace.free = intel_pt_free; + pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace; session->auxtrace = &pt->auxtrace; if (dump_trace) @@ -2562,16 +3476,18 @@ goto err_delete_thread; } - if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + if (session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&pt->synth_opts); - if (use_browser != -1) { + itrace_synth_opts__set_default(&pt->synth_opts, + session->itrace_synth_opts->default_no_sample); + if (!session->itrace_synth_opts->default_no_sample && + !session->itrace_synth_opts->inject) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; + pt->synth_opts.add_callchain = true; } - if (session->itrace_synth_opts) - pt->synth_opts.thread_stack = + pt->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; } @@ -2591,6 +3507,10 @@ pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; } + err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts); + if (err) + goto err_delete_thread; + if (pt->synth_opts.calls) pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_TRACE_END; @@ -2598,19 +3518,64 @@ pt->branches_filter |= PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_BEGIN; - if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { + if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) && + !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; if (callchain_register_param(&callchain_param) < 0) { symbol_conf.use_callchain = false; pt->synth_opts.callchain = false; + pt->synth_opts.add_callchain = false; } } + + if (pt->synth_opts.add_callchain) { + err = intel_pt_callchain_init(pt); + if (err) + goto err_delete_thread; + } + + if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) { + pt->br_stack_sz = pt->synth_opts.last_branch_sz; + pt->br_stack_sz_plus = pt->br_stack_sz; + } + + if (pt->synth_opts.add_last_branch) { + err = intel_pt_br_stack_init(pt); + if (err) + goto err_delete_thread; + /* + * Additional branch stack size to cater for tracing from the + * actual sample ip to where the sample time is recorded. + * Measured at about 200 branches, but generously set to 1024. + * If kernel space is not being traced, then add just 1 for the + * branch to kernel space. + */ + if (intel_pt_tracing_kernel(pt)) + pt->br_stack_sz_plus += 1024; + else + pt->br_stack_sz_plus += 1; + } + + pt->use_thread_stack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack || + pt->synth_opts.last_branch || + pt->synth_opts.add_last_branch; + + pt->callstack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack; err = intel_pt_synth_events(pt, session); if (err) goto err_delete_thread; - err = auxtrace_queues__process_index(&pt->queues, session); + intel_pt_setup_pebs_events(pt); + + if (pt->sampling_mode || list_empty(&session->auxtrace_index)) + err = auxtrace_queue_data(session, true, true); + else + err = auxtrace_queues__process_index(&pt->queues, session); if (err) goto err_delete_thread; @@ -2623,6 +3588,7 @@ return 0; err_delete_thread: + zfree(&pt->chain); thread__zput(pt->unknown_thread); err_free_queues: intel_pt_log_disable(); @@ -2631,6 +3597,7 @@ err_free: addr_filters__exit(&pt->filts); zfree(&pt->filter); + zfree(&pt->time_ranges); free(pt); return err; } -- Gitblit v1.6.2