From cf4ce59b3b70238352c7f1729f0f7223214828ad Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Fri, 20 Sep 2024 01:46:19 +0000 Subject: [PATCH] rtl88x2CE_WiFi_linux add concurrent mode --- kernel/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 812 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 708 insertions(+), 104 deletions(-) diff --git a/kernel/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/kernel/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index e2a6c22..48fda1a 100644 --- a/kernel/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/kernel/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel_pt_decoder.c: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * */ #ifndef _GNU_SOURCE @@ -23,15 +14,22 @@ #include <stdint.h> #include <inttypes.h> #include <linux/compiler.h> +#include <linux/string.h> +#include <linux/zalloc.h> -#include "../cache.h" -#include "../util.h" #include "../auxtrace.h" #include "intel-pt-insn-decoder.h" #include "intel-pt-pkt-decoder.h" #include "intel-pt-decoder.h" #include "intel-pt-log.h" + +#define BITULL(x) (1ULL << (x)) + +/* IA32_RTIT_CTL MSR bits */ +#define INTEL_PT_CYC_ENABLE BITULL(1) +#define INTEL_PT_CYC_THRESHOLD (BITULL(22) | BITULL(21) | BITULL(20) | BITULL(19)) +#define INTEL_PT_CYC_THRESHOLD_SHIFT 19 #define INTEL_PT_BLK_SIZE 1024 @@ -64,6 +62,7 @@ INTEL_PT_STATE_TIP_PGD, INTEL_PT_STATE_FUP, INTEL_PT_STATE_FUP_NO_TIP, + INTEL_PT_STATE_RESAMPLE, }; static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) @@ -74,6 +73,7 @@ case INTEL_PT_STATE_ERR_RESYNC: case INTEL_PT_STATE_IN_SYNC: case INTEL_PT_STATE_TNT_CONT: + case INTEL_PT_STATE_RESAMPLE: return true; case INTEL_PT_STATE_TNT: case INTEL_PT_STATE_TIP: @@ -104,6 +104,7 @@ uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data); bool (*pgd_ip)(uint64_t ip, void *data); + int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); void *data; struct intel_pt_state state; const unsigned char *buf; @@ -116,6 +117,10 @@ bool have_cyc; bool fixup_last_mtc; bool have_last_ip; + bool in_psb; + bool hop; + bool hop_psb_fup; + bool leap; enum intel_pt_param_flags flags; uint64_t pos; uint64_t last_ip; @@ -124,6 +129,7 @@ uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t buf_timestamp; uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; @@ -139,6 +145,10 @@ int mtc_shift; struct intel_pt_stack stack; enum intel_pt_pkt_state pkt_state; + enum intel_pt_pkt_ctx pkt_ctx; + enum intel_pt_pkt_ctx prev_pkt_ctx; + enum intel_pt_blk_type blk_type; + int blk_type_pos; struct intel_pt_pkt packet; struct intel_pt_pkt tnt; int pkt_step; @@ 
-160,6 +170,13 @@ uint64_t period_mask; uint64_t period_ticks; uint64_t last_masked_timestamp; + uint64_t tot_cyc_cnt; + uint64_t sample_tot_cyc_cnt; + uint64_t base_cyc_cnt; + uint64_t cyc_cnt_timestamp; + uint64_t ctl; + uint64_t cyc_threshold; + double tsc_to_cyc; bool continuous_period; bool overflow; bool set_fup_tx_flags; @@ -167,6 +184,8 @@ bool set_fup_mwait; bool set_fup_pwre; bool set_fup_exstop; + bool set_fup_bep; + bool sample_cyc; unsigned int fup_tx_flags; unsigned int tx_flags; uint64_t fup_ptw_payload; @@ -192,6 +211,14 @@ x >>= 1; return x << i; +} + +static uint64_t intel_pt_cyc_threshold(uint64_t ctl) +{ + if (!(ctl & INTEL_PT_CYC_ENABLE)) + return 0; + + return (ctl & INTEL_PT_CYC_THRESHOLD) >> INTEL_PT_CYC_THRESHOLD_SHIFT; } static void intel_pt_setup_period(struct intel_pt_decoder *decoder) @@ -226,17 +253,23 @@ decoder->get_trace = params->get_trace; decoder->walk_insn = params->walk_insn; decoder->pgd_ip = params->pgd_ip; + decoder->lookahead = params->lookahead; decoder->data = params->data; decoder->return_compression = params->return_compression; decoder->branch_enable = params->branch_enable; + decoder->hop = params->quick >= 1; + decoder->leap = params->quick >= 2; decoder->flags = params->flags; + decoder->ctl = params->ctl; decoder->period = params->period; decoder->period_type = params->period_type; decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio; + + decoder->cyc_threshold = intel_pt_cyc_threshold(decoder->ctl); intel_pt_setup_period(decoder); @@ -268,6 +301,9 @@ intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d); intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult); intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip); + + if (decoder->hop) + intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n"); return decoder; } @@ -479,7 +515,21 @@ return -EBADMSG; } -static int intel_pt_get_data(struct 
intel_pt_decoder *decoder) +static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) +{ + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; +} + +static void intel_pt_reposition(struct intel_pt_decoder *decoder) +{ + decoder->ip = 0; + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + decoder->timestamp = 0; + decoder->have_tma = false; +} + +static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition) { struct intel_pt_buffer buffer = { .buf = 0, }; int ret; @@ -496,12 +546,10 @@ intel_pt_log("No more data\n"); return -ENODATA; } - if (!buffer.consecutive) { - decoder->ip = 0; - decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + decoder->buf_timestamp = buffer.ref_timestamp; + if (!buffer.consecutive || reposition) { + intel_pt_reposition(decoder); decoder->ref_timestamp = buffer.ref_timestamp; - decoder->timestamp = 0; - decoder->have_tma = false; decoder->state.trace_nr = buffer.trace_nr; intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", decoder->ref_timestamp); @@ -511,10 +559,11 @@ return 0; } -static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder, + bool reposition) { if (!decoder->next_buf) - return intel_pt_get_data(decoder); + return intel_pt_get_data(decoder, reposition); decoder->buf = decoder->next_buf; decoder->len = decoder->next_len; @@ -533,7 +582,7 @@ len = decoder->len; memcpy(buf, decoder->buf, len); - ret = intel_pt_get_data(decoder); + ret = intel_pt_get_data(decoder, false); if (ret) { decoder->pos += old_len; return ret < 0 ? 
ret : -EINVAL; @@ -545,7 +594,8 @@ memcpy(buf + len, decoder->buf, n); len += n; - ret = intel_pt_get_packet(buf, len, &decoder->packet); + decoder->prev_pkt_ctx = decoder->pkt_ctx; + ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx); if (ret < (int)old_len) { decoder->next_buf = decoder->buf; decoder->next_len = decoder->len; @@ -580,6 +630,7 @@ { struct intel_pt_pkt_info pkt_info; const unsigned char *buf = decoder->buf; + enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx; size_t len = decoder->len; int ret; @@ -598,7 +649,8 @@ if (!len) return INTEL_PT_NEED_MORE_BYTES; - ret = intel_pt_get_packet(buf, len, &pkt_info.packet); + ret = intel_pt_get_packet(buf, len, &pkt_info.packet, + &pkt_ctx); if (!ret) return INTEL_PT_NEED_MORE_BYTES; if (ret < 0) @@ -673,6 +725,10 @@ case INTEL_PT_MNT: case INTEL_PT_PTWRITE: case INTEL_PT_PTWRITE_IP: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: return 0; case INTEL_PT_MTC: @@ -859,14 +915,15 @@ decoder->len -= decoder->pkt_step; if (!decoder->len) { - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; } + decoder->prev_pkt_ctx = decoder->pkt_ctx; ret = intel_pt_get_packet(decoder->buf, decoder->len, - &decoder->packet); - if (ret == INTEL_PT_NEED_MORE_BYTES && + &decoder->packet, &decoder->pkt_ctx); + if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { ret = intel_pt_get_split_packet(decoder); if (ret < 0) @@ -1057,53 +1114,69 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) { + enum intel_pt_sample_type type = decoder->state.type; bool ret = false; + + decoder->state.type &= ~INTEL_PT_BRANCH; if (decoder->set_fup_tx_flags) { decoder->set_fup_tx_flags = false; decoder->tx_flags = decoder->fup_tx_flags; - decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.type |= INTEL_PT_TRANSACTION; if (decoder->fup_tx_flags & 
INTEL_PT_ABORT_TX) decoder->state.type |= INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.flags = decoder->fup_tx_flags; - return true; + ret = true; } if (decoder->set_fup_ptw) { decoder->set_fup_ptw = false; - decoder->state.type = INTEL_PT_PTW; + decoder->state.type |= INTEL_PT_PTW; decoder->state.flags |= INTEL_PT_FUP_IP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.ptw_payload = decoder->fup_ptw_payload; - return true; + ret = true; } if (decoder->set_fup_mwait) { decoder->set_fup_mwait = false; - decoder->state.type = INTEL_PT_MWAIT_OP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; + decoder->state.type |= INTEL_PT_MWAIT_OP; decoder->state.mwait_payload = decoder->fup_mwait_payload; ret = true; } if (decoder->set_fup_pwre) { decoder->set_fup_pwre = false; decoder->state.type |= INTEL_PT_PWR_ENTRY; - decoder->state.type &= ~INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.pwre_payload = decoder->fup_pwre_payload; ret = true; } if (decoder->set_fup_exstop) { decoder->set_fup_exstop = false; decoder->state.type |= INTEL_PT_EX_STOP; - decoder->state.type &= ~INTEL_PT_BRANCH; decoder->state.flags |= INTEL_PT_FUP_IP; + ret = true; + } + if (decoder->set_fup_bep) { + decoder->set_fup_bep = false; + decoder->state.type |= INTEL_PT_BLK_ITEMS; + ret = true; + } + if (decoder->overflow) { + decoder->overflow = false; + if (!ret && !decoder->pge) { + if (decoder->hop) { + decoder->state.type = 0; + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + } + decoder->pge = true; + decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN; + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return true; + } + } + if (ret) { decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; - ret = true; + } else { + decoder->state.type = type; } return ret; } @@ -1176,7 +1249,7 @@ decoder->pge = false; 
decoder->continuous_period = false; decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->state.to_ip = 0; + decoder->state.type |= INTEL_PT_TRACE_END; return 0; } if (err == INTEL_PT_RETURN) @@ -1190,9 +1263,13 @@ decoder->continuous_period = false; decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - if (decoder->packet.count != 0) + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + decoder->state.to_ip = decoder->last_ip; decoder->ip = decoder->last_ip; + } + decoder->state.type |= INTEL_PT_TRACE_END; } else { decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->state.from_ip = decoder->ip; @@ -1219,7 +1296,8 @@ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->ip = to_ip; decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; + decoder->state.to_ip = to_ip; + decoder->state.type |= INTEL_PT_TRACE_END; return 0; } intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch", @@ -1317,10 +1395,10 @@ decoder->ip += intel_pt_insn.length; return 0; } + decoder->sample_cyc = false; decoder->ip += intel_pt_insn.length; if (!decoder->tnt.count) { - decoder->sample_timestamp = decoder->timestamp; - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + intel_pt_update_sample_time(decoder); return -EAGAIN; } decoder->tnt.payload <<= 1; @@ -1354,6 +1432,21 @@ return 0; } +static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp) +{ + timestamp |= (ref_timestamp & (0xffULL << 56)); + + if (timestamp < ref_timestamp) { + if (ref_timestamp - timestamp > (1ULL << 55)) + timestamp += (1ULL << 56); + } else { + if (timestamp - ref_timestamp > (1ULL << 55)) + timestamp -= (1ULL << 56); + } + + return timestamp; +} + static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) { uint64_t timestamp; @@ -1361,15 +1454,8 @@ decoder->have_tma = false; if (decoder->ref_timestamp) { - timestamp = decoder->packet.payload | - 
(decoder->ref_timestamp & (0xffULL << 56)); - if (timestamp < decoder->ref_timestamp) { - if (decoder->ref_timestamp - timestamp > (1ULL << 55)) - timestamp += (1ULL << 56); - } else { - if (timestamp - decoder->ref_timestamp > (1ULL << 55)) - timestamp -= (1ULL << 56); - } + timestamp = intel_pt_8b_tsc(decoder->packet.payload, + decoder->ref_timestamp); decoder->tsc_timestamp = timestamp; decoder->timestamp = timestamp; decoder->ref_timestamp = 0; @@ -1408,9 +1494,54 @@ intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); decoder->timestamp_insn_cnt = 0; - decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + decoder->ip = 0; + decoder->pge = false; + decoder->set_fup_tx_flags = false; + decoder->set_fup_ptw = false; + decoder->set_fup_mwait = false; + decoder->set_fup_pwre = false; + decoder->set_fup_exstop = false; + decoder->set_fup_bep = false; decoder->overflow = true; return -EOVERFLOW; +} + +static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder) +{ + if (decoder->have_cyc) + return; + + decoder->cyc_cnt_timestamp = decoder->timestamp; + decoder->base_cyc_cnt = decoder->tot_cyc_cnt; +} + +static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder) +{ + decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp; + + if (decoder->pge) + intel_pt_mtc_cyc_cnt_pge(decoder); +} + +static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder) +{ + uint64_t tot_cyc_cnt, tsc_delta; + + if (decoder->have_cyc) + return; + + decoder->sample_cyc = true; + + if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp) + return; + + tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp; + tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt; + + if (tot_cyc_cnt > decoder->tot_cyc_cnt) + decoder->tot_cyc_cnt = tot_cyc_cnt; } static void intel_pt_calc_tma(struct 
intel_pt_decoder *decoder) @@ -1421,6 +1552,11 @@ if (!decoder->tsc_ctc_ratio_d) return; + + if (decoder->pge && !decoder->in_psb) + intel_pt_mtc_cyc_cnt_pge(decoder); + else + intel_pt_mtc_cyc_cnt_upd(decoder); decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; decoder->ctc_timestamp = decoder->tsc_timestamp - fc; @@ -1477,6 +1613,8 @@ else decoder->timestamp = timestamp; + intel_pt_mtc_cyc_cnt_upd(decoder); + decoder->timestamp_insn_cnt = 0; decoder->last_mtc = mtc; @@ -1486,6 +1624,8 @@ decoder->have_calc_cyc_to_tsc = false; intel_pt_calc_cyc_to_tsc(decoder, true); } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) @@ -1499,6 +1639,10 @@ decoder->cbr = cbr; decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + + intel_pt_mtc_cyc_cnt_cbr(decoder); } static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) @@ -1508,6 +1652,9 @@ decoder->have_cyc = true; decoder->cycle_cnt += decoder->packet.payload; + if (decoder->pge) + decoder->tot_cyc_cnt += decoder->packet.payload; + decoder->sample_cyc = true; if (!decoder->cyc_ref_timestamp) return; @@ -1526,6 +1673,48 @@ decoder->timestamp = timestamp; decoder->timestamp_insn_cnt = 0; + + intel_pt_log_to("Setting timestamp", decoder->timestamp); +} + +static void intel_pt_bbp(struct intel_pt_decoder *decoder) +{ + if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) { + memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask)); + decoder->state.items.is_32_bit = false; + } + decoder->blk_type = decoder->packet.payload; + decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type); + if (decoder->blk_type == INTEL_PT_GP_REGS) + decoder->state.items.is_32_bit = decoder->packet.count; + if (decoder->blk_type_pos < 0) { + intel_pt_log("WARNING: Unknown block type %u\n", + decoder->blk_type); + } else if 
(decoder->state.items.mask[decoder->blk_type_pos]) { + intel_pt_log("WARNING: Duplicate block type %u\n", + decoder->blk_type); + } +} + +static void intel_pt_bip(struct intel_pt_decoder *decoder) +{ + uint32_t id = decoder->packet.count; + uint32_t bit = 1 << id; + int pos = decoder->blk_type_pos; + + if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) { + intel_pt_log("WARNING: Unknown block item %u type %d\n", + id, decoder->blk_type); + return; + } + + if (decoder->state.items.mask[pos] & bit) { + intel_pt_log("WARNING: Duplicate block item %u type %d\n", + id, decoder->blk_type); + } + + decoder->state.items.mask[pos] |= bit; + decoder->state.items.val[pos][id] = decoder->packet.payload; } /* Walk PSB+ packets when already in sync. */ @@ -1533,14 +1722,17 @@ { int err; + decoder->in_psb = true; + while (1) { err = intel_pt_get_next_packet(decoder); if (err) - return err; + goto out; switch (decoder->packet.type) { case INTEL_PT_PSBEND: - return 0; + err = 0; + goto out; case INTEL_PT_TIP_PGD: case INTEL_PT_TIP_PGE: @@ -1556,12 +1748,18 @@ case INTEL_PT_MWAIT: case INTEL_PT_PWRE: case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); - return -EAGAIN; + err = -EAGAIN; + goto out; case INTEL_PT_OVF: - return intel_pt_overflow(decoder); + err = intel_pt_overflow(decoder); + goto out; case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); @@ -1585,8 +1783,14 @@ case INTEL_PT_FUP: decoder->pge = true; - if (decoder->packet.count) + if (decoder->packet.count) { intel_pt_set_last_ip(decoder); + if (decoder->hop) { + /* Act on FUP at PSBEND */ + decoder->ip = decoder->last_ip; + decoder->hop_psb_fup = true; + } + } break; case INTEL_PT_MODE_TSX: @@ -1610,6 +1814,10 @@ break; } } +out: + decoder->in_psb = false; + + return err; } static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) @@ -1646,6 +1854,10 @@ case INTEL_PT_MWAIT: case 
INTEL_PT_PWRE: case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: intel_pt_log("ERROR: Missing TIP after FUP\n"); decoder->pkt_state = INTEL_PT_STATE_ERR3; decoder->pkt_step = 0; @@ -1660,14 +1872,15 @@ case INTEL_PT_TIP_PGD: decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - if (decoder->packet.count != 0) { + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { intel_pt_set_ip(decoder); - intel_pt_log("Omitting PGD ip " x64_fmt "\n", - decoder->ip); + decoder->state.to_ip = decoder->ip; } decoder->pge = false; decoder->continuous_period = false; + decoder->state.type |= INTEL_PT_TRACE_END; return 0; case INTEL_PT_TIP_PGE: @@ -1681,6 +1894,8 @@ intel_pt_set_ip(decoder); decoder->state.to_ip = decoder->ip; } + decoder->state.type |= INTEL_PT_TRACE_BEGIN; + intel_pt_mtc_cyc_cnt_pge(decoder); return 0; case INTEL_PT_TIP: @@ -1722,8 +1937,136 @@ } } +static int intel_pt_resample(struct intel_pt_decoder *decoder) +{ + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return 0; +} + +#define HOP_PROCESS 0 +#define HOP_IGNORE 1 +#define HOP_RETURN 2 +#define HOP_AGAIN 3 + +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder); + +/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */ +static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err) +{ + *err = 0; + + /* Leap from PSB to PSB, getting ip from FUP within PSB+ */ + if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) { + *err = intel_pt_scan_for_psb(decoder); + if (*err) + return HOP_RETURN; + } + + switch (decoder->packet.type) { + case INTEL_PT_TNT: + return HOP_IGNORE; + + case INTEL_PT_TIP_PGD: + decoder->pge = false; + if (!decoder->packet.count) + return HOP_IGNORE; + intel_pt_set_ip(decoder); + decoder->state.type |= 
INTEL_PT_TRACE_END; + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return HOP_RETURN; + + case INTEL_PT_TIP: + if (!decoder->packet.count) + return HOP_IGNORE; + intel_pt_set_ip(decoder); + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return HOP_RETURN; + + case INTEL_PT_FUP: + if (!decoder->packet.count) + return HOP_IGNORE; + intel_pt_set_ip(decoder); + if (decoder->set_fup_mwait || decoder->set_fup_pwre) + *no_tip = true; + if (!decoder->branch_enable || !decoder->pge) + *no_tip = true; + if (*no_tip) { + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + intel_pt_fup_event(decoder); + return HOP_RETURN; + } + intel_pt_fup_event(decoder); + decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH; + *err = intel_pt_walk_fup_tip(decoder); + if (!*err && decoder->state.to_ip) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + return HOP_RETURN; + + case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; + decoder->hop_psb_fup = false; + *err = intel_pt_walk_psbend(decoder); + if (*err == -EAGAIN) + return HOP_AGAIN; + if (*err) + return HOP_RETURN; + if (decoder->hop_psb_fup) { + decoder->hop_psb_fup = false; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return HOP_RETURN; + } + if (decoder->cbr != decoder->cbr_seen) { + decoder->state.type = 0; + return HOP_RETURN; + } + return HOP_IGNORE; + + case INTEL_PT_BAD: + case INTEL_PT_PAD: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TSC: + case INTEL_PT_TMA: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_MTC: + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_PSBEND: + case INTEL_PT_CBR: + case INTEL_PT_TRACESTOP: + case INTEL_PT_PIP: + case INTEL_PT_OVF: + case INTEL_PT_MNT: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case 
INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: + default: + return HOP_PROCESS; + } +} + static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) { + int last_packet_type = INTEL_PT_PAD; bool no_tip = false; int err; @@ -1732,6 +2075,26 @@ if (err) return err; next: + err = 0; + if (decoder->cyc_threshold) { + if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC) + decoder->sample_cyc = false; + last_packet_type = decoder->packet.type; + } + + if (decoder->hop) { + switch (intel_pt_hop_trace(decoder, &no_tip, &err)) { + case HOP_IGNORE: + continue; + case HOP_RETURN: + return err; + case HOP_AGAIN: + goto next; + default: + break; + } + } + switch (decoder->packet.type) { case INTEL_PT_TNT: if (!decoder->packet.count) @@ -1751,6 +2114,8 @@ case INTEL_PT_TIP_PGE: { decoder->pge = true; + decoder->overflow = false; + intel_pt_mtc_cyc_cnt_pge(decoder); if (decoder->packet.count == 0) { intel_pt_log_at("Skipping zero TIP.PGE", decoder->pos); @@ -1759,6 +2124,13 @@ intel_pt_set_ip(decoder); decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; + decoder->state.type |= INTEL_PT_TRACE_BEGIN; + /* + * In hop mode, resample to get the to_ip as an + * "instruction" sample. + */ + if (decoder->hop) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; return 0; } @@ -1779,7 +2151,7 @@ break; } intel_pt_set_last_ip(decoder); - if (!decoder->branch_enable) { + if (!decoder->branch_enable || !decoder->pge) { decoder->ip = decoder->last_ip; if (intel_pt_fup_event(decoder)) return 0; @@ -1817,6 +2189,15 @@ goto next; if (err) return err; + /* + * PSB+ CBR will not have changed but cater for the + * possibility of another CBR change that gets caught up + * in the PSB+. 
+ */ + if (decoder->cbr != decoder->cbr_seen) { + decoder->state.type = 0; + return 0; + } break; case INTEL_PT_PIP: @@ -1857,14 +2238,8 @@ case INTEL_PT_CBR: intel_pt_calc_cbr(decoder); - if (!decoder->branch_enable && - decoder->cbr != decoder->cbr_seen) { - decoder->cbr_seen = decoder->cbr; - decoder->state.type = INTEL_PT_CBR_CHG; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - decoder->state.cbr_payload = - decoder->packet.payload; + if (decoder->cbr != decoder->cbr_seen) { + decoder->state.type = 0; return 0; } break; @@ -1875,7 +2250,7 @@ case INTEL_PT_MODE_TSX: /* MODE_TSX need not be followed by FUP */ - if (!decoder->pge) { + if (!decoder->pge || decoder->in_psb) { intel_pt_update_in_tx(decoder); break; } @@ -1958,6 +2333,33 @@ decoder->state.pwrx_payload = decoder->packet.payload; return 0; + case INTEL_PT_BBP: + intel_pt_bbp(decoder); + break; + + case INTEL_PT_BIP: + intel_pt_bip(decoder); + break; + + case INTEL_PT_BEP: + decoder->state.type = INTEL_PT_BLK_ITEMS; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return 0; + + case INTEL_PT_BEP_IP: + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->set_fup_bep = true; + no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after BEP", + decoder->pos); + } + goto next; + default: return intel_pt_bug(decoder); } @@ -1976,10 +2378,12 @@ { int err; + decoder->in_psb = true; + while (1) { err = intel_pt_get_next_packet(decoder); if (err) - return err; + goto out; switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: @@ -1994,8 +2398,13 @@ case INTEL_PT_MWAIT: case INTEL_PT_PWRE: case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: intel_pt_log("ERROR: Unexpected packet\n"); - return -ENOENT; + err = -ENOENT; + goto out; case INTEL_PT_FUP: decoder->pge = true; @@ -2054,16 +2463,20 @@ decoder->pkt_state = INTEL_PT_STATE_ERR4; else 
decoder->pkt_state = INTEL_PT_STATE_ERR3; - return -ENOENT; + err = -ENOENT; + goto out; case INTEL_PT_BAD: /* Does not happen */ - return intel_pt_bug(decoder); + err = intel_pt_bug(decoder); + goto out; case INTEL_PT_OVF: - return intel_pt_overflow(decoder); + err = intel_pt_overflow(decoder); + goto out; case INTEL_PT_PSBEND: - return 0; + err = 0; + goto out; case INTEL_PT_PSB: case INTEL_PT_VMCS: @@ -2073,6 +2486,10 @@ break; } } +out: + decoder->in_psb = false; + + return err; } static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) @@ -2087,15 +2504,31 @@ switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: decoder->continuous_period = false; - __fallthrough; - case INTEL_PT_TIP_PGE: - case INTEL_PT_TIP: - decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; + decoder->pge = false; if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); - if (decoder->ip) - return 0; - break; + if (!decoder->ip) + break; + decoder->state.type |= INTEL_PT_TRACE_END; + return 0; + + case INTEL_PT_TIP_PGE: + decoder->pge = true; + intel_pt_mtc_cyc_cnt_pge(decoder); + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (!decoder->ip) + break; + decoder->state.type |= INTEL_PT_TRACE_BEGIN; + return 0; + + case INTEL_PT_TIP: + decoder->pge = true; + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (!decoder->ip) + break; + return 0; case INTEL_PT_FUP: if (intel_pt_have_ip(decoder)) @@ -2175,6 +2608,10 @@ case INTEL_PT_MWAIT: case INTEL_PT_PWRE: case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: default: break; } @@ -2190,10 +2627,11 @@ decoder->set_fup_mwait = false; decoder->set_fup_pwre = false; decoder->set_fup_exstop = false; + decoder->set_fup_bep = false; + decoder->overflow = false; if (!decoder->branch_enable) { decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; decoder->state.type = 0; /* Do not have a sample */ return 0; } @@ -2203,8 +2641,11 @@ if 
(err) return err; - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; + /* In hop mode, resample to get the to_ip as an "instruction" sample */ + if (decoder->hop) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + else + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; @@ -2247,7 +2688,7 @@ decoder->pos += decoder->len; decoder->len = 0; - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; @@ -2273,7 +2714,7 @@ intel_pt_log("Scanning for PSB\n"); while (1) { if (!decoder->len) { - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; } @@ -2311,12 +2752,13 @@ decoder->ip = 0; intel_pt_clear_stack(&decoder->stack); +leap: err = intel_pt_scan_for_psb(decoder); if (err) return err; decoder->have_last_ip = true; - decoder->pkt_state = INTEL_PT_STATE_NO_IP; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_psb(decoder); if (err) @@ -2324,7 +2766,20 @@ if (decoder->ip) { decoder->state.type = 0; /* Do not have a sample */ - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + /* + * In hop mode, resample to get the PSB FUP ip as an + * "instruction" sample. + */ + if (decoder->hop) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + else + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + } else if (decoder->leap) { + /* + * In leap mode, only PSB+ is decoded, so keeping leaping to the + * next PSB until there is an ip. 
+ */ + goto leap; } else { return intel_pt_sync_ip(decoder); } @@ -2388,6 +2843,9 @@ if (err == -EAGAIN) err = intel_pt_walk_trace(decoder); break; + case INTEL_PT_STATE_RESAMPLE: + err = intel_pt_resample(decoder); + break; default: err = intel_pt_bug(decoder); break; @@ -2396,26 +2854,43 @@ if (err) { decoder->state.err = intel_pt_ext_err(err); - decoder->state.from_ip = decoder->ip; - decoder->sample_timestamp = decoder->timestamp; - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + if (err != -EOVERFLOW) + decoder->state.from_ip = decoder->ip; + intel_pt_update_sample_time(decoder); + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; } else { decoder->state.err = 0; - if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { + if (decoder->cbr != decoder->cbr_seen) { decoder->cbr_seen = decoder->cbr; + if (!decoder->state.type) { + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + } decoder->state.type |= INTEL_PT_CBR_CHG; decoder->state.cbr_payload = decoder->cbr_payload; + decoder->state.cbr = decoder->cbr; } if (intel_pt_sample_time(decoder->pkt_state)) { - decoder->sample_timestamp = decoder->timestamp; - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + intel_pt_update_sample_time(decoder); + if (decoder->sample_cyc) { + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; + decoder->state.flags |= INTEL_PT_SAMPLE_IPC; + decoder->sample_cyc = false; + } } + /* + * When using only TSC/MTC to compute cycles, IPC can be + * sampled as soon as the cycle count changes. 
+ */ + if (!decoder->have_cyc) + decoder->state.flags |= INTEL_PT_SAMPLE_IPC; } decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; + decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt; return &decoder->state; } @@ -2519,11 +2994,12 @@ static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc, size_t *rem) { + enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX; struct intel_pt_pkt packet; int ret; while (len) { - ret = intel_pt_get_packet(buf, len, &packet); + ret = intel_pt_get_packet(buf, len, &packet, &ctx); if (ret <= 0) return false; if (packet.type == INTEL_PT_TSC) { @@ -2725,3 +3201,131 @@ return buf_b; /* No overlap */ } } + +/** + * struct fast_forward_data - data used by intel_pt_ff_cb(). + * @timestamp: timestamp to fast forward towards + * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than + * the fast forward timestamp. + */ +struct fast_forward_data { + uint64_t timestamp; + uint64_t buf_timestamp; +}; + +/** + * intel_pt_ff_cb - fast forward lookahead callback. + * @buffer: Intel PT trace buffer + * @data: opaque pointer to fast forward data (struct fast_forward_data) + * + * Determine if @buffer trace is past the fast forward timestamp. + * + * Return: 1 (stop lookahead) if @buffer trace is past the fast forward + * timestamp, and 0 otherwise. 
+ */ +static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data) +{ + struct fast_forward_data *d = data; + unsigned char *buf; + uint64_t tsc; + size_t rem; + size_t len; + + buf = (unsigned char *)buffer->buf; + len = buffer->len; + + if (!intel_pt_next_psb(&buf, &len) || + !intel_pt_next_tsc(buf, len, &tsc, &rem)) + return 0; + + tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp); + + intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n", + tsc, buffer->ref_timestamp); + + /* + * If the buffer contains a timestamp earlier than the fast forward + * timestamp, then record the buffer's ref_timestamp, else stop (return 1). + */ + if (tsc < d->timestamp) + d->buf_timestamp = buffer->ref_timestamp; + else + return 1; + + return 0; +} + +/** + * intel_pt_fast_forward - reposition decoder forwards. + * @decoder: Intel PT decoder + * @timestamp: timestamp to fast forward towards + * + * Reposition decoder at the last PSB with a timestamp earlier than @timestamp. + * + * Return: 0 on success or negative error code on failure. 
+ */ +int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp) +{ + struct fast_forward_data d = { .timestamp = timestamp }; + unsigned char *buf; + size_t len; + int err; + + intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp); + + /* Look ahead to find buffer timestamp of buffer to fast forward to */ + err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d); + if (err < 0) + return err; + + /* Walk to buffer with same buffer timestamp, if one was found */ + if (d.buf_timestamp) { + do { + decoder->pos += decoder->len; + decoder->len = 0; + err = intel_pt_get_next_data(decoder, true); + /* -ENOLINK means non-consecutive trace, which is not an error here */ + if (err && err != -ENOLINK) + return err; + } while (decoder->buf_timestamp != d.buf_timestamp); + } + + if (!decoder->buf) + return 0; + + buf = (unsigned char *)decoder->buf; + len = decoder->len; + + if (!intel_pt_next_psb(&buf, &len)) + return 0; + + /* + * Walk PSBs while the PSB timestamp is less than the fast forward + * timestamp. + */ + do { + uint64_t tsc; + size_t rem; + + if (!intel_pt_next_tsc(buf, len, &tsc, &rem)) + break; + tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp); + /* + * A TSC packet can slip past MTC packets but, after fast + * forward, decoding starts at the TSC timestamp. That means + * the timestamps may not be exactly the same as the timestamps + * that would have been decoded without fast forward. + */ + if (tsc < timestamp) { + intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc); + decoder->pos += decoder->len - len; + decoder->buf = buf; + decoder->len = len; + intel_pt_reposition(decoder); + } else { + break; + } + } while (intel_pt_step_psb(&buf, &len)); + + return 0; +} -- Gitblit v1.6.2