From 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 13 May 2024 10:30:14 +0000 Subject: [PATCH] modify sin led gpio --- kernel/arch/x86/events/intel/ds.c | 626 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 files changed, 529 insertions(+), 97 deletions(-) diff --git a/kernel/arch/x86/events/intel/ds.c b/kernel/arch/x86/events/intel/ds.c index b3279fe..48f30ff 100644 --- a/kernel/arch/x86/events/intel/ds.c +++ b/kernel/arch/x86/events/intel/ds.c @@ -7,6 +7,7 @@ #include <asm/perf_event.h> #include <asm/tlbflush.h> #include <asm/insn.h> +#include <asm/io.h> #include "../perf_event.h" @@ -337,7 +338,7 @@ struct debug_store *ds = hwev->ds; size_t bsiz = x86_pmu.pebs_buffer_size; int max, node = cpu_to_node(cpu); - void *buffer, *ibuffer, *cea; + void *buffer, *insn_buff, *cea; if (!x86_pmu.pebs) return 0; @@ -351,12 +352,12 @@ * buffer then. */ if (x86_pmu.intel_cap.pebs_format < 2) { - ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); - if (!ibuffer) { + insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); + if (!insn_buff) { dsfree_pages(buffer, bsiz); return -ENOMEM; } - per_cpu(insn_buffer, cpu) = ibuffer; + per_cpu(insn_buffer, cpu) = insn_buff; } hwev->ds_pebs_vaddr = buffer; /* Update the cpu entry area mapping */ @@ -641,8 +642,8 @@ rcu_read_lock(); perf_prepare_sample(&header, &data, event, ®s); - if (perf_output_begin(&handle, event, header.size * - (top - base - skip))) + if (perf_output_begin(&handle, &data, event, + header.size * (top - base - skip))) goto unlock; for (at = base; at < top; at++) { @@ -669,9 +670,9 @@ static inline void intel_pmu_drain_pebs_buffer(void) { - struct pt_regs regs; + struct perf_sample_data data; - x86_pmu.drain_pebs(®s); + x86_pmu.drain_pebs(NULL, &data); } /* @@ -849,6 +850,31 @@ EVENT_CONSTRAINT_END }; +struct event_constraint intel_icl_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ + + INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. + */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; @@ -858,7 +884,7 @@ if (x86_pmu.pebs_constraints) { for_each_event_constraint(c, x86_pmu.pebs_constraints) { - if ((event->hw.config & c->cmask) == c->code) { + if (constraint_match(c, event->hw.config)) { event->hw.flags |= c->flags; return c; } @@ -882,6 +908,9 @@ */ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc) { + if (cpuc->n_pebs == cpuc->n_pebs_via_pt) + return false; + return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs); } @@ -899,6 +928,9 @@ u64 threshold; int reserved; + if (cpuc->n_pebs_via_pt) + return; + if (x86_pmu.flags & PMU_FL_PEBS_ALL) reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed; else @@ -906,17 +938,87 @@ if (cpuc->n_pebs == cpuc->n_large_pebs) { threshold = ds->pebs_absolute_maximum - - reserved * x86_pmu.pebs_record_size; + reserved * cpuc->pebs_record_size; } else { - threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; + threshold = ds->pebs_buffer_base + cpuc->pebs_record_size; } ds->pebs_interrupt_threshold = threshold; } -static void -pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) +static void adaptive_pebs_record_size_update(void) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 pebs_data_cfg = cpuc->pebs_data_cfg; + int sz = sizeof(struct pebs_basic); + + if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) + sz += sizeof(struct pebs_meminfo); + if (pebs_data_cfg & PEBS_DATACFG_GP) + sz += sizeof(struct pebs_gprs); + if (pebs_data_cfg & PEBS_DATACFG_XMMS) + sz += sizeof(struct pebs_xmm); + if (pebs_data_cfg & PEBS_DATACFG_LBRS) + sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry); + + cpuc->pebs_record_size = sz; +} + +#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \ + PERF_SAMPLE_TRANSACTION) + +static u64 pebs_update_adaptive_cfg(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + u64 sample_type = attr->sample_type; + u64 pebs_data_cfg = 0; + bool gprs, tsx_weight; + + if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) && + attr->precise_ip > 1) + return pebs_data_cfg; + + if (sample_type & PERF_PEBS_MEMINFO_TYPE) + pebs_data_cfg |= PEBS_DATACFG_MEMINFO; + + /* + * We need GPRs when: + * + user requested them + * + precise_ip < 2 for the non event IP + * + For RTM TSX weight we need GPRs for the abort code. + */ + gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS); + + tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) && + ((attr->config & INTEL_ARCH_EVENT_MASK) == + x86_pmu.rtm_abort_event); + + if (gprs || (attr->precise_ip < 2) || tsx_weight) + pebs_data_cfg |= PEBS_DATACFG_GP; + + if ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK)) + pebs_data_cfg |= PEBS_DATACFG_XMMS; + + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + /* + * For now always log all LBRs. Could configure this + * later. + */ + pebs_data_cfg |= PEBS_DATACFG_LBRS | + ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT); + } + + return pebs_data_cfg; +} + +static void +pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, + struct perf_event *event, bool add) +{ + struct pmu *pmu = event->ctx->pmu; /* * Make sure we get updated with the first PEBS * event. It will trigger also during removal, but @@ -933,6 +1035,29 @@ update = true; } + /* + * The PEBS record doesn't shrink on pmu::del(). Doing so would require + * iterating all remaining PEBS events to reconstruct the config. + */ + if (x86_pmu.intel_cap.pebs_baseline && add) { + u64 pebs_data_cfg; + + /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */ + if (cpuc->n_pebs == 1) { + cpuc->pebs_data_cfg = 0; + cpuc->pebs_record_size = sizeof(struct pebs_basic); + } + + pebs_data_cfg = pebs_update_adaptive_cfg(event); + + /* Update pebs_record_size if new event requires more data. */ + if (pebs_data_cfg & ~cpuc->pebs_data_cfg) { + cpuc->pebs_data_cfg |= pebs_data_cfg; + adaptive_pebs_record_size_update(); + update = true; + } + } + if (update) pebs_update_threshold(cpuc); } @@ -946,8 +1071,38 @@ cpuc->n_pebs++; if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs++; + if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) + cpuc->n_pebs_via_pt++; - pebs_update_state(needed_cb, cpuc, event->ctx->pmu); + pebs_update_state(needed_cb, cpuc, event, true); +} + +static void intel_pmu_pebs_via_pt_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!is_pebs_pt(event)) + return; + + if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK)) + cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK; +} + +static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + struct debug_store *ds = cpuc->ds; + + if (!is_pebs_pt(event)) + return; + + if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) + cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD; + + cpuc->pebs_enabled |= PEBS_OUTPUT_PT; + + wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]); } void intel_pmu_pebs_enable(struct perf_event *event) @@ -960,10 +1115,18 @@ cpuc->pebs_enabled |= 1ULL << hwc->idx; - if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled |= 1ULL << 63; + + if (x86_pmu.intel_cap.pebs_baseline) { + hwc->config |= ICL_EVENTSEL_ADAPTIVE; + if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) { + wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg); + cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg; + } + } /* * Use auto-reload if possible to save a MSR write in the PMI. @@ -979,6 +1142,8 @@ } else { ds->pebs_event_reset[hwc->idx] = 0; } + + intel_pmu_pebs_via_pt_enable(event); } void intel_pmu_pebs_del(struct perf_event *event) @@ -990,8 +1155,10 @@ cpuc->n_pebs--; if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs--; + if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) + cpuc->n_pebs_via_pt--; - pebs_update_state(needed_cb, cpuc, event->ctx->pmu); + pebs_update_state(needed_cb, cpuc, event, false); } void intel_pmu_pebs_disable(struct perf_event *event) @@ -999,15 +1166,19 @@ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - if (cpuc->n_pebs == cpuc->n_large_pebs) + if (cpuc->n_pebs == cpuc->n_large_pebs && + cpuc->n_pebs != cpuc->n_pebs_via_pt) intel_pmu_drain_pebs_buffer(); cpuc->pebs_enabled &= ~(1ULL << hwc->idx); - if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && + (x86_pmu.version < 5)) cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled &= ~(1ULL << 63); + + intel_pmu_pebs_via_pt_disable(event); if (cpuc->enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); @@ -1125,34 +1296,57 @@ return 0; } -static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs) +static inline u64 intel_get_tsx_weight(u64 tsx_tuning) { - if (pebs->tsx_tuning) { - union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning }; + if (tsx_tuning) { + union hsw_tsx_tuning tsx = { .value = tsx_tuning }; return tsx.cycles_last_block; } return 0; } -static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs) +static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax) { - u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; + u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; /* For RTM XABORTs also log the abort code from AX */ - if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1)) - txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + if ((txn & PERF_TXN_TRANSACTION) && (ax & 1)) + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; return txn; } -static void setup_pebs_sample_data(struct perf_event *event, - struct pt_regs *iregs, void *__pebs, - struct perf_sample_data *data, - struct pt_regs *regs) +static inline u64 get_pebs_status(void *n) { + if (x86_pmu.intel_cap.pebs_format < 4) + return ((struct pebs_record_nhm *)n)->status; + return ((struct pebs_basic *)n)->applicable_counters; +} + #define PERF_X86_EVENT_PEBS_HSW_PREC \ (PERF_X86_EVENT_PEBS_ST_HSW | \ PERF_X86_EVENT_PEBS_LD_HSW | \ PERF_X86_EVENT_PEBS_NA_HSW) + +static u64 get_data_src(struct perf_event *event, u64 aux) +{ + u64 val = PERF_MEM_NA; + int fl = event->hw.flags; + bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); + + if (fl & PERF_X86_EVENT_PEBS_LDLAT) + val = load_latency_data(aux); + else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) + val = precise_datala_hsw(event, aux); + else if (fst) + val = precise_store_data(aux); + return val; +} + +static void setup_pebs_fixed_sample_data(struct perf_event *event, + struct pt_regs *iregs, void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) +{ /* * We cast to the biggest pebs_record but are careful not to * unconditionally access the 'extra' entries. @@ -1160,17 +1354,13 @@ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct pebs_record_skl *pebs = __pebs; u64 sample_type; - int fll, fst, dsrc; - int fl = event->hw.flags; + int fll; if (pebs == NULL) return; sample_type = event->attr.sample_type; - dsrc = sample_type & PERF_SAMPLE_DATA_SRC; - - fll = fl & PERF_X86_EVENT_PEBS_LDLAT; - fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); + fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; perf_sample_data_init(data, 0, event->hw.last_period); @@ -1185,21 +1375,13 @@ /* * data.data_src encodes the data source */ - if (dsrc) { - u64 val = PERF_MEM_NA; - if (fll) - val = load_latency_data(pebs->dse); - else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) - val = precise_datala_hsw(event, pebs->dse); - else if (fst) - val = precise_store_data(pebs->dse); - data->data_src.val = val; - } + if (sample_type & PERF_SAMPLE_DATA_SRC) + data->data_src.val = get_data_src(event, pebs->dse); /* * We must however always use iregs for the unwinder to stay sane; the * record BP,SP,IP can point into thin air when the record is from a - * previous PMI context or an (I)RET happend between the record and + * previous PMI context or an (I)RET happened between the record and * PMI. */ if (sample_type & PERF_SAMPLE_CALLCHAIN) @@ -1281,10 +1463,11 @@ if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. */ if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) - data->weight = intel_hsw_weight(pebs); + data->weight = intel_get_tsx_weight(pebs->tsx_tuning); if (sample_type & PERF_SAMPLE_TRANSACTION) - data->txn = intel_hsw_transaction(pebs); + data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, + pebs->ax); } /* @@ -1299,6 +1482,140 @@ if (has_branch_stack(event)) data->br_stack = &cpuc->lbr_stack; +} + +static void adaptive_pebs_save_regs(struct pt_regs *regs, + struct pebs_gprs *gprs) +{ + regs->ax = gprs->ax; + regs->bx = gprs->bx; + regs->cx = gprs->cx; + regs->dx = gprs->dx; + regs->si = gprs->si; + regs->di = gprs->di; + regs->bp = gprs->bp; + regs->sp = gprs->sp; +#ifndef CONFIG_X86_32 + regs->r8 = gprs->r8; + regs->r9 = gprs->r9; + regs->r10 = gprs->r10; + regs->r11 = gprs->r11; + regs->r12 = gprs->r12; + regs->r13 = gprs->r13; + regs->r14 = gprs->r14; + regs->r15 = gprs->r15; +#endif +} + +/* + * With adaptive PEBS the layout depends on what fields are configured. + */ + +static void setup_pebs_adaptive_sample_data(struct perf_event *event, + struct pt_regs *iregs, void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct pebs_basic *basic = __pebs; + void *next_record = basic + 1; + u64 sample_type; + u64 format_size; + struct pebs_meminfo *meminfo = NULL; + struct pebs_gprs *gprs = NULL; + struct x86_perf_regs *perf_regs; + + if (basic == NULL) + return; + + perf_regs = container_of(regs, struct x86_perf_regs, regs); + perf_regs->xmm_regs = NULL; + + sample_type = event->attr.sample_type; + format_size = basic->format_size; + perf_sample_data_init(data, 0, event->hw.last_period); + data->period = event->hw.last_period; + + if (event->attr.use_clockid == 0) + data->time = native_sched_clock_from_tsc(basic->tsc); + + /* + * We must however always use iregs for the unwinder to stay sane; the + * record BP,SP,IP can point into thin air when the record is from a + * previous PMI context or an (I)RET happened between the record and + * PMI. + */ + if (sample_type & PERF_SAMPLE_CALLCHAIN) + data->callchain = perf_callchain(event, iregs); + + *regs = *iregs; + /* The ip in basic is EventingIP */ + set_linear_ip(regs, basic->ip); + regs->flags = PERF_EFLAGS_EXACT; + + /* + * The record for MEMINFO is in front of GP + * But PERF_SAMPLE_TRANSACTION needs gprs->ax. + * Save the pointer here but process later. + */ + if (format_size & PEBS_DATACFG_MEMINFO) { + meminfo = next_record; + next_record = meminfo + 1; + } + + if (format_size & PEBS_DATACFG_GP) { + gprs = next_record; + next_record = gprs + 1; + + if (event->attr.precise_ip < 2) { + set_linear_ip(regs, gprs->ip); + regs->flags &= ~PERF_EFLAGS_EXACT; + } + + if (sample_type & PERF_SAMPLE_REGS_INTR) + adaptive_pebs_save_regs(regs, gprs); + } + + if (format_size & PEBS_DATACFG_MEMINFO) { + if (sample_type & PERF_SAMPLE_WEIGHT) + data->weight = meminfo->latency ?: + intel_get_tsx_weight(meminfo->tsx_tuning); + + if (sample_type & PERF_SAMPLE_DATA_SRC) + data->data_src.val = get_data_src(event, meminfo->aux); + + if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) + data->addr = meminfo->address; + + if (sample_type & PERF_SAMPLE_TRANSACTION) + data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, + gprs ? gprs->ax : 0); + } + + if (format_size & PEBS_DATACFG_XMMS) { + struct pebs_xmm *xmm = next_record; + + next_record = xmm + 1; + perf_regs->xmm_regs = xmm->xmm; + } + + if (format_size & PEBS_DATACFG_LBRS) { + struct lbr_entry *lbr = next_record; + int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT) + & 0xff) + 1; + next_record = next_record + num_lbr * sizeof(struct lbr_entry); + + if (has_branch_stack(event)) { + intel_pmu_store_pebs_lbrs(lbr); + data->br_stack = &cpuc->lbr_stack; + } + } + + WARN_ONCE(next_record != __pebs + (format_size >> 48), + "PEBS record size %llu, expected %llu, config %llx\n", + format_size >> 48, + (u64)(next_record - __pebs), + basic->format_size); } static inline void * @@ -1318,19 +1635,19 @@ if (base == NULL) return NULL; - for (at = base; at < top; at += x86_pmu.pebs_record_size) { - struct pebs_record_nhm *p = at; + for (at = base; at < top; at += cpuc->pebs_record_size) { + unsigned long status = get_pebs_status(at); - if (test_bit(bit, (unsigned long *)&p->status)) { + if (test_bit(bit, (unsigned long *)&status)) { /* PEBS v3 has accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) return at; - if (p->status == (1 << bit)) + if (status == (1 << bit)) return at; /* clear non-PEBS bit and re-check */ - pebs_status = p->status & cpuc->pebs_enabled; + pebs_status = status & cpuc->pebs_enabled; pebs_status &= PEBS_COUNTER_MASK; if (pebs_status == (1 << bit)) return at; @@ -1409,15 +1726,24 @@ return 0; } -static void __intel_pmu_pebs_event(struct perf_event *event, - struct pt_regs *iregs, - void *base, void *top, - int bit, int count) +static __always_inline void +__intel_pmu_pebs_event(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data, + void *base, void *top, + int bit, int count, + void (*setup_sample)(struct perf_event *, + struct pt_regs *, + void *, + struct perf_sample_data *, + struct pt_regs *)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - struct perf_sample_data data; - struct pt_regs regs; + struct x86_perf_regs perf_regs; + struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); + static struct pt_regs dummy_iregs; if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { /* @@ -1430,28 +1756,37 @@ } else if (!intel_pmu_save_and_restart(event)) return; + if (!iregs) + iregs = &dummy_iregs; + while (count > 1) { - setup_pebs_sample_data(event, iregs, at, &data, ®s); - perf_event_output(event, &data, ®s); - at += x86_pmu.pebs_record_size; + setup_sample(event, iregs, at, data, regs); + perf_event_output(event, data, regs); + at += cpuc->pebs_record_size; at = get_next_pebs_record_by_bit(at, top, bit); count--; } - setup_pebs_sample_data(event, iregs, at, &data, ®s); - - /* - * All but the last records are processed. - * The last one is left to be able to call the overflow handler. - */ - if (perf_event_overflow(event, &data, ®s)) { - x86_pmu_stop(event, 0); - return; + setup_sample(event, iregs, at, data, regs); + if (iregs == &dummy_iregs) { + /* + * The PEBS records may be drained in the non-overflow context, + * e.g., large PEBS + context switch. Perf should treat the + * last record the same as other PEBS records, and doesn't + * invoke the generic overflow handler. + */ + perf_event_output(event, data, regs); + } else { + /* + * All but the last records are processed. + * The last one is left to be able to call the overflow handler. + */ + if (perf_event_overflow(event, data, regs)) + x86_pmu_stop(event, 0); } - } -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1485,10 +1820,30 @@ return; } - __intel_pmu_pebs_event(event, iregs, at, top, 0, n); + __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, + setup_pebs_fixed_sample_data); } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) +{ + struct perf_event *event; + int bit; + + /* + * The drain_pebs() could be called twice in a short period + * for auto-reload event in pmu::read(). There are no + * overflows have happened in between. + * It needs to call intel_pmu_save_and_restart_reload() to + * update the event->count for this case. + */ + for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { + event = cpuc->events[bit]; + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); + } +} + +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1515,19 +1870,7 @@ } if (unlikely(base >= top)) { - /* - * The drain_pebs() could be called twice in a short period - * for auto-reload event in pmu::read(). There are no - * overflows have happened in between. - * It needs to call intel_pmu_save_and_restart_reload() to - * update the event->count for this case. - */ - for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, - size) { - event = cpuc->events[bit]; - if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) - intel_pmu_save_and_restart_reload(event, 0); - } + intel_pmu_pebs_event_update_no_drain(cpuc, size); return; } @@ -1540,8 +1883,7 @@ /* PEBS v3 has more accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) { - for_each_set_bit(bit, (unsigned long *)&pebs_status, - size) + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) counts[bit]++; continue; @@ -1579,9 +1921,8 @@ * that caused the PEBS record. It's called collision. * If collision happened, the record will be dropped. */ - if (p->status != (1ULL << bit)) { - for_each_set_bit(i, (unsigned long *)&pebs_status, - x86_pmu.max_pebs_events) + if (pebs_status != (1ULL << bit)) { + for_each_set_bit(i, (unsigned long *)&pebs_status, size) error[i]++; continue; } @@ -1589,7 +1930,7 @@ counts[bit]++; } - for (bit = 0; bit < size; bit++) { + for_each_set_bit(bit, (unsigned long *)&mask, size) { if ((counts[bit] == 0) && (error[bit] == 0)) continue; @@ -1604,14 +1945,69 @@ if (error[bit]) { perf_log_lost_samples(event, error[bit]); - if (perf_event_account_interrupt(event)) + if (iregs && perf_event_account_interrupt(event)) x86_pmu_stop(event, 0); } if (counts[bit]) { - __intel_pmu_pebs_event(event, iregs, base, - top, bit, counts[bit]); + __intel_pmu_pebs_event(event, iregs, data, base, + top, bit, counts[bit], + setup_pebs_fixed_sample_data); } + } +} + +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) +{ + short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct perf_event *event; + void *base, *at, *top; + int bit, size; + u64 mask; + + if (!x86_pmu.pebs_active) + return; + + base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_basic *)(unsigned long)ds->pebs_index; + + ds->pebs_index = ds->pebs_buffer_base; + + mask = ((1ULL << x86_pmu.max_pebs_events) - 1) | + (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); + size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; + + if (unlikely(base >= top)) { + intel_pmu_pebs_event_update_no_drain(cpuc, size); + return; + } + + for (at = base; at < top; at += cpuc->pebs_record_size) { + u64 pebs_status; + + pebs_status = get_pebs_status(at) & cpuc->pebs_enabled; + pebs_status &= mask; + + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) + counts[bit]++; + } + + for_each_set_bit(bit, (unsigned long *)&mask, size) { + if (counts[bit] == 0) + continue; + + event = cpuc->events[bit]; + if (WARN_ON_ONCE(!event)) + continue; + + if (WARN_ON_ONCE(!event->attr.precise_ip)) + continue; + + __intel_pmu_pebs_event(event, iregs, data, base, + top, bit, counts[bit], + setup_pebs_adaptive_sample_data); } } @@ -1630,9 +2026,16 @@ x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; + if (x86_pmu.version <= 4) + x86_pmu.pebs_no_isolation = 1; + if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; + char *pebs_qual = ""; int format = x86_pmu.intel_cap.pebs_format; + + if (format < 4) + x86_pmu.intel_cap.pebs_baseline = 0; switch (format) { case 0: @@ -1669,6 +2072,35 @@ x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; + case 4: + x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; + x86_pmu.pebs_record_size = sizeof(struct pebs_basic); + if (x86_pmu.intel_cap.pebs_baseline) { + x86_pmu.large_pebs_flags |= + PERF_SAMPLE_BRANCH_STACK | + PERF_SAMPLE_TIME; + x86_pmu.flags |= PMU_FL_PEBS_ALL; + pebs_qual = "-baseline"; + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; + } else { + /* Only basic record supported */ + x86_pmu.large_pebs_flags &= + ~(PERF_SAMPLE_ADDR | + PERF_SAMPLE_TIME | + PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_TRANSACTION | + PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_REGS_INTR); + } + pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); + + if (x86_pmu.intel_cap.pebs_output_pt_available) { + pr_cont("PEBS-via-PT, "); + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + } + + break; + default: pr_cont("no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; -- Gitblit v1.6.2