.. | .. |
---|
7 | 7 | */ |
---|
8 | 8 | |
---|
9 | 9 | #define INTEL_PMC_MAX_GENERIC 32 |
---|
10 | | -#define INTEL_PMC_MAX_FIXED 3 |
---|
| 10 | +#define INTEL_PMC_MAX_FIXED 4 |
---|
11 | 11 | #define INTEL_PMC_IDX_FIXED 32 |
---|
12 | 12 | |
---|
13 | 13 | #define X86_PMC_IDX_MAX 64 |
---|
.. | .. |
---|
32 | 32 | |
---|
33 | 33 | #define HSW_IN_TX (1ULL << 32) |
---|
34 | 34 | #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) |
---|
| 35 | +#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34) |
---|
| 36 | +#define ICL_FIXED_0_ADAPTIVE (1ULL << 32) |
---|
35 | 37 | |
---|
36 | 38 | #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) |
---|
37 | 39 | #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) |
---|
.. | .. |
---|
48 | 50 | |
---|
49 | 51 | #define AMD64_L3_SLICE_SHIFT 48 |
---|
50 | 52 | #define AMD64_L3_SLICE_MASK \ |
---|
51 | | - ((0xFULL) << AMD64_L3_SLICE_SHIFT) |
---|
| 53 | + (0xFULL << AMD64_L3_SLICE_SHIFT) |
---|
| 54 | +#define AMD64_L3_SLICEID_MASK \ |
---|
| 55 | + (0x7ULL << AMD64_L3_SLICE_SHIFT) |
---|
52 | 56 | |
---|
53 | 57 | #define AMD64_L3_THREAD_SHIFT 56 |
---|
54 | 58 | #define AMD64_L3_THREAD_MASK \ |
---|
55 | | - ((0xFFULL) << AMD64_L3_THREAD_SHIFT) |
---|
| 59 | + (0xFFULL << AMD64_L3_THREAD_SHIFT) |
---|
| 60 | +#define AMD64_L3_F19H_THREAD_MASK \ |
---|
| 61 | + (0x3ULL << AMD64_L3_THREAD_SHIFT) |
---|
| 62 | + |
---|
| 63 | +#define AMD64_L3_EN_ALL_CORES BIT_ULL(47) |
---|
| 64 | +#define AMD64_L3_EN_ALL_SLICES BIT_ULL(46) |
---|
| 65 | + |
---|
| 66 | +#define AMD64_L3_COREID_SHIFT 42 |
---|
| 67 | +#define AMD64_L3_COREID_MASK \ |
---|
| 68 | + (0x7ULL << AMD64_L3_COREID_SHIFT) |
---|
56 | 69 | |
---|
57 | 70 | #define X86_RAW_EVENT_MASK \ |
---|
58 | 71 | (ARCH_PERFMON_EVENTSEL_EVENT | \ |
---|
.. | .. |
---|
87 | 100 | #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 |
---|
88 | 101 | #define ARCH_PERFMON_EVENTS_COUNT 7 |
---|
89 | 102 | |
---|
| 103 | +#define PEBS_DATACFG_MEMINFO BIT_ULL(0) |
---|
| 104 | +#define PEBS_DATACFG_GP BIT_ULL(1) |
---|
| 105 | +#define PEBS_DATACFG_XMMS BIT_ULL(2) |
---|
| 106 | +#define PEBS_DATACFG_LBRS BIT_ULL(3) |
---|
| 107 | +#define PEBS_DATACFG_LBR_SHIFT 24 |
---|
| 108 | + |
---|
90 | 109 | /* |
---|
91 | 110 | * Intel "Architectural Performance Monitoring" CPUID |
---|
92 | 111 | * detection/enumeration details: |
---|
.. | .. |
---|
118 | 137 | struct { |
---|
119 | 138 | unsigned int num_counters_fixed:5; |
---|
120 | 139 | unsigned int bit_width_fixed:8; |
---|
121 | | - unsigned int reserved:19; |
---|
| 140 | + unsigned int reserved1:2; |
---|
| 141 | + unsigned int anythread_deprecated:1; |
---|
| 142 | + unsigned int reserved2:16; |
---|
122 | 143 | } split; |
---|
123 | 144 | unsigned int full; |
---|
| 145 | +}; |
---|
| 146 | + |
---|
| 147 | +/* |
---|
| 148 | + * Intel Architectural LBR CPUID detection/enumeration details: |
---|
| 149 | + */ |
---|
| 150 | +union cpuid28_eax { |
---|
| 151 | + struct { |
---|
| 152 | + /* Supported LBR depth values */ |
---|
| 153 | + unsigned int lbr_depth_mask:8; |
---|
| 154 | + unsigned int reserved:22; |
---|
| 155 | + /* Deep C-state Reset */ |
---|
| 156 | + unsigned int lbr_deep_c_reset:1; |
---|
| 157 | + /* IP values contain LIP */ |
---|
| 158 | + unsigned int lbr_lip:1; |
---|
| 159 | + } split; |
---|
| 160 | + unsigned int full; |
---|
| 161 | +}; |
---|
| 162 | + |
---|
| 163 | +union cpuid28_ebx { |
---|
| 164 | + struct { |
---|
| 165 | + /* CPL Filtering Supported */ |
---|
| 166 | + unsigned int lbr_cpl:1; |
---|
| 167 | + /* Branch Filtering Supported */ |
---|
| 168 | + unsigned int lbr_filter:1; |
---|
| 169 | + /* Call-stack Mode Supported */ |
---|
| 170 | + unsigned int lbr_call_stack:1; |
---|
| 171 | + } split; |
---|
| 172 | + unsigned int full; |
---|
| 173 | +}; |
---|
| 174 | + |
---|
| 175 | +union cpuid28_ecx { |
---|
| 176 | + struct { |
---|
| 177 | + /* Mispredict Bit Supported */ |
---|
| 178 | + unsigned int lbr_mispred:1; |
---|
| 179 | + /* Timed LBRs Supported */ |
---|
| 180 | + unsigned int lbr_timed_lbr:1; |
---|
| 181 | + /* Branch Type Field Supported */ |
---|
| 182 | + unsigned int lbr_br_type:1; |
---|
| 183 | + } split; |
---|
| 184 | + unsigned int full; |
---|
124 | 185 | }; |
---|
125 | 186 | |
---|
126 | 187 | struct x86_pmu_capability { |
---|
.. | .. |
---|
137 | 198 | * Fixed-purpose performance events: |
---|
138 | 199 | */ |
---|
139 | 200 | |
---|
| 201 | +/* RDPMC offset for Fixed PMCs */ |
---|
| 202 | +#define INTEL_PMC_FIXED_RDPMC_BASE (1 << 30) |
---|
| 203 | +#define INTEL_PMC_FIXED_RDPMC_METRICS (1 << 29) |
---|
| 204 | + |
---|
140 | 205 | /* |
---|
141 | | - * All 3 fixed-mode PMCs are configured via this single MSR: |
---|
| 206 | + * All the fixed-mode PMCs are configured via this single MSR: |
---|
142 | 207 | */ |
---|
143 | 208 | #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d |
---|
144 | 209 | |
---|
145 | 210 | /* |
---|
146 | | - * The counts are available in three separate MSRs: |
---|
| 211 | + * There is no event-code assigned to the fixed-mode PMCs. |
---|
| 212 | + * |
---|
| 213 | + * For a fixed-mode PMC, which has an equivalent event on a general-purpose |
---|
| 214 | + * PMC, the event-code of the equivalent event is used for the fixed-mode PMC, |
---|
| 215 | + * e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core. |
---|
| 216 | + * |
---|
| 217 | + * For a fixed-mode PMC, which doesn't have an equivalent event, a |
---|
| 218 | + * pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS. |
---|
| 219 | + * The pseudo event-code for a fixed-mode PMC must be 0x00. |
---|
| 220 | + * The pseudo umask-code is 0xX. The X equals the index of the fixed |
---|
| 221 | + * counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300. |
---|
| 222 | + * |
---|
| 223 | + * The counts are available in separate MSRs: |
---|
147 | 224 | */ |
---|
148 | 225 | |
---|
149 | 226 | /* Instr_Retired.Any: */ |
---|
.. | .. |
---|
154 | 231 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a |
---|
155 | 232 | #define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1) |
---|
156 | 233 | |
---|
157 | | -/* CPU_CLK_Unhalted.Ref: */ |
---|
| 234 | +/* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */ |
---|
158 | 235 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b |
---|
159 | 236 | #define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2) |
---|
160 | 237 | #define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES) |
---|
161 | 238 | |
---|
| 239 | +/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */ |
---|
| 240 | +#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c |
---|
| 241 | +#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) |
---|
| 242 | +#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) |
---|
| 243 | + |
---|
162 | 244 | /* |
---|
163 | 245 | * We model BTS tracing as another fixed-mode PMC. |
---|
164 | 246 | * |
---|
165 | | - * We choose a value in the middle of the fixed event range, since lower |
---|
| 247 | + * We choose the value 47 for the fixed index of BTS, since lower |
---|
166 | 248 | * values are used by actual fixed events and higher values are used |
---|
167 | 249 | * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. |
---|
168 | 250 | */ |
---|
169 | | -#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16) |
---|
| 251 | +#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 15) |
---|
170 | 252 | |
---|
171 | | -#define GLOBAL_STATUS_COND_CHG BIT_ULL(63) |
---|
172 | | -#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62) |
---|
173 | | -#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) |
---|
174 | | -#define GLOBAL_STATUS_ASIF BIT_ULL(60) |
---|
175 | | -#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) |
---|
176 | | -#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) |
---|
177 | | -#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) |
---|
| 253 | +/* |
---|
| 254 | + * The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for |
---|
| 255 | + * each TopDown metric event. |
---|
| 256 | + * |
---|
| 257 | + * Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS). |
---|
| 258 | + */ |
---|
| 259 | +#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16) |
---|
| 260 | +#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0) |
---|
| 261 | +#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1) |
---|
| 262 | +#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2) |
---|
| 263 | +#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3) |
---|
| 264 | +#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_BE_BOUND |
---|
| 265 | +#define INTEL_PMC_MSK_TOPDOWN ((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \ |
---|
| 266 | + INTEL_PMC_MSK_FIXED_SLOTS) |
---|
| 267 | + |
---|
| 268 | +/* |
---|
| 269 | + * There is no event-code assigned to the TopDown events. |
---|
| 270 | + * |
---|
| 271 | + * For the slots event, use the pseudo code of the fixed counter 3. |
---|
| 272 | + * |
---|
| 273 | + * For the metric events, the pseudo event-code is 0x00. |
---|
| 274 | + * The pseudo umask-code starts from the middle of the pseudo event |
---|
| 275 | + * space, 0x80. |
---|
| 276 | + */ |
---|
| 277 | +#define INTEL_TD_SLOTS 0x0400 /* TOPDOWN.SLOTS */ |
---|
| 278 | +/* Level 1 metrics */ |
---|
| 279 | +#define INTEL_TD_METRIC_RETIRING 0x8000 /* Retiring metric */ |
---|
| 280 | +#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */ |
---|
| 281 | +#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */ |
---|
| 282 | +#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */ |
---|
| 283 | +#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_BE_BOUND |
---|
| 284 | +#define INTEL_TD_METRIC_NUM 4 |
---|
| 285 | + |
---|
| 286 | +static inline bool is_metric_idx(int idx) |
---|
| 287 | +{ |
---|
| 288 | + return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM; |
---|
| 289 | +} |
---|
| 290 | + |
---|
| 291 | +static inline bool is_topdown_idx(int idx) |
---|
| 292 | +{ |
---|
| 293 | + return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS; |
---|
| 294 | +} |
---|
| 295 | + |
---|
| 296 | +#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit) \ |
---|
| 297 | + (~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN) |
---|
| 298 | + |
---|
| 299 | +#define GLOBAL_STATUS_COND_CHG BIT_ULL(63) |
---|
| 300 | +#define GLOBAL_STATUS_BUFFER_OVF_BIT 62 |
---|
| 301 | +#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT) |
---|
| 302 | +#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) |
---|
| 303 | +#define GLOBAL_STATUS_ASIF BIT_ULL(60) |
---|
| 304 | +#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) |
---|
| 305 | +#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 |
---|
| 306 | +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) |
---|
| 307 | +#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55 |
---|
| 308 | +#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT) |
---|
| 309 | +#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48 |
---|
| 310 | + |
---|
| 311 | +#define GLOBAL_CTRL_EN_PERF_METRICS 48 |
---|
| 312 | +/* |
---|
| 313 | + * We model guest LBR event tracing as another fixed-mode PMC like BTS. |
---|
| 314 | + * |
---|
| 315 | + * We choose bit 58 because it's used to indicate LBR stack frozen state |
---|
| 316 | + * for architectural perfmon v4, also we unconditionally mask that bit in |
---|
| 317 | + * the handle_pmi_common(), so it'll never be set in the overflow handling. |
---|
| 318 | + * |
---|
| 319 | + * With this fake counter assigned, the guest LBR event user (such as KVM), |
---|
| 320 | + * can program the LBR registers on its own, and we don't actually do anything |
---|
| 321 | + * with them in the host context. |
---|
| 322 | + */ |
---|
| 323 | +#define INTEL_PMC_IDX_FIXED_VLBR (GLOBAL_STATUS_LBRS_FROZEN_BIT) |
---|
| 324 | + |
---|
| 325 | +/* |
---|
| 326 | + * Pseudo-encoding the guest LBR event as event=0x00,umask=0x1b, |
---|
| 327 | + * since it would claim bit 58 which is effectively Fixed26. |
---|
| 328 | + */ |
---|
| 329 | +#define INTEL_FIXED_VLBR_EVENT 0x1b00 |
---|
| 330 | + |
---|
| 331 | +/* |
---|
| 332 | + * Adaptive PEBS v4 |
---|
| 333 | + */ |
---|
| 334 | + |
---|
| 335 | +struct pebs_basic { |
---|
| 336 | + u64 format_size; |
---|
| 337 | + u64 ip; |
---|
| 338 | + u64 applicable_counters; |
---|
| 339 | + u64 tsc; |
---|
| 340 | +}; |
---|
| 341 | + |
---|
| 342 | +struct pebs_meminfo { |
---|
| 343 | + u64 address; |
---|
| 344 | + u64 aux; |
---|
| 345 | + u64 latency; |
---|
| 346 | + u64 tsx_tuning; |
---|
| 347 | +}; |
---|
| 348 | + |
---|
| 349 | +struct pebs_gprs { |
---|
| 350 | + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; |
---|
| 351 | + u64 r8, r9, r10, r11, r12, r13, r14, r15; |
---|
| 352 | +}; |
---|
| 353 | + |
---|
| 354 | +struct pebs_xmm { |
---|
| 355 | + u64 xmm[16*2]; /* two entries for each register */ |
---|
| 356 | +}; |
---|
178 | 357 | |
---|
179 | 358 | /* |
---|
180 | 359 | * IBS cpuid feature detection |
---|
.. | .. |
---|
228 | 407 | #define IBS_OP_ENABLE (1ULL<<17) |
---|
229 | 408 | #define IBS_OP_MAX_CNT 0x0000FFFFULL |
---|
230 | 409 | #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ |
---|
| 410 | +#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */ |
---|
231 | 411 | #define IBS_RIP_INVALID (1ULL<<38) |
---|
232 | 412 | |
---|
233 | 413 | #ifdef CONFIG_X86_LOCAL_APIC |
---|
.. | .. |
---|
252 | 432 | #define PERF_EFLAGS_VM (1UL << 5) |
---|
253 | 433 | |
---|
254 | 434 | struct pt_regs; |
---|
| 435 | +struct x86_perf_regs { |
---|
| 436 | + struct pt_regs regs; |
---|
| 437 | + u64 *xmm_regs; |
---|
| 438 | +}; |
---|
| 439 | + |
---|
255 | 440 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); |
---|
256 | 441 | extern unsigned long perf_misc_flags(struct pt_regs *regs); |
---|
257 | 442 | #define perf_misc_flags(regs) perf_misc_flags(regs) |
---|
.. | .. |
---|
264 | 449 | */ |
---|
265 | 450 | #define perf_arch_fetch_caller_regs(regs, __ip) { \ |
---|
266 | 451 | (regs)->ip = (__ip); \ |
---|
267 | | - (regs)->bp = caller_frame_pointer(); \ |
---|
| 452 | + (regs)->sp = (unsigned long)__builtin_frame_address(0); \ |
---|
268 | 453 | (regs)->cs = __KERNEL_CS; \ |
---|
269 | 454 | regs->flags = 0; \ |
---|
270 | | - asm volatile( \ |
---|
271 | | - _ASM_MOV "%%"_ASM_SP ", %0\n" \ |
---|
272 | | - : "=m" ((regs)->sp) \ |
---|
273 | | - :: "memory" \ |
---|
274 | | - ); \ |
---|
275 | 455 | } |
---|
276 | 456 | |
---|
277 | 457 | struct perf_guest_switch_msr { |
---|
.. | .. |
---|
279 | 459 | u64 host, guest; |
---|
280 | 460 | }; |
---|
281 | 461 | |
---|
282 | | -extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); |
---|
| 462 | +struct x86_pmu_lbr { |
---|
| 463 | + unsigned int nr; |
---|
| 464 | + unsigned int from; |
---|
| 465 | + unsigned int to; |
---|
| 466 | + unsigned int info; |
---|
| 467 | +}; |
---|
| 468 | + |
---|
283 | 469 | extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); |
---|
284 | 470 | extern void perf_check_microcode(void); |
---|
| 471 | +extern int x86_perf_rdpmc_index(struct perf_event *event); |
---|
285 | 472 | #else |
---|
286 | | -static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) |
---|
287 | | -{ |
---|
288 | | - *nr = 0; |
---|
289 | | - return NULL; |
---|
290 | | -} |
---|
291 | | - |
---|
292 | 473 | static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) |
---|
293 | 474 | { |
---|
294 | 475 | memset(cap, 0, sizeof(*cap)); |
---|
.. | .. |
---|
298 | 479 | static inline void perf_check_microcode(void) { } |
---|
299 | 480 | #endif |
---|
300 | 481 | |
---|
| 482 | +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) |
---|
| 483 | +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); |
---|
| 484 | +extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr); |
---|
| 485 | +#else |
---|
| 486 | +static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) |
---|
| 487 | +{ |
---|
| 488 | + *nr = 0; |
---|
| 489 | + return NULL; |
---|
| 490 | +} |
---|
| 491 | +static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr) |
---|
| 492 | +{ |
---|
| 493 | + return -1; |
---|
| 494 | +} |
---|
| 495 | +#endif |
---|
| 496 | + |
---|
301 | 497 | #ifdef CONFIG_CPU_SUP_INTEL |
---|
302 | 498 | extern void intel_pt_handle_vmx(int on); |
---|
| 499 | +#else |
---|
| 500 | +static inline void intel_pt_handle_vmx(int on) |
---|
| 501 | +{ |
---|
| 502 | + |
---|
| 503 | +} |
---|
303 | 504 | #endif |
---|
304 | 505 | |
---|
305 | 506 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) |
---|