2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/arch/x86/include/asm/perf_event.h
@@ -7,7 +7,7 @@
  */
 
 #define INTEL_PMC_MAX_GENERIC                          32
-#define INTEL_PMC_MAX_FIXED                            3
+#define INTEL_PMC_MAX_FIXED                            4
 #define INTEL_PMC_IDX_FIXED                            32
 
 #define X86_PMC_IDX_MAX                                64
@@ -32,6 +32,8 @@
 
 #define HSW_IN_TX                              (1ULL << 32)
 #define HSW_IN_TX_CHECKPOINTED                 (1ULL << 33)
+#define ICL_EVENTSEL_ADAPTIVE                  (1ULL << 34)
+#define ICL_FIXED_0_ADAPTIVE                   (1ULL << 32)
 
 #define AMD64_EVENTSEL_INT_CORE_ENABLE         (1ULL << 36)
 #define AMD64_EVENTSEL_GUESTONLY               (1ULL << 40)
@@ -48,11 +50,22 @@
 
 #define AMD64_L3_SLICE_SHIFT                   48
 #define AMD64_L3_SLICE_MASK                    \
-	((0xFULL) << AMD64_L3_SLICE_SHIFT)
+	(0xFULL << AMD64_L3_SLICE_SHIFT)
+#define AMD64_L3_SLICEID_MASK                  \
+	(0x7ULL << AMD64_L3_SLICE_SHIFT)
 
 #define AMD64_L3_THREAD_SHIFT                  56
 #define AMD64_L3_THREAD_MASK                   \
-	((0xFFULL) << AMD64_L3_THREAD_SHIFT)
+	(0xFFULL << AMD64_L3_THREAD_SHIFT)
+#define AMD64_L3_F19H_THREAD_MASK              \
+	(0x3ULL << AMD64_L3_THREAD_SHIFT)
+
+#define AMD64_L3_EN_ALL_CORES                  BIT_ULL(47)
+#define AMD64_L3_EN_ALL_SLICES                 BIT_ULL(46)
+
+#define AMD64_L3_COREID_SHIFT                  42
+#define AMD64_L3_COREID_MASK                   \
+	(0x7ULL << AMD64_L3_COREID_SHIFT)
 
 #define X86_RAW_EVENT_MASK                     \
 	(ARCH_PERFMON_EVENTSEL_EVENT |         \
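
The new AMD64_L3_* masks describe how a Family 19h L3 PMC is scoped: which slices and cores participate, and which SMT threads of the measuring core are counted. A minimal sketch of combining them, assuming the usual "count everything" policy used for L3 cache events; the helper name and the base event value are illustrative, not part of this patch:

/*
 * Illustrative only: widen an L3 event config so it counts on all slices
 * and all cores, for both SMT threads (Family 19h layout assumed).
 */
static u64 amd_l3_count_everything(u64 event_config)
{
	return event_config | AMD64_L3_EN_ALL_SLICES | AMD64_L3_EN_ALL_CORES |
	       AMD64_L3_F19H_THREAD_MASK;
}
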
@@ -87,6 +100,12 @@
 #define ARCH_PERFMON_BRANCH_MISSES_RETIRED     6
 #define ARCH_PERFMON_EVENTS_COUNT              7
 
+#define PEBS_DATACFG_MEMINFO   BIT_ULL(0)
+#define PEBS_DATACFG_GP        BIT_ULL(1)
+#define PEBS_DATACFG_XMMS      BIT_ULL(2)
+#define PEBS_DATACFG_LBRS      BIT_ULL(3)
+#define PEBS_DATACFG_LBR_SHIFT 24
+
 /*
  * Intel "Architectural Performance Monitoring" CPUID
  * detection/enumeration details:
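
PEBS_DATACFG_* are the group-select bits of the adaptive PEBS data configuration: memory info, general-purpose registers, XMM registers and LBR entries, with the requested number of LBR entries placed at PEBS_DATACFG_LBR_SHIFT. A hedged sketch of building such a value; the helper and the "entries minus one" encoding of the LBR count mirror how the perf driver programs it, but are assumptions here:

/* Illustrative: request meminfo + GPRs and nr_lbr LBR entries per record. */
static u64 pebs_data_cfg_example(unsigned int nr_lbr)
{
	u64 cfg = PEBS_DATACFG_MEMINFO | PEBS_DATACFG_GP;

	if (nr_lbr)
		cfg |= PEBS_DATACFG_LBRS |
		       ((u64)(nr_lbr - 1) << PEBS_DATACFG_LBR_SHIFT);

	return cfg;
}
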
@@ -118,9 +137,51 @@
 	struct {
 		unsigned int num_counters_fixed:5;
 		unsigned int bit_width_fixed:8;
-		unsigned int reserved:19;
+		unsigned int reserved1:2;
+		unsigned int anythread_deprecated:1;
+		unsigned int reserved2:16;
 	} split;
 	unsigned int full;
+};
+
+/*
+ * Intel Architectural LBR CPUID detection/enumeration details:
+ */
+union cpuid28_eax {
+	struct {
+		/* Supported LBR depth values */
+		unsigned int lbr_depth_mask:8;
+		unsigned int reserved:22;
+		/* Deep C-state Reset */
+		unsigned int lbr_deep_c_reset:1;
+		/* IP values contain LIP */
+		unsigned int lbr_lip:1;
+	} split;
+	unsigned int full;
+};
+
+union cpuid28_ebx {
+	struct {
+		/* CPL Filtering Supported */
+		unsigned int lbr_cpl:1;
+		/* Branch Filtering Supported */
+		unsigned int lbr_filter:1;
+		/* Call-stack Mode Supported */
+		unsigned int lbr_call_stack:1;
+	} split;
+	unsigned int full;
+};
+
+union cpuid28_ecx {
+	struct {
+		/* Mispredict Bit Supported */
+		unsigned int lbr_mispred:1;
+		/* Timed LBRs Supported */
+		unsigned int lbr_timed_lbr:1;
+		/* Branch Type Field Supported */
+		unsigned int lbr_br_type:1;
+	} split;
+	unsigned int full;
 };
 
 struct x86_pmu_capability {
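
The cpuid28_* unions mirror the Architectural LBR enumeration leaf (CPUID leaf 0x1C, i.e. 28). A minimal sketch of filling them with the kernel's cpuid() helper; the probe function and the feature check are illustrative:

static void arch_lbr_probe_example(void)
{
	union cpuid28_eax eax;
	union cpuid28_ebx ebx;
	union cpuid28_ecx ecx;
	unsigned int edx;

	cpuid(28, &eax.full, &ebx.full, &ecx.full, &edx);

	if (eax.split.lbr_depth_mask && ebx.split.lbr_call_stack) {
		/* Architectural LBR with call-stack mode is available */
	}
}
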
@@ -137,13 +198,29 @@
  * Fixed-purpose performance events:
  */
 
+/* RDPMC offset for Fixed PMCs */
+#define INTEL_PMC_FIXED_RDPMC_BASE             (1 << 30)
+#define INTEL_PMC_FIXED_RDPMC_METRICS          (1 << 29)
+
 /*
- * All 3 fixed-mode PMCs are configured via this single MSR:
+ * All the fixed-mode PMCs are configured via this single MSR:
  */
 #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL        0x38d
 
 /*
- * The counts are available in three separate MSRs:
+ * There is no event-code assigned to the fixed-mode PMCs.
+ *
+ * For a fixed-mode PMC, which has an equivalent event on a general-purpose
+ * PMC, the event-code of the equivalent event is used for the fixed-mode PMC,
+ * e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core.
+ *
+ * For a fixed-mode PMC, which doesn't have an equivalent event, a
+ * pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS.
+ * The pseudo event-code for a fixed-mode PMC must be 0x00.
+ * The pseudo umask-code is 0xX. The X equals the index of the fixed
+ * counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300.
+ *
+ * The counts are available in separate MSRs:
 */
 
 /* Instr_Retired.Any: */
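
Per the comment added above, a fixed-mode PMC with no general-purpose equivalent uses event-code 0x00 and a umask equal to its index plus one. A small illustrative helper (not part of the header) spelling out that rule:

/* Pseudo-encoding (event=0x00, umask=idx+1) for fixed counter @idx. */
static inline unsigned int fixed_ctr_pseudo_encoding(unsigned int idx)
{
	return (idx + 1) << 8;	/* idx 2 -> 0x0300, idx 3 -> 0x0400 */
}
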
@@ -154,27 +231,129 @@
 #define MSR_ARCH_PERFMON_FIXED_CTR1    0x30a
 #define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1)
 
-/* CPU_CLK_Unhalted.Ref: */
+/* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */
 #define MSR_ARCH_PERFMON_FIXED_CTR2    0x30b
 #define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
 #define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
 
+/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */
+#define MSR_ARCH_PERFMON_FIXED_CTR3    0x30c
+#define INTEL_PMC_IDX_FIXED_SLOTS      (INTEL_PMC_IDX_FIXED + 3)
+#define INTEL_PMC_MSK_FIXED_SLOTS      (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
+
 /*
  * We model BTS tracing as another fixed-mode PMC.
  *
- * We choose a value in the middle of the fixed event range, since lower
+ * We choose the value 47 for the fixed index of BTS, since lower
  * values are used by actual fixed events and higher values are used
  * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
 */
-#define INTEL_PMC_IDX_FIXED_BTS        (INTEL_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_FIXED_BTS        (INTEL_PMC_IDX_FIXED + 15)
 
-#define GLOBAL_STATUS_COND_CHG         BIT_ULL(63)
-#define GLOBAL_STATUS_BUFFER_OVF       BIT_ULL(62)
-#define GLOBAL_STATUS_UNC_OVF          BIT_ULL(61)
-#define GLOBAL_STATUS_ASIF             BIT_ULL(60)
-#define GLOBAL_STATUS_COUNTERS_FROZEN  BIT_ULL(59)
-#define GLOBAL_STATUS_LBRS_FROZEN      BIT_ULL(58)
-#define GLOBAL_STATUS_TRACE_TOPAPMI    BIT_ULL(55)
+/*
+ * The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for
+ * each TopDown metric event.
+ *
+ * Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS).
+ */
+#define INTEL_PMC_IDX_METRIC_BASE      (INTEL_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_TD_RETIRING      (INTEL_PMC_IDX_METRIC_BASE + 0)
+#define INTEL_PMC_IDX_TD_BAD_SPEC      (INTEL_PMC_IDX_METRIC_BASE + 1)
+#define INTEL_PMC_IDX_TD_FE_BOUND      (INTEL_PMC_IDX_METRIC_BASE + 2)
+#define INTEL_PMC_IDX_TD_BE_BOUND      (INTEL_PMC_IDX_METRIC_BASE + 3)
+#define INTEL_PMC_IDX_METRIC_END       INTEL_PMC_IDX_TD_BE_BOUND
+#define INTEL_PMC_MSK_TOPDOWN          ((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \
+					INTEL_PMC_MSK_FIXED_SLOTS)
+
+/*
+ * There is no event-code assigned to the TopDown events.
+ *
+ * For the slots event, use the pseudo code of the fixed counter 3.
+ *
+ * For the metric events, the pseudo event-code is 0x00.
+ * The pseudo umask-code starts from the middle of the pseudo event
+ * space, 0x80.
+ */
+#define INTEL_TD_SLOTS                 0x0400	/* TOPDOWN.SLOTS */
+/* Level 1 metrics */
+#define INTEL_TD_METRIC_RETIRING       0x8000	/* Retiring metric */
+#define INTEL_TD_METRIC_BAD_SPEC       0x8100	/* Bad speculation metric */
+#define INTEL_TD_METRIC_FE_BOUND       0x8200	/* FE bound metric */
+#define INTEL_TD_METRIC_BE_BOUND       0x8300	/* BE bound metric */
+#define INTEL_TD_METRIC_MAX            INTEL_TD_METRIC_BE_BOUND
+#define INTEL_TD_METRIC_NUM            4
+
+static inline bool is_metric_idx(int idx)
+{
+	return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
+}
+
+static inline bool is_topdown_idx(int idx)
+{
+	return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS;
+}
+
+#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit)	\
+			(~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN)
+
+#define GLOBAL_STATUS_COND_CHG                 BIT_ULL(63)
+#define GLOBAL_STATUS_BUFFER_OVF_BIT           62
+#define GLOBAL_STATUS_BUFFER_OVF               BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT)
+#define GLOBAL_STATUS_UNC_OVF                  BIT_ULL(61)
+#define GLOBAL_STATUS_ASIF                     BIT_ULL(60)
+#define GLOBAL_STATUS_COUNTERS_FROZEN          BIT_ULL(59)
+#define GLOBAL_STATUS_LBRS_FROZEN_BIT          58
+#define GLOBAL_STATUS_LBRS_FROZEN              BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
+#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT        55
+#define GLOBAL_STATUS_TRACE_TOPAPMI            BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
+#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT     48
+
+#define GLOBAL_CTRL_EN_PERF_METRICS            48
+/*
+ * We model guest LBR event tracing as another fixed-mode PMC like BTS.
+ *
+ * We choose bit 58 because it's used to indicate LBR stack frozen state
+ * for architectural perfmon v4, and we unconditionally mask that bit in
+ * handle_pmi_common(), so it'll never be set in the overflow handling.
+ *
+ * With this fake counter assigned, the guest LBR event user (such as KVM)
+ * can program the LBR registers on its own, and we don't actually do anything
+ * with them in the host context.
+ */
+#define INTEL_PMC_IDX_FIXED_VLBR	(GLOBAL_STATUS_LBRS_FROZEN_BIT)
+
+/*
+ * Pseudo-encoding the guest LBR event as event=0x00,umask=0x1b,
+ * since it would claim bit 58 which is effectively Fixed26.
+ */
+#define INTEL_FIXED_VLBR_EVENT	0x1b00
+
+/*
+ * Adaptive PEBS v4
+ */
+
+struct pebs_basic {
+	u64 format_size;
+	u64 ip;
+	u64 applicable_counters;
+	u64 tsc;
+};
+
+struct pebs_meminfo {
+	u64 address;
+	u64 aux;
+	u64 latency;
+	u64 tsx_tuning;
+};
+
+struct pebs_gprs {
+	u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
+	u64 r8, r9, r10, r11, r12, r13, r14, r15;
+};
+
+struct pebs_xmm {
+	u64 xmm[16*2];	/* two entries for each register */
+};
 
 /*
  * IBS cpuid feature detection
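
An adaptive PEBS record always begins with struct pebs_basic; the optional groups then follow in the order meminfo, GPRs, XMMs, LBR entries, according to which PEBS_DATACFG_* bits were requested. A hedged sketch of walking one record under that ordering assumption (where the enabled-bits value comes from is left out here):

/* Illustrative: locate the optional groups that follow struct pebs_basic. */
static void pebs_record_walk_example(void *record, u64 data_cfg)
{
	struct pebs_basic *basic = record;
	void *next = basic + 1;
	struct pebs_meminfo *meminfo = NULL;
	struct pebs_gprs *gprs = NULL;
	struct pebs_xmm *xmm = NULL;

	if (data_cfg & PEBS_DATACFG_MEMINFO) {
		meminfo = next;
		next = meminfo + 1;
	}
	if (data_cfg & PEBS_DATACFG_GP) {
		gprs = next;
		next = gprs + 1;
	}
	if (data_cfg & PEBS_DATACFG_XMMS) {
		xmm = next;
		next = xmm + 1;
	}
	/* next now points at the LBR entries, if PEBS_DATACFG_LBRS was set */
}
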
@@ -228,6 +407,7 @@
 #define IBS_OP_ENABLE          (1ULL<<17)
 #define IBS_OP_MAX_CNT         0x0000FFFFULL
 #define IBS_OP_MAX_CNT_EXT     0x007FFFFFULL	/* not a register bit mask */
+#define IBS_OP_MAX_CNT_EXT_MASK	(0x7FULL<<20)	/* separate upper 7 bits */
 #define IBS_RIP_INVALID        (1ULL<<38)
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -252,6 +432,11 @@
 #define PERF_EFLAGS_VM	(1UL << 5)
 
 struct pt_regs;
+struct x86_perf_regs {
+	struct pt_regs	regs;
+	u64		*xmm_regs;
+};
+
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
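
struct x86_perf_regs embeds pt_regs as its first member so that code handed a pt_regs pointer can get back to the captured XMM registers. A minimal sketch of that recovery, assuming the pt_regs really is embedded in an x86_perf_regs; this mirrors how the perf regs code resolves XMM sample registers, but the helper itself is illustrative:

/* Return the low 64 bits of XMM number @n, or 0 if XMMs were not captured. */
static u64 sample_xmm_lo_example(struct pt_regs *regs, int n)
{
	struct x86_perf_regs *perf_regs =
		container_of(regs, struct x86_perf_regs, regs);

	if (!perf_regs->xmm_regs)
		return 0;

	return perf_regs->xmm_regs[n * 2];	/* two u64 per XMM register */
}
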
@@ -264,14 +449,9 @@
 */
 #define perf_arch_fetch_caller_regs(regs, __ip)	{	\
	(regs)->ip = (__ip);					\
-	(regs)->bp = caller_frame_pointer();			\
+	(regs)->sp = (unsigned long)__builtin_frame_address(0);	\
	(regs)->cs = __KERNEL_CS;				\
	regs->flags = 0;					\
-	asm volatile(						\
-		_ASM_MOV "%%"_ASM_SP ", %0\n"			\
-		: "=m" ((regs)->sp)				\
-		:: "memory"					\
-	);							\
 }
 
 struct perf_guest_switch_msr {
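
The macro now records the stack pointer with __builtin_frame_address(0) instead of the removed inline asm. It is normally reached through the generic perf_fetch_caller_regs() wrapper; a brief illustrative use, with the surrounding function name assumed:

/* Illustrative: synthesize pt_regs describing the current call site. */
static void fetch_caller_regs_example(struct pt_regs *regs)
{
	perf_arch_fetch_caller_regs(regs, _THIS_IP_);
}
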
@@ -279,16 +459,17 @@
 	u64 host, guest;
 };
 
-extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+struct x86_pmu_lbr {
+	unsigned int	nr;
+	unsigned int	from;
+	unsigned int	to;
+	unsigned int	info;
+};
+
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 extern void perf_check_microcode(void);
+extern int x86_perf_rdpmc_index(struct perf_event *event);
 #else
-static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
-	*nr = 0;
-	return NULL;
-}
-
 static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 {
 	memset(cap, 0, sizeof(*cap));
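
struct x86_pmu_lbr publishes the LBR geometry: the number of entries and the base MSR numbers of the FROM/TO/INFO arrays. A sketch of how a consumer (a hypervisor, say) might walk the stack from it, assuming consecutively numbered LBR MSRs; the dump helper is illustrative:

/* Illustrative: read every LBR FROM/TO pair described by @lbr. */
static void dump_lbr_example(const struct x86_pmu_lbr *lbr)
{
	unsigned int i;

	for (i = 0; i < lbr->nr; i++) {
		u64 from = native_read_msr(lbr->from + i);
		u64 to   = native_read_msr(lbr->to + i);

		pr_debug("LBR[%u]: %016llx -> %016llx\n", i, from, to);
	}
}
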
@@ -298,8 +479,28 @@
 static inline void perf_check_microcode(void) { }
 #endif
 
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
+#else
+static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+	*nr = 0;
+	return NULL;
+}
+static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
+{
+	return -1;
+}
+#endif
+
 #ifdef CONFIG_CPU_SUP_INTEL
 extern void intel_pt_handle_vmx(int on);
+#else
+static inline void intel_pt_handle_vmx(int on)
+{
+
+}
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
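
perf_guest_get_msrs() and x86_perf_get_lbr() are now declared only when both CONFIG_PERF_EVENTS and CONFIG_CPU_SUP_INTEL are enabled, with stubs (NULL plus *nr = 0, and -1) otherwise, so callers can stay unconditional. A hedged sketch of such a caller; the function is illustrative of a VM-entry path, not taken from this patch:

/* Illustrative: the stubs make this safe even without Intel perf support. */
static void switch_perf_msrs_example(void)
{
	struct perf_guest_switch_msr *msrs;
	int i, nr;

	msrs = perf_guest_get_msrs(&nr);
	if (!msrs)
		return;

	for (i = 0; i < nr; i++) {
		/* load msrs[i].guest on VM-entry, restore msrs[i].host on VM-exit */
	}
}
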