2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/x86/events/intel/lbr.c
@@ -8,17 +8,6 @@
 
 #include "../perf_event.h"
 
-enum {
-	LBR_FORMAT_32 = 0x00,
-	LBR_FORMAT_LIP = 0x01,
-	LBR_FORMAT_EIP = 0x02,
-	LBR_FORMAT_EIP_FLAGS = 0x03,
-	LBR_FORMAT_EIP_FLAGS2 = 0x04,
-	LBR_FORMAT_INFO = 0x05,
-	LBR_FORMAT_TIME = 0x06,
-	LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
-};
-
 static const enum {
 	LBR_EIP_FLAGS = 1,
 	LBR_TSX = 2,
@@ -143,7 +132,53 @@
 	 X86_BR_IRQ |\
 	 X86_BR_INT)
 
+/*
+ * Intel LBR_CTL bits
+ *
+ * Hardware branch filter for Arch LBR
+ */
+#define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */
+#define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */
+#define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */
+#define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */
+#define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */
+#define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */
+#define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */
+#define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */
+#define ARCH_LBR_RETURN_BIT 21 /* capture near returns */
+#define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */
+
+#define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT)
+#define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT)
+#define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT)
+#define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT)
+#define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT)
+#define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT)
+#define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT)
+#define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT)
+#define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT)
+#define ARCH_LBR_OTHER_BRANCH (1ULL << ARCH_LBR_OTHER_BRANCH_BIT)
+
+#define ARCH_LBR_ANY \
+	(ARCH_LBR_JCC |\
+	 ARCH_LBR_REL_JMP |\
+	 ARCH_LBR_IND_JMP |\
+	 ARCH_LBR_REL_CALL |\
+	 ARCH_LBR_IND_CALL |\
+	 ARCH_LBR_RETURN |\
+	 ARCH_LBR_OTHER_BRANCH)
+
+#define ARCH_LBR_CTL_MASK 0x7f000e
+
 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
+{
+	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+		return !!(config & ARCH_LBR_CALL_STACK);
+
+	return !!(config & LBR_CALL_STACK);
+}
 
 /*
  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
@@ -168,33 +203,46 @@
 	 */
 	if (cpuc->lbr_sel)
 		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
-	if (!pmi && cpuc->lbr_sel)
+	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
 		wrmsrl(MSR_LBR_SELECT, lbr_select);
 
 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	orig_debugctl = debugctl;
-	debugctl |= DEBUGCTLMSR_LBR;
+
+	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+		debugctl |= DEBUGCTLMSR_LBR;
 	/*
 	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
 	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
 	 * may cause superfluous increase/decrease of LBR_TOS.
 	 */
-	if (!(lbr_select & LBR_CALL_STACK))
+	if (is_lbr_call_stack_bit_set(lbr_select))
+		debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+	else
 		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+
 	if (orig_debugctl != debugctl)
 		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+
+	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+		wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
 }
 
 static void __intel_pmu_lbr_disable(void)
 {
 	u64 debugctl;
 
+	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+		wrmsrl(MSR_ARCH_LBR_CTL, 0);
+		return;
+	}
+
 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 }
 
-static void intel_pmu_lbr_reset_32(void)
+void intel_pmu_lbr_reset_32(void)
 {
 	int i;
 
@@ -202,7 +250,7 @@
 		wrmsrl(x86_pmu.lbr_from + i, 0);
 }
 
-static void intel_pmu_lbr_reset_64(void)
+void intel_pmu_lbr_reset_64(void)
 {
 	int i;
 
@@ -210,8 +258,14 @@
 		wrmsrl(x86_pmu.lbr_from + i, 0);
 		wrmsrl(x86_pmu.lbr_to + i, 0);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-			wrmsrl(MSR_LBR_INFO_0 + i, 0);
+			wrmsrl(x86_pmu.lbr_info + i, 0);
 	}
+}
+
+static void intel_pmu_arch_lbr_reset(void)
+{
+	/* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */
+	wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
 }
 
 void intel_pmu_lbr_reset(void)
@@ -221,10 +275,7 @@
 	if (!x86_pmu.lbr_nr)
 		return;
 
-	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
-		intel_pmu_lbr_reset_32();
-	else
-		intel_pmu_lbr_reset_64();
+	x86_pmu.lbr_reset();
 
 	cpuc->last_task_ctx = NULL;
 	cpuc->last_log_id = 0;
@@ -273,7 +324,7 @@
 	return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
 }
 
-DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
+static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
 
 /* If quirk is enabled, ensure sign extension is 63 bits: */
 inline u64 lbr_from_signext_quirk_wr(u64 val)
@@ -308,69 +359,97 @@
 	return val;
 }
 
-static inline void wrlbr_from(unsigned int idx, u64 val)
+static __always_inline void wrlbr_from(unsigned int idx, u64 val)
 {
 	val = lbr_from_signext_quirk_wr(val);
 	wrmsrl(x86_pmu.lbr_from + idx, val);
 }
 
-static inline void wrlbr_to(unsigned int idx, u64 val)
+static __always_inline void wrlbr_to(unsigned int idx, u64 val)
 {
 	wrmsrl(x86_pmu.lbr_to + idx, val);
 }
 
-static inline u64 rdlbr_from(unsigned int idx)
+static __always_inline void wrlbr_info(unsigned int idx, u64 val)
+{
+	wrmsrl(x86_pmu.lbr_info + idx, val);
+}
+
+static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
 {
 	u64 val;
+
+	if (lbr)
+		return lbr->from;
 
 	rdmsrl(x86_pmu.lbr_from + idx, val);
 
 	return lbr_from_signext_quirk_rd(val);
 }
 
-static inline u64 rdlbr_to(unsigned int idx)
+static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
 {
 	u64 val;
+
+	if (lbr)
+		return lbr->to;
 
 	rdmsrl(x86_pmu.lbr_to + idx, val);
 
 	return val;
 }
 
-static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
 {
+	u64 val;
+
+	if (lbr)
+		return lbr->info;
+
+	rdmsrl(x86_pmu.lbr_info + idx, val);
+
+	return val;
+}
+
+static inline void
+wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
+{
+	wrlbr_from(idx, lbr->from);
+	wrlbr_to(idx, lbr->to);
+	if (need_info)
+		wrlbr_info(idx, lbr->info);
+}
+
+static inline bool
+rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
+{
+	u64 from = rdlbr_from(idx, NULL);
+
+	/* Don't read invalid entry */
+	if (!from)
+		return false;
+
+	lbr->from = from;
+	lbr->to = rdlbr_to(idx, NULL);
+	if (need_info)
+		lbr->info = rdlbr_info(idx, NULL);
+
+	return true;
+}
+
+void intel_pmu_lbr_restore(void *ctx)
+{
+	bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct x86_perf_task_context *task_ctx = ctx;
 	int i;
 	unsigned lbr_idx, mask;
-	u64 tos;
-
-	if (task_ctx->lbr_callstack_users == 0 ||
-	    task_ctx->lbr_stack_state == LBR_NONE) {
-		intel_pmu_lbr_reset();
-		return;
-	}
-
-	tos = task_ctx->tos;
-	/*
-	 * Does not restore the LBR registers, if
-	 * - No one else touched them, and
-	 * - Did not enter C6
-	 */
-	if ((task_ctx == cpuc->last_task_ctx) &&
-	    (task_ctx->log_id == cpuc->last_log_id) &&
-	    rdlbr_from(tos)) {
-		task_ctx->lbr_stack_state = LBR_NONE;
-		return;
-	}
+	u64 tos = task_ctx->tos;
 
 	mask = x86_pmu.lbr_nr - 1;
 	for (i = 0; i < task_ctx->valid_lbrs; i++) {
 		lbr_idx = (tos - i) & mask;
-		wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
-		wrlbr_to (lbr_idx, task_ctx->lbr_to[i]);
-
-		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+		wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
 	}
 
 	for (; i < x86_pmu.lbr_nr; i++) {
@@ -378,49 +457,172 @@
 		wrlbr_from(lbr_idx, 0);
 		wrlbr_to(lbr_idx, 0);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0);
+			wrlbr_info(lbr_idx, 0);
 	}
 
 	wrmsrl(x86_pmu.lbr_tos, tos);
-	task_ctx->lbr_stack_state = LBR_NONE;
+
+	if (cpuc->lbr_select)
+		wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
 }
 
-static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+static void intel_pmu_arch_lbr_restore(void *ctx)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	unsigned lbr_idx, mask;
-	u64 tos, from;
+	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
+	struct lbr_entry *entries = task_ctx->entries;
 	int i;
 
-	if (task_ctx->lbr_callstack_users == 0) {
-		task_ctx->lbr_stack_state = LBR_NONE;
+	/* Fast reset the LBRs before restore if the call stack is not full. */
+	if (!entries[x86_pmu.lbr_nr - 1].from)
+		intel_pmu_arch_lbr_reset();
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		if (!entries[i].from)
+			break;
+		wrlbr_all(&entries[i], i, true);
+	}
+}
+
+/*
+ * Restore the Architecture LBR state from the xsave area in the perf
+ * context data for the task via the XRSTORS instruction.
+ */
+static void intel_pmu_arch_lbr_xrstors(void *ctx)
+{
+	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+	copy_kernel_to_dynamic_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
+static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
+{
+	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+		return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);
+
+	return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
+}
+
+static void __intel_pmu_lbr_restore(void *ctx)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
+	    task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
+		intel_pmu_lbr_reset();
 		return;
 	}
+
+	/*
+	 * Does not restore the LBR registers, if
+	 * - No one else touched them, and
+	 * - Was not cleared in Cstate
+	 */
+	if ((ctx == cpuc->last_task_ctx) &&
+	    (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
+	    !lbr_is_reset_in_cstate(ctx)) {
+		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
+		return;
+	}
+
+	x86_pmu.lbr_restore(ctx);
+
+	task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
+}
+
+void intel_pmu_lbr_save(void *ctx)
+{
+	bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct x86_perf_task_context *task_ctx = ctx;
+	unsigned lbr_idx, mask;
+	u64 tos;
+	int i;
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		lbr_idx = (tos - i) & mask;
-		from = rdlbr_from(lbr_idx);
-		if (!from)
+		if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
 			break;
-		task_ctx->lbr_from[i] = from;
-		task_ctx->lbr_to[i] = rdlbr_to(lbr_idx);
-		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
 	task_ctx->valid_lbrs = i;
 	task_ctx->tos = tos;
-	task_ctx->lbr_stack_state = LBR_VALID;
 
-	cpuc->last_task_ctx = task_ctx;
-	cpuc->last_log_id = ++task_ctx->log_id;
+	if (cpuc->lbr_select)
+		rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
+}
+
+static void intel_pmu_arch_lbr_save(void *ctx)
+{
+	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
+	struct lbr_entry *entries = task_ctx->entries;
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		if (!rdlbr_all(&entries[i], i, true))
+			break;
+	}
+
+	/* LBR call stack is not full. Reset is required in restore. */
+	if (i < x86_pmu.lbr_nr)
+		entries[x86_pmu.lbr_nr - 1].from = 0;
+}
+
+/*
+ * Save the Architecture LBR state to the xsave area in the perf
+ * context data for the task via the XSAVES instruction.
+ */
+static void intel_pmu_arch_lbr_xsaves(void *ctx)
+{
+	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+	copy_dynamic_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
+static void __intel_pmu_lbr_save(void *ctx)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (task_context_opt(ctx)->lbr_callstack_users == 0) {
+		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
+		return;
+	}
+
+	x86_pmu.lbr_save(ctx);
+
+	task_context_opt(ctx)->lbr_stack_state = LBR_VALID;
+
+	cpuc->last_task_ctx = ctx;
+	cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
+}
+
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
+				 struct perf_event_context *next)
+{
+	void *prev_ctx_data, *next_ctx_data;
+
+	swap(prev->task_ctx_data, next->task_ctx_data);
+
+	/*
+	 * Architecture specific synchronization makes sense in
+	 * case both prev->task_ctx_data and next->task_ctx_data
	 * pointers are allocated.
+	 */
+
+	prev_ctx_data = next->task_ctx_data;
+	next_ctx_data = prev->task_ctx_data;
+
+	if (!prev_ctx_data || !next_ctx_data)
+		return;
+
+	swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
+	     task_context_opt(next_ctx_data)->lbr_callstack_users);
 }
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	struct x86_perf_task_context *task_ctx;
+	void *task_ctx;
 
 	if (!cpuc->lbr_users)
 		return;
@@ -457,17 +659,17 @@
 void intel_pmu_lbr_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	struct x86_perf_task_context *task_ctx;
 
 	if (!x86_pmu.lbr_nr)
 		return;
 
+	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
+		cpuc->lbr_select = 1;
+
 	cpuc->br_sel = event->hw.branch_reg.reg;
 
-	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
-		task_ctx = event->ctx->task_ctx_data;
-		task_ctx->lbr_callstack_users++;
-	}
+	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
+		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
 
 	/*
 	 * Request pmu::sched_task() callback, which will fire inside the
@@ -488,35 +690,88 @@
 	 * be 'new'. Conversely, a new event can get installed through the
 	 * context switch path for the first time.
 	 */
+	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
+		cpuc->lbr_pebs_users++;
 	perf_sched_cb_inc(event->ctx->pmu);
 	if (!cpuc->lbr_users++ && !event->total_time_running)
 		intel_pmu_lbr_reset();
 }
 
+void release_lbr_buffers(void)
+{
+	struct kmem_cache *kmem_cache;
+	struct cpu_hw_events *cpuc;
+	int cpu;
+
+	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
+		if (kmem_cache && cpuc->lbr_xsave) {
+			kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
+			cpuc->lbr_xsave = NULL;
+		}
+	}
+}
+
+void reserve_lbr_buffers(void)
+{
+	struct kmem_cache *kmem_cache;
+	struct cpu_hw_events *cpuc;
+	int cpu;
+
+	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
+		if (!kmem_cache || cpuc->lbr_xsave)
+			continue;
+
+		cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
+							GFP_KERNEL | __GFP_ZERO,
+							cpu_to_node(cpu));
+	}
+}
+
 void intel_pmu_lbr_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	struct x86_perf_task_context *task_ctx;
 
 	if (!x86_pmu.lbr_nr)
 		return;
 
 	if (branch_user_callstack(cpuc->br_sel) &&
-	    event->ctx->task_ctx_data) {
-		task_ctx = event->ctx->task_ctx_data;
-		task_ctx->lbr_callstack_users--;
-	}
+	    event->ctx->task_ctx_data)
+		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;
 
+	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
+		cpuc->lbr_select = 0;
+
+	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
+		cpuc->lbr_pebs_users--;
 	cpuc->lbr_users--;
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
+	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
 	perf_sched_cb_dec(event->ctx->pmu);
+}
+
+static inline bool vlbr_exclude_host(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
+			(unsigned long *)&cpuc->intel_ctrl_guest_mask);
 }
 
 void intel_pmu_lbr_enable_all(bool pmi)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-	if (cpuc->lbr_users)
+	if (cpuc->lbr_users && !vlbr_exclude_host())
 		__intel_pmu_lbr_enable(pmi);
 }
 
@@ -524,11 +779,11 @@
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-	if (cpuc->lbr_users)
+	if (cpuc->lbr_users && !vlbr_exclude_host())
 		__intel_pmu_lbr_disable();
 }
 
-static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 {
 	unsigned long mask = x86_pmu.lbr_nr - 1;
 	u64 tos = intel_pmu_lbr_tos();
@@ -557,6 +812,7 @@
 		cpuc->lbr_entries[i].reserved = 0;
 	}
 	cpuc->lbr_stack.nr = i;
+	cpuc->lbr_stack.hw_idx = tos;
 }
 
 /*
@@ -564,7 +820,7 @@
  * is the same as the linear address, allowing us to merge the LIP and EIP
  * LBR formats.
  */
-static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
 	bool need_info = false, call_stack = false;
 	unsigned long mask = x86_pmu.lbr_nr - 1;
@@ -587,8 +843,8 @@
 		u16 cycles = 0;
 		int lbr_flags = lbr_desc[lbr_format];
 
-		from = rdlbr_from(lbr_idx);
-		to = rdlbr_to(lbr_idx);
+		from = rdlbr_from(lbr_idx, NULL);
+		to = rdlbr_to(lbr_idx, NULL);
 
 		/*
 		 * Read LBR call stack entries
@@ -600,7 +856,7 @@
 		if (lbr_format == LBR_FORMAT_INFO && need_info) {
 			u64 info;
 
-			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
+			info = rdlbr_info(lbr_idx, NULL);
 			mis = !!(info & LBR_INFO_MISPRED);
 			pred = !mis;
 			in_tx = !!(info & LBR_INFO_IN_TX);
@@ -652,19 +908,111 @@
 		out++;
 	}
 	cpuc->lbr_stack.nr = out;
+	cpuc->lbr_stack.hw_idx = tos;
+}
+
+static __always_inline int get_lbr_br_type(u64 info)
+{
+	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
+		return 0;
+
+	return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+}
+
+static __always_inline bool get_lbr_mispred(u64 info)
+{
+	if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
+		return 0;
+
+	return !!(info & LBR_INFO_MISPRED);
+}
+
+static __always_inline bool get_lbr_predicted(u64 info)
+{
+	if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
+		return 0;
+
+	return !(info & LBR_INFO_MISPRED);
+}
+
+static __always_inline u16 get_lbr_cycles(u64 info)
+{
+	if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
+	    !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
+		return 0;
+
+	return info & LBR_INFO_CYCLES;
+}
+
+static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
+				struct lbr_entry *entries)
+{
+	struct perf_branch_entry *e;
+	struct lbr_entry *lbr;
+	u64 from, to, info;
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		lbr = entries ? &entries[i] : NULL;
+		e = &cpuc->lbr_entries[i];
+
+		from = rdlbr_from(i, lbr);
+		/*
+		 * Read LBR entries until invalid entry (0s) is detected.
+		 */
+		if (!from)
+			break;
+
+		to = rdlbr_to(i, lbr);
+		info = rdlbr_info(i, lbr);
+
+		e->from = from;
+		e->to = to;
+		e->mispred = get_lbr_mispred(info);
+		e->predicted = get_lbr_predicted(info);
+		e->in_tx = !!(info & LBR_INFO_IN_TX);
+		e->abort = !!(info & LBR_INFO_ABORT);
+		e->cycles = get_lbr_cycles(info);
+		e->type = get_lbr_br_type(info);
+		e->reserved = 0;
+	}
+
+	cpuc->lbr_stack.nr = i;
+}
+
+static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
+{
+	intel_pmu_store_lbr(cpuc, NULL);
+}
+
+static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
+{
+	struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;
+
+	if (!xsave) {
+		intel_pmu_store_lbr(cpuc, NULL);
+		return;
+	}
+	copy_dynamic_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR);
+
+	intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
 }
 
 void intel_pmu_lbr_read(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-	if (!cpuc->lbr_users)
+	/*
+	 * Don't read when all LBRs users are using adaptive PEBS.
+	 *
+	 * This could be smarter and actually check the event,
+	 * but this simple approach seems to work for now.
+	 */
+	if (!cpuc->lbr_users || vlbr_exclude_host() ||
+	    cpuc->lbr_users == cpuc->lbr_pebs_users)
 		return;
 
-	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
-		intel_pmu_lbr_read_32(cpuc);
-	else
-		intel_pmu_lbr_read_64(cpuc);
+	x86_pmu.lbr_read(cpuc);
 
 	intel_pmu_lbr_filter(cpuc);
 }
@@ -763,6 +1111,19 @@
 
 	reg = &event->hw.branch_reg;
 	reg->idx = EXTRA_REG_LBR;
+
+	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+		reg->config = mask;
+
+		/*
+		 * The Arch LBR HW can retrieve the common branch types
+		 * from the LBR_INFO. It doesn't require the high overhead
+		 * SW disassemble.
+		 * Enable the branch type by default for the Arch LBR.
+		 */
+		reg->reg |= X86_BR_TYPE_SAVE;
+		return 0;
+	}
 
 	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
@@ -931,6 +1292,7 @@
 			ret = X86_BR_ZERO_CALL;
 			break;
 		}
+		fallthrough;
 	case 0x9a: /* call far absolute */
 		ret = X86_BR_CALL;
 		break;
@@ -1019,6 +1381,27 @@
 	return PERF_BR_UNKNOWN;
 }
 
+enum {
+	ARCH_LBR_BR_TYPE_JCC = 0,
+	ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1,
+	ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2,
+	ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3,
+	ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4,
+	ARCH_LBR_BR_TYPE_NEAR_RET = 5,
+	ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET,
+
+	ARCH_LBR_BR_TYPE_MAP_MAX = 16,
+};
+
+static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
+	[ARCH_LBR_BR_TYPE_JCC] = X86_BR_JCC,
+	[ARCH_LBR_BR_TYPE_NEAR_IND_JMP] = X86_BR_IND_JMP,
+	[ARCH_LBR_BR_TYPE_NEAR_REL_JMP] = X86_BR_JMP,
+	[ARCH_LBR_BR_TYPE_NEAR_IND_CALL] = X86_BR_IND_CALL,
+	[ARCH_LBR_BR_TYPE_NEAR_REL_CALL] = X86_BR_CALL,
+	[ARCH_LBR_BR_TYPE_NEAR_RET] = X86_BR_RET,
+};
+
 /*
  * implement actual branch filter based on user demand.
  * Hardware may not exactly satisfy that request, thus
@@ -1031,7 +1414,7 @@
 {
 	u64 from, to;
 	int br_sel = cpuc->br_sel;
-	int i, j, type;
+	int i, j, type, to_plm;
 	bool compress = false;
 
 	/* if sampling all branches, then nothing to filter */
@@ -1043,8 +1426,19 @@
 
 		from = cpuc->lbr_entries[i].from;
 		to = cpuc->lbr_entries[i].to;
+		type = cpuc->lbr_entries[i].type;
 
-		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+		/*
+		 * Parse the branch type recorded in LBR_x_INFO MSR.
+		 * Doesn't support OTHER_BRANCH decoding for now.
+		 * OTHER_BRANCH branch type still rely on software decoding.
+		 */
+		if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
+		    type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
+			to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+			type = arch_lbr_br_type_map[type] | to_plm;
+		} else
+			type = branch_type(from, to, cpuc->lbr_entries[i].abort);
 		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
 			if (cpuc->lbr_entries[i].in_tx)
 				type |= X86_BR_IN_TX;
@@ -1077,6 +1471,21 @@
 		}
 		i++;
 	}
+}
+
+void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/* Cannot get TOS for large PEBS and Arch LBR */
+	if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
+	    (cpuc->n_pebs == cpuc->n_large_pebs))
+		cpuc->lbr_stack.hw_idx = -1ULL;
+	else
+		cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
+
+	intel_pmu_store_lbr(cpuc, lbr);
+	intel_pmu_lbr_filter(cpuc);
 }
 
 /*
@@ -1132,6 +1541,26 @@
 	[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
 };
 
+static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT] = ARCH_LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT] = ARCH_LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = ARCH_LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = ARCH_LBR_RETURN |
+						ARCH_LBR_OTHER_BRANCH,
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = ARCH_LBR_REL_CALL |
+					      ARCH_LBR_IND_CALL |
+					      ARCH_LBR_OTHER_BRANCH,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = ARCH_LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT] = ARCH_LBR_JCC,
+	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = ARCH_LBR_REL_CALL |
+						ARCH_LBR_IND_CALL |
+						ARCH_LBR_RETURN |
+						ARCH_LBR_CALL_STACK,
+	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = ARCH_LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_CALL_SHIFT] = ARCH_LBR_REL_CALL,
+};
+
 /* core */
 void __init intel_pmu_lbr_init_core(void)
 {
@@ -1185,9 +1614,17 @@
 	 */
 }
 
+static inline struct kmem_cache *
+create_lbr_kmem_cache(size_t size, size_t align)
+{
+	return kmem_cache_create("x86_lbr", size, align, 0, NULL);
+}
+
 /* haswell */
 void intel_pmu_lbr_init_hsw(void)
 {
+	size_t size = sizeof(struct x86_perf_task_context);
+
 	x86_pmu.lbr_nr = 16;
 	x86_pmu.lbr_tos = MSR_LBR_TOS;
 	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
@@ -1196,6 +1633,8 @@
 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
 	x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
 
+	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+
 	if (lbr_from_signext_quirk_needed())
 		static_branch_enable(&lbr_from_quirk_key);
 }
@@ -1203,13 +1642,18 @@
 /* skylake */
 __init void intel_pmu_lbr_init_skl(void)
 {
+	size_t size = sizeof(struct x86_perf_task_context);
+
 	x86_pmu.lbr_nr = 32;
 	x86_pmu.lbr_tos = MSR_LBR_TOS;
 	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
 	x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+	x86_pmu.lbr_info = MSR_LBR_INFO_0;
 
 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
 	x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
+
+	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
 
 	/*
 	 * SW branch filter usage:
@@ -1277,3 +1721,155 @@
 	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
 		x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
 }
+
+/*
+ * LBR state size is variable based on the max number of registers.
+ * This calculates the expected state size, which should match
+ * what the hardware enumerates for the size of XFEATURE_LBR.
+ */
+static inline unsigned int get_lbr_state_size(void)
+{
+	return sizeof(struct arch_lbr_state) +
+	       x86_pmu.lbr_nr * sizeof(struct lbr_entry);
+}
+
+static bool is_arch_lbr_xsave_available(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_XSAVES))
+		return false;
+
+	/*
+	 * Check the LBR state with the corresponding software structure.
+	 * Disable LBR XSAVES support if the size doesn't match.
+	 */
+	if (xfeature_size(XFEATURE_LBR) == 0)
+		return false;
+
+	if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
+		return false;
+
+	return true;
+}
+
+void __init intel_pmu_arch_lbr_init(void)
+{
+	struct pmu *pmu = x86_get_pmu(smp_processor_id());
+	union cpuid28_eax eax;
+	union cpuid28_ebx ebx;
+	union cpuid28_ecx ecx;
+	unsigned int unused_edx;
+	bool arch_lbr_xsave;
+	size_t size;
+	u64 lbr_nr;
+
+	/* Arch LBR Capabilities */
+	cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);
+
+	lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
+	if (!lbr_nr)
+		goto clear_arch_lbr;
+
+	/* Apply the max depth of Arch LBR */
+	if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
+		goto clear_arch_lbr;
+
+	x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
+	x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
+	x86_pmu.lbr_lip = eax.split.lbr_lip;
+	x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
+	x86_pmu.lbr_filter = ebx.split.lbr_filter;
+	x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
+	x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
+	x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
+	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
+	x86_pmu.lbr_nr = lbr_nr;
+
+
+	arch_lbr_xsave = is_arch_lbr_xsave_available();
+	if (arch_lbr_xsave) {
+		size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
+		       get_lbr_state_size();
+		pmu->task_ctx_cache = create_lbr_kmem_cache(size,
+							    XSAVE_ALIGNMENT);
+	}
+
+	if (!pmu->task_ctx_cache) {
+		arch_lbr_xsave = false;
+
+		size = sizeof(struct x86_perf_task_context_arch_lbr) +
+		       lbr_nr * sizeof(struct lbr_entry);
+		pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+	}
+
+	x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
+	x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
+	x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;
+
+	/* LBR callstack requires both CPL and Branch Filtering support */
+	if (!x86_pmu.lbr_cpl ||
+	    !x86_pmu.lbr_filter ||
+	    !x86_pmu.lbr_call_stack)
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;
+
+	if (!x86_pmu.lbr_cpl) {
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
+	} else if (!x86_pmu.lbr_filter) {
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
+		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
+	}
+
+	x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
+	x86_pmu.lbr_ctl_map = arch_lbr_ctl_map;
+
+	if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
+		x86_pmu.lbr_ctl_map = NULL;
+
+	x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
+	if (arch_lbr_xsave) {
+		x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
+		x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
+		x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
+		pr_cont("XSAVE ");
+	} else {
+		x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
+		x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
+		x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
+	}
+
+	pr_cont("Architectural LBR, ");
+
+	return;
+
+clear_arch_lbr:
+	setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
+}
+
+/**
+ * x86_perf_get_lbr - get the LBR records information
+ *
+ * @lbr: the caller's memory to store the LBR records information
+ *
+ * Returns: 0 indicates the LBR info has been successfully obtained
+ */
+int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
+{
+	int lbr_fmt = x86_pmu.intel_cap.lbr_format;
+
+	lbr->nr = x86_pmu.lbr_nr;
+	lbr->from = x86_pmu.lbr_from;
+	lbr->to = x86_pmu.lbr_to;
+	lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
+
+struct event_constraint vlbr_constraint =
+	__EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
+			  FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);