hc
2024-05-10 ee930fffee469d076998274a2ca55e13dc1efb67
kernel/arch/x86/events/intel/pt.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Intel(R) Processor Trace PMU driver for perf
  * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
  *
  * Intel PT is specified in the Intel Architecture Instruction Set Extensions
  * Programming Reference:
@@ -21,6 +13,8 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/types.h>
+#include <linux/bits.h>
+#include <linux/limits.h>
 #include <linux/slab.h>
 #include <linux/device.h>
 
@@ -68,6 +62,7 @@
 	PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
 	PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
 	PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
+	PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)),
 	PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
 	PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7),
 	PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
@@ -75,14 +70,21 @@
 	PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
 };
 
-static u32 pt_cap_get(enum pt_capabilities cap)
+u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability)
 {
-	struct pt_cap_desc *cd = &pt_caps[cap];
-	u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
+	struct pt_cap_desc *cd = &pt_caps[capability];
+	u32 c = caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
 	unsigned int shift = __ffs(cd->mask);
 
 	return (c & cd->mask) >> shift;
 }
+EXPORT_SYMBOL_GPL(intel_pt_validate_cap);
+
+u32 intel_pt_validate_hw_cap(enum pt_capabilities cap)
+{
+	return intel_pt_validate_cap(pt_pmu.caps, cap);
+}
+EXPORT_SYMBOL_GPL(intel_pt_validate_hw_cap);
 
 static ssize_t pt_cap_show(struct device *cdev,
 			   struct device_attribute *attr,
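Note: each PT_CAP() entry above names a CPUID(0x14) sub-leaf, register and bit mask, and intel_pt_validate_cap() decodes a field by masking the saved register word and shifting right by the mask's lowest set bit. A minimal userspace sketch of just that decode step (standalone C; the constants are illustrative, and __builtin_ctz() stands in for the kernel's __ffs()):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t decode_cap(uint32_t reg_word, uint32_t mask)
    {
        /* mask the field, then shift by the mask's lowest set bit */
        return (reg_word & mask) >> __builtin_ctz(mask);
    }

    int main(void)
    {
        /* e.g. mtc_periods lives in CPUID(0x14, 1).EAX bits 31:16 */
        printf("%#x\n", decode_cap(0x02490000u, 0xffff0000u)); /* 0x249 */
        return 0;
    }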
@@ -92,10 +94,10 @@
 		container_of(attr, struct dev_ext_attribute, attr);
 	enum pt_capabilities cap = (long)ea->var;
 
-	return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
+	return snprintf(buf, PAGE_SIZE, "%x\n", intel_pt_validate_hw_cap(cap));
 }
 
-static struct attribute_group pt_cap_group = {
+static struct attribute_group pt_cap_group __ro_after_init = {
 	.name = "caps",
 };
 
@@ -204,9 +206,9 @@
 
 	/* model-specific quirks */
 	switch (boot_cpu_data.x86_model) {
-	case INTEL_FAM6_BROADWELL_CORE:
-	case INTEL_FAM6_BROADWELL_XEON_D:
-	case INTEL_FAM6_BROADWELL_GT3E:
+	case INTEL_FAM6_BROADWELL:
+	case INTEL_FAM6_BROADWELL_D:
+	case INTEL_FAM6_BROADWELL_G:
 	case INTEL_FAM6_BROADWELL_X:
 		/* not setting BRANCH_EN will #GP, erratum BDM106 */
 		pt_pmu.branch_en_always_on = true;
@@ -225,8 +227,6 @@
 		if (reg & BIT(14))
 			pt_pmu.vmx = true;
 	}
-
-	attrs = NULL;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
 		cpuid_count(20, i,
@@ -310,16 +310,16 @@
 		return false;
 
 	if (config & RTIT_CTL_CYC_PSB) {
-		if (!pt_cap_get(PT_CAP_psb_cyc))
+		if (!intel_pt_validate_hw_cap(PT_CAP_psb_cyc))
 			return false;
 
-		allowed = pt_cap_get(PT_CAP_psb_periods);
+		allowed = intel_pt_validate_hw_cap(PT_CAP_psb_periods);
 		requested = (config & RTIT_CTL_PSB_FREQ) >>
 			RTIT_CTL_PSB_FREQ_OFFSET;
 		if (requested && (!(allowed & BIT(requested))))
 			return false;
 
-		allowed = pt_cap_get(PT_CAP_cycle_thresholds);
+		allowed = intel_pt_validate_hw_cap(PT_CAP_cycle_thresholds);
 		requested = (config & RTIT_CTL_CYC_THRESH) >>
 			RTIT_CTL_CYC_THRESH_OFFSET;
 		if (requested && (!(allowed & BIT(requested))))
@@ -334,10 +334,10 @@
 	 * Spec says that setting mtc period bits while mtc bit in
 	 * CPUID is 0 will #GP, so better safe than sorry.
 	 */
-	if (!pt_cap_get(PT_CAP_mtc))
+	if (!intel_pt_validate_hw_cap(PT_CAP_mtc))
 		return false;
 
-	allowed = pt_cap_get(PT_CAP_mtc_periods);
+	allowed = intel_pt_validate_hw_cap(PT_CAP_mtc_periods);
 	if (!allowed)
 		return false;
 
@@ -349,11 +349,11 @@
 	}
 
 	if (config & RTIT_CTL_PWR_EVT_EN &&
-	    !pt_cap_get(PT_CAP_power_event_trace))
+	    !intel_pt_validate_hw_cap(PT_CAP_power_event_trace))
 		return false;
 
 	if (config & RTIT_CTL_PTW) {
-		if (!pt_cap_get(PT_CAP_ptwrite))
+		if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite))
 			return false;
 
 		/* FUPonPTW without PTW doesn't make sense */
@@ -396,6 +396,20 @@
  * PT configuration helpers
  * These all are cpu affine and operate on a local PT
  */
+
+static void pt_config_start(struct perf_event *event)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	u64 ctl = event->hw.config;
+
+	ctl |= RTIT_CTL_TRACEEN;
+	if (READ_ONCE(pt->vmx_on))
+		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
+	else
+		wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+	WRITE_ONCE(event->hw.config, ctl);
+}
 
 /* Address ranges and their corresponding msr configuration registers */
 static const struct pt_address_range {
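Note: pt_config_start() becomes the single place that sets TraceEn. The vmx_on test matters because writing MSR_IA32_RTIT_CTL while the CPU is in VMX operation would #GP, so the driver flags the AUX handle PERF_AUX_FLAG_PARTIAL instead, telling the decoder there is a gap in the trace. A standalone mock of that guard (plain C; write_trace_ctl() and mark_partial() are invented stand-ins for wrmsrl() and perf_aux_output_flag(), not kernel API):

    #include <stdint.h>
    #include <stdio.h>

    #define RTIT_CTL_TRACEEN (1ULL << 0) /* TraceEn is bit 0 of RTIT_CTL */

    static void write_trace_ctl(uint64_t v) { printf("wrmsr %#llx\n", (unsigned long long)v); }
    static void mark_partial(void) { puts("flag: PARTIAL"); }

    static uint64_t config_start(uint64_t ctl, int vmx_on)
    {
        ctl |= RTIT_CTL_TRACEEN;
        if (vmx_on)
            mark_partial(); /* the MSR write would fault here */
        else
            write_trace_ctl(ctl);
        return ctl; /* caller publishes this, as WRITE_ONCE() does above */
    }

    int main(void) { config_start(0, 0); config_start(0, 1); return 0; }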
@@ -460,7 +474,7 @@
 			pt->filters.filter[range].msr_b = filter->msr_b;
 		}
 
-		rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+		rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off;
 	}
 
 	return rtit_ctl;
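Note: the (u64) cast added above is a real fix: filter->config is a 32-bit value while the ADDRn_OFFSET shift counts start at bit 32 of RTIT_CTL, so shifting without first widening is undefined behaviour and loses the bits in practice. A compilable illustration (the offset value is one plausible example, e.g. an ADDRn field at bit 36):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t config = 0x1;      /* filter behaviour for one address range */
        unsigned int reg_off = 36;  /* e.g. an ADDRn_OFFSET in RTIT_CTL */

        /* widen to 64 bits before shifting, as the patched line does */
        uint64_t ctl = (uint64_t)config << reg_off;

        assert(ctl == 1ULL << 36);
        return 0;
    }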
@@ -469,6 +483,7 @@
 static void pt_config(struct perf_event *event)
 {
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 	u64 reg;
 
 	/* First round: clear STATUS, in particular the PSB byte counter. */
@@ -478,7 +493,9 @@
 	}
 
 	reg = pt_config_filters(event);
-	reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
+	reg |= RTIT_CTL_TRACEEN;
+	if (!buf->single)
+		reg |= RTIT_CTL_TOPA;
 
 	/*
 	 * Previously, we had BRANCH_EN on by default, but now that PT has
@@ -501,10 +518,7 @@
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
 	event->hw.config = reg;
-	if (READ_ONCE(pt->vmx_on))
-		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
-	else
-		wrmsrl(MSR_IA32_RTIT_CTL, reg);
+	pt_config_start(event);
 }
 
 static void pt_config_stop(struct perf_event *event)
@@ -533,45 +547,89 @@
 	wmb();
 }
 
-static void pt_config_buffer(void *buf, unsigned int topa_idx,
-			     unsigned int output_off)
-{
-	u64 reg;
-
-	wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
-
-	reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
-
-	wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
-}
+/**
+ * struct topa - ToPA metadata
+ * @list:	linkage to struct pt_buffer's list of tables
+ * @offset:	offset of the first entry in this table in the buffer
+ * @size:	total size of all entries in this table
+ * @last:	index of the last initialized entry in this table
+ * @z_count:	how many times the first entry repeats
+ */
+struct topa {
+	struct list_head list;
+	u64 offset;
+	size_t size;
+	int last;
+	unsigned int z_count;
+};
 
 /*
  * Keep ToPA table-related metadata on the same page as the actual table,
  * taking up a few words from the top
  */
 
-#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
+#define TENTS_PER_PAGE \
+	((PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry))
 
 /**
- * struct topa - page-sized ToPA table with metadata at the top
+ * struct topa_page - page-sized ToPA table with metadata at the top
  * @table:	actual ToPA table entries, as understood by PT hardware
- * @list:	linkage to struct pt_buffer's list of tables
- * @phys:	physical address of this page
- * @offset:	offset of the first entry in this table in the buffer
- * @size:	total size of all entries in this table
- * @last:	index of the last initialized entry in this table
+ * @topa:	metadata
 */
-struct topa {
+struct topa_page {
 	struct topa_entry table[TENTS_PER_PAGE];
-	struct list_head list;
-	u64 phys;
-	u64 offset;
-	size_t size;
-	int last;
+	struct topa topa;
 };
 
+static inline struct topa_page *topa_to_page(struct topa *topa)
+{
+	return container_of(topa, struct topa_page, topa);
+}
+
+static inline struct topa_page *topa_entry_to_page(struct topa_entry *te)
+{
+	return (struct topa_page *)((unsigned long)te & PAGE_MASK);
+}
+
+static inline phys_addr_t topa_pfn(struct topa *topa)
+{
+	return PFN_DOWN(virt_to_phys(topa_to_page(topa)));
+}
+
 /* make -1 stand for the last table entry */
-#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
+#define TOPA_ENTRY(t, i) \
+	((i) == -1 \
+		? &topa_to_page(t)->table[(t)->last] \
+		: &topa_to_page(t)->table[(i)])
+#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
+#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
+
+static void pt_config_buffer(struct pt_buffer *buf)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	u64 reg, mask;
+	void *base;
+
+	if (buf->single) {
+		base = buf->data_pages[0];
+		mask = (buf->nr_pages * PAGE_SIZE - 1) >> 7;
+	} else {
+		base = topa_to_page(buf->cur)->table;
+		mask = (u64)buf->cur_idx;
+	}
+
+	reg = virt_to_phys(base);
+	if (pt->output_base != reg) {
+		pt->output_base = reg;
+		wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
+	}
+
+	reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
+	if (pt->output_mask != reg) {
+		pt->output_mask = reg;
+		wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+	}
+}
 
 /**
 * topa_alloc() - allocate page-sized ToPA table
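Note: with the metadata moved into struct topa at the tail of the table page, TENTS_PER_PAGE becomes (PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry); with 4 KiB pages, 8-byte entries and the 40-byte struct above, that works out to 507 entries on x86_64. The packing of the OUTPUT_MASK MSR that pt_config_buffer() and pt_read_offset() agree on can be sketched in isolation (plain C; the field layout follows the code above: bits 6:0 set, bits 31:7 the ToPA entry index, or the size mask in single-range mode, bits 63:32 the output offset):

    #include <stdint.h>

    static uint64_t pack_output_mask(uint64_t idx_or_mask, uint64_t output_off)
    {
        return 0x7f | (idx_or_mask << 7) | (output_off << 32);
    }

    /* the inverse used on the interrupt path, cf. pt_read_offset() */
    static unsigned int unpack_table_idx(uint64_t reg)
    {
        return (reg & 0xffffff80) >> 7;
    }

    static uint64_t unpack_output_off(uint64_t reg)
    {
        return reg >> 32;
    }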
@@ -583,27 +641,26 @@
 static struct topa *topa_alloc(int cpu, gfp_t gfp)
 {
 	int node = cpu_to_node(cpu);
-	struct topa *topa;
+	struct topa_page *tp;
 	struct page *p;
 
 	p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
 	if (!p)
 		return NULL;
 
-	topa = page_address(p);
-	topa->last = 0;
-	topa->phys = page_to_phys(p);
+	tp = page_address(p);
+	tp->topa.last = 0;
 
 	/*
	 * In case of singe-entry ToPA, always put the self-referencing END
	 * link as the 2nd entry in the table
	 */
-	if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
-		TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
-		TOPA_ENTRY(topa, 1)->end = 1;
+	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
+		TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT;
+		TOPA_ENTRY(&tp->topa, 1)->end = 1;
 	}
 
-	return topa;
+	return &tp->topa;
 }
@@ -638,12 +695,12 @@
 	topa->offset = last->offset + last->size;
 	buf->last = topa;
 
-	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
 		return;
 
 	BUG_ON(last->last != TENTS_PER_PAGE - 1);
 
-	TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
+	TOPA_ENTRY(last, -1)->base = topa_pfn(topa);
 	TOPA_ENTRY(last, -1)->end = 1;
 }
 
@@ -654,7 +711,7 @@
 static bool topa_table_full(struct topa *topa)
 {
 	/* single-entry ToPA is a special case */
-	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
 		return !!topa->last;
 
 	return topa->last == TENTS_PER_PAGE - 1;
@@ -670,7 +727,7 @@
 *
 * Return: 0 on success or error code.
 */
-static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
+static int topa_insert_pages(struct pt_buffer *buf, int cpu, gfp_t gfp)
 {
 	struct topa *topa = buf->last;
 	int order = 0;
@@ -681,16 +738,22 @@
 		order = page_private(p);
 
 	if (topa_table_full(topa)) {
-		topa = topa_alloc(buf->cpu, gfp);
+		topa = topa_alloc(cpu, gfp);
 		if (!topa)
 			return -ENOMEM;
 
 		topa_insert_table(buf, topa);
 	}
 
+	if (topa->z_count == topa->last - 1) {
+		if (order == TOPA_ENTRY(topa, topa->last - 1)->size)
+			topa->z_count++;
+	}
+
 	TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
 	TOPA_ENTRY(topa, -1)->size = order;
-	if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
+	if (!buf->snapshot &&
+	    !intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
 		TOPA_ENTRY(topa, -1)->intr = 1;
 		TOPA_ENTRY(topa, -1)->stop = 1;
 	}
@@ -712,23 +775,26 @@
 	struct topa *topa;
 
 	list_for_each_entry(topa, &buf->tables, list) {
+		struct topa_page *tp = topa_to_page(topa);
 		int i;
 
-		pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
-			 topa->phys, topa->offset, topa->size);
+		pr_debug("# table @%p, off %llx size %zx\n", tp->table,
+			 topa->offset, topa->size);
 		for (i = 0; i < TENTS_PER_PAGE; i++) {
 			pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
-				 &topa->table[i],
-				 (unsigned long)topa->table[i].base << TOPA_SHIFT,
-				 sizes(topa->table[i].size),
-				 topa->table[i].end ? 'E' : ' ',
-				 topa->table[i].intr ? 'I' : ' ',
-				 topa->table[i].stop ? 'S' : ' ',
-				 *(u64 *)&topa->table[i]);
-			if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
-			     topa->table[i].stop) ||
-			    topa->table[i].end)
+				 &tp->table[i],
+				 (unsigned long)tp->table[i].base << TOPA_SHIFT,
+				 sizes(tp->table[i].size),
+				 tp->table[i].end ? 'E' : ' ',
+				 tp->table[i].intr ? 'I' : ' ',
+				 tp->table[i].stop ? 'S' : ' ',
+				 *(u64 *)&tp->table[i]);
+			if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
+			     tp->table[i].stop) ||
+			    tp->table[i].end)
 				break;
+			if (!i && topa->z_count)
+				i += topa->z_count;
 		}
 	}
 }
@@ -765,12 +831,17 @@
 	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 	u64 topa_idx, base, old;
 
+	if (buf->single) {
+		local_set(&buf->data_size, buf->output_off);
+		return;
+	}
+
 	/* offset of the first region in this table from the beginning of buf */
 	base = buf->cur->offset + buf->output_off;
 
 	/* offset of the current output region within this table */
 	for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
-		base += sizes(buf->cur->table[topa_idx].size);
+		base += TOPA_ENTRY_SIZE(buf->cur, topa_idx);
 
 	if (buf->snapshot) {
 		local_set(&buf->data_size, base);
@@ -790,7 +861,7 @@
 */
 static void *pt_buffer_region(struct pt_buffer *buf)
 {
-	return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
+	return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
 }
 
 /**
@@ -799,7 +870,7 @@
 */
 static size_t pt_buffer_region_size(struct pt_buffer *buf)
 {
-	return sizes(buf->cur->table[buf->cur_idx].size);
+	return TOPA_ENTRY_SIZE(buf->cur, buf->cur_idx);
 }
 
 /**
@@ -828,8 +899,9 @@
 	 * means we are already losing data; need to let the decoder
 	 * know.
 	 */
-	if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
-	    buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+	if (!buf->single &&
+	    (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
+	     buf->output_off == pt_buffer_region_size(buf))) {
 		perf_aux_output_flag(&pt->handle,
 				     PERF_AUX_FLAG_TRUNCATED);
 		advance++;
@@ -840,7 +912,8 @@
 	 * Also on single-entry ToPA implementations, interrupt will come
 	 * before the output reaches its output region's boundary.
 	 */
-	if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
+	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
+	    !buf->snapshot &&
 	    pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
 		void *head = pt_buffer_region(buf);
 
@@ -865,41 +938,114 @@
 */
 static void pt_read_offset(struct pt_buffer *buf)
 {
-	u64 offset, base_topa;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct topa_page *tp;
 
-	rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
-	buf->cur = phys_to_virt(base_topa);
+	if (!buf->single) {
+		rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
+		tp = phys_to_virt(pt->output_base);
+		buf->cur = &tp->topa;
+	}
 
-	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
 	/* offset within current output region */
-	buf->output_off = offset >> 32;
+	buf->output_off = pt->output_mask >> 32;
 	/* index of current output region within this table */
-	buf->cur_idx = (offset & 0xffffff80) >> 7;
+	if (!buf->single)
+		buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
 }
 
-/**
- * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
- * @buf:	PT buffer.
- * @pg:	Page offset in the buffer.
- *
- * When advancing to the next output region (ToPA entry), given a page offset
- * into the buffer, we need to find the offset of the first page in the next
- * region.
- */
-static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
+static struct topa_entry *
+pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
 {
-	struct topa_entry *te = buf->topa_index[pg];
+	struct topa_page *tp;
+	struct topa *topa;
+	unsigned int idx, cur_pg = 0, z_pg = 0, start_idx = 0;
 
-	/* one region */
-	if (buf->first == buf->last && buf->first->last == 1)
-		return pg;
+	/*
+	 * Indicates a bug in the caller.
+	 */
+	if (WARN_ON_ONCE(pg >= buf->nr_pages))
+		return NULL;
 
-	do {
-		pg++;
-		pg &= buf->nr_pages - 1;
-	} while (buf->topa_index[pg] == te);
+	/*
	 * First, find the ToPA table where @pg fits. With high
	 * order allocations, there shouldn't be many of these.
	 */
+	list_for_each_entry(topa, &buf->tables, list) {
+		if (topa->offset + topa->size > pg << PAGE_SHIFT)
+			goto found;
+	}
 
-	return pg;
+	/*
	 * Hitting this means we have a problem in the ToPA
	 * allocation code.
	 */
+	WARN_ON_ONCE(1);
+
+	return NULL;
+
+found:
+	/*
	 * Indicates a problem in the ToPA allocation code.
	 */
+	if (WARN_ON_ONCE(topa->last == -1))
+		return NULL;
+
+	tp = topa_to_page(topa);
+	cur_pg = PFN_DOWN(topa->offset);
+	if (topa->z_count) {
+		z_pg = TOPA_ENTRY_PAGES(topa, 0) * (topa->z_count + 1);
+		start_idx = topa->z_count + 1;
+	}
+
+	/*
	 * Multiple entries at the beginning of the table have the same size,
	 * ideally all of them; if @pg falls there, the search is done.
	 */
+	if (pg >= cur_pg && pg < cur_pg + z_pg) {
+		idx = (pg - cur_pg) / TOPA_ENTRY_PAGES(topa, 0);
+		return &tp->table[idx];
+	}
+
+	/*
	 * Otherwise, slow path: iterate through the remaining entries.
	 */
+	for (idx = start_idx, cur_pg += z_pg; idx < topa->last; idx++) {
+		if (cur_pg + TOPA_ENTRY_PAGES(topa, idx) > pg)
+			return &tp->table[idx];
+
+		cur_pg += TOPA_ENTRY_PAGES(topa, idx);
+	}
+
+	/*
	 * Means we couldn't find a ToPA entry in the table that does match.
	 */
+	WARN_ON_ONCE(1);
+
+	return NULL;
+}
+
+static struct topa_entry *
+pt_topa_prev_entry(struct pt_buffer *buf, struct topa_entry *te)
+{
+	unsigned long table = (unsigned long)te & ~(PAGE_SIZE - 1);
+	struct topa_page *tp;
+	struct topa *topa;
+
+	tp = (struct topa_page *)table;
+	if (tp->table != te)
+		return --te;
+
+	topa = &tp->topa;
+	if (topa == buf->first)
+		topa = buf->last;
+	else
+		topa = list_prev_entry(topa, list);
+
+	tp = topa_to_page(topa);
+
+	return &tp->table[topa->last - 1];
 }
 
 /**
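Note: pt_topa_entry_for_page() replaces the old per-page topa_index[] reverse map. The z_count field counts how many times the first entry's size repeats at the start of a table, so any page inside that leading run is resolved with one division instead of a walk. A standalone sketch of that fast path with made-up numbers:

    #include <assert.h>

    #define ENTRY_PAGES 4 /* pages covered by each leading entry (illustrative) */

    int main(void)
    {
        unsigned int z_count = 9;  /* entry 0's size repeats 9 more times */
        unsigned int cur_pg = 0;   /* buffer page where this table starts */
        unsigned int z_pg = ENTRY_PAGES * (z_count + 1); /* 40 pages in the run */
        unsigned int pg = 22;      /* page being looked up */

        assert(pg >= cur_pg && pg < cur_pg + z_pg);
        /* one division finds the entry, as in the kernel's fast path */
        assert((pg - cur_pg) / ENTRY_PAGES == 5);
        return 0;
    }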
@@ -922,22 +1068,28 @@
 	unsigned long head = local64_read(&buf->head);
 	unsigned long idx, npages, wakeup;
 
+	if (buf->single)
+		return 0;
+
 	/* can't stop in the middle of an output region */
-	if (buf->output_off + handle->size + 1 <
-	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+	if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
 		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 		return -EINVAL;
 	}
 
 
 	/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
-	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
 		return 0;
 
 	/* clear STOP and INT from current entry */
-	buf->topa_index[buf->stop_pos]->stop = 0;
-	buf->topa_index[buf->stop_pos]->intr = 0;
-	buf->topa_index[buf->intr_pos]->intr = 0;
+	if (buf->stop_te) {
+		buf->stop_te->stop = 0;
+		buf->stop_te->intr = 0;
+	}
+
+	if (buf->intr_te)
+		buf->intr_te->intr = 0;
 
 	/* how many pages till the STOP marker */
 	npages = handle->size >> PAGE_SHIFT;
@@ -948,7 +1100,12 @@
 
 	idx = (head >> PAGE_SHIFT) + npages;
 	idx &= buf->nr_pages - 1;
-	buf->stop_pos = idx;
+
+	if (idx != buf->stop_pos) {
+		buf->stop_pos = idx;
+		buf->stop_te = pt_topa_entry_for_page(buf, idx);
+		buf->stop_te = pt_topa_prev_entry(buf, buf->stop_te);
+	}
 
 	wakeup = handle->wakeup >> PAGE_SHIFT;
 
@@ -958,48 +1115,17 @@
 		idx = wakeup;
 
 	idx &= buf->nr_pages - 1;
-	buf->intr_pos = idx;
-
-	buf->topa_index[buf->stop_pos]->stop = 1;
-	buf->topa_index[buf->stop_pos]->intr = 1;
-	buf->topa_index[buf->intr_pos]->intr = 1;
-
-	return 0;
-}
-
-/**
- * pt_buffer_setup_topa_index() - build topa_index[] table of regions
- * @buf:	PT buffer.
- *
- * topa_index[] references output regions indexed by offset into the
- * buffer for purposes of quick reverse lookup.
- */
-static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
-{
-	struct topa *cur = buf->first, *prev = buf->last;
-	struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
-		*te_prev = TOPA_ENTRY(prev, prev->last - 1);
-	int pg = 0, idx = 0;
-
-	while (pg < buf->nr_pages) {
-		int tidx;
-
-		/* pages within one topa entry */
-		for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
-			buf->topa_index[pg] = te_prev;
-
-		te_prev = te_cur;
-
-		if (idx == cur->last - 1) {
-			/* advance to next topa table */
-			idx = 0;
-			cur = list_entry(cur->list.next, struct topa, list);
-		} else {
-			idx++;
-		}
-		te_cur = TOPA_ENTRY(cur, idx);
+	if (idx != buf->intr_pos) {
+		buf->intr_pos = idx;
+		buf->intr_te = pt_topa_entry_for_page(buf, idx);
+		buf->intr_te = pt_topa_prev_entry(buf, buf->intr_te);
 	}
 
+	buf->stop_te->stop = 1;
+	buf->stop_te->intr = 1;
+	buf->intr_te->intr = 1;
+
+	return 0;
 }
 
 /**
@@ -1019,18 +1145,24 @@
 */
 static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
 {
+	struct topa_page *cur_tp;
+	struct topa_entry *te;
 	int pg;
 
 	if (buf->snapshot)
 		head &= (buf->nr_pages << PAGE_SHIFT) - 1;
 
-	pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
-	pg = pt_topa_next_entry(buf, pg);
+	if (!buf->single) {
+		pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+		te = pt_topa_entry_for_page(buf, pg);
 
-	buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
-	buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
-			(unsigned long)buf->cur) / sizeof(struct topa_entry);
-	buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
+		cur_tp = topa_entry_to_page(te);
+		buf->cur = &cur_tp->topa;
+		buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
+		buf->output_off = head & (pt_buffer_region_size(buf) - 1);
+	} else {
+		buf->output_off = head;
+	}
 
 	local64_set(&buf->head, head);
 	local_set(&buf->data_size, 0);
@@ -1043,6 +1175,9 @@
 static void pt_buffer_fini_topa(struct pt_buffer *buf)
 {
 	struct topa *topa, *iter;
+
+	if (buf->single)
+		return;
 
 	list_for_each_entry_safe(topa, iter, &buf->tables, list) {
 		/*
@@ -1059,36 +1194,73 @@
 * @size:	Total size of all regions within this ToPA.
 * @gfp:	Allocation flags.
 */
-static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
-			       gfp_t gfp)
+static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
+			       unsigned long nr_pages, gfp_t gfp)
 {
 	struct topa *topa;
 	int err;
 
-	topa = topa_alloc(buf->cpu, gfp);
+	topa = topa_alloc(cpu, gfp);
 	if (!topa)
 		return -ENOMEM;
 
 	topa_insert_table(buf, topa);
 
 	while (buf->nr_pages < nr_pages) {
-		err = topa_insert_pages(buf, gfp);
+		err = topa_insert_pages(buf, cpu, gfp);
 		if (err) {
 			pt_buffer_fini_topa(buf);
 			return -ENOMEM;
 		}
 	}
 
-	pt_buffer_setup_topa_index(buf);
-
 	/* link last table to the first one, unless we're double buffering */
-	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
-		TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
+	if (intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
+		TOPA_ENTRY(buf->last, -1)->base = topa_pfn(buf->first);
 		TOPA_ENTRY(buf->last, -1)->end = 1;
 	}
 
 	pt_topa_dump(buf);
 	return 0;
+}
+
+static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
+{
+	struct page *p = virt_to_page(buf->data_pages[0]);
+	int ret = -ENOTSUPP, order = 0;
+
+	/*
	 * We can use single range output mode
	 * + in snapshot mode, where we don't need interrupts;
	 * + if the hardware supports it;
	 * + if the entire buffer is one contiguous allocation.
	 */
+	if (!buf->snapshot)
+		goto out;
+
+	if (!intel_pt_validate_hw_cap(PT_CAP_single_range_output))
+		goto out;
+
+	if (PagePrivate(p))
+		order = page_private(p);
+
+	if (1 << order != nr_pages)
+		goto out;
+
+	/*
	 * Some processors cannot always support single range for more than
	 * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might
	 * also be affected, so for now rather than trying to keep track of
	 * which ones, just disable it for all.
	 */
+	if (nr_pages > 1)
+		goto out;
+
+	buf->single = true;
+	buf->nr_pages = nr_pages;
+	ret = 0;
+out:
+	return ret;
 }
 
 /**
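Note: pt_buffer_try_single() claims single-range output only for snapshot buffers that are one contiguous high-order allocation (the AUX allocator records the order in page_private() of the head page, so the whole buffer must be exactly 1 << order pages). In that mode the low bits of the OUTPUT_MASK MSR carry the buffer size mask rather than a ToPA index; a quick arithmetic check of the mask computation used above (standalone C, 4 KiB page assumed):

    #include <assert.h>

    int main(void)
    {
        unsigned long nr_pages = 1, page_size = 4096;
        unsigned long mask = (nr_pages * page_size - 1) >> 7;

        /* 0x7f | (mask << 7) reconstitutes size - 1, since buffer sizes
         * are page multiples and the low 7 bits are always set */
        assert((0x7fUL | (mask << 7)) == nr_pages * page_size - 1);
        return 0;
    }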
@@ -1113,22 +1285,33 @@
 	if (!nr_pages)
 		return NULL;
 
+	/*
	 * Only support AUX sampling in snapshot mode, where we don't
	 * generate NMIs.
	 */
+	if (event->attr.aux_sample_size && !snapshot)
+		return NULL;
+
 	if (cpu == -1)
 		cpu = raw_smp_processor_id();
 	node = cpu_to_node(cpu);
 
-	buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
-			   GFP_KERNEL, node);
+	buf = kzalloc_node(sizeof(struct pt_buffer), GFP_KERNEL, node);
 	if (!buf)
 		return NULL;
 
-	buf->cpu = cpu;
 	buf->snapshot = snapshot;
 	buf->data_pages = pages;
+	buf->stop_pos = -1;
+	buf->intr_pos = -1;
 
 	INIT_LIST_HEAD(&buf->tables);
 
-	ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
+	ret = pt_buffer_try_single(buf, nr_pages);
+	if (!ret)
+		return buf;
+
+	ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL);
 	if (ret) {
 		kfree(buf);
 		return NULL;
@@ -1154,7 +1337,7 @@
 	struct pt_filters *filters;
 	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
 
-	if (!pt_cap_get(PT_CAP_num_address_ranges))
+	if (!intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
 		return 0;
 
 	filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
@@ -1176,10 +1359,36 @@
 	event->hw.addr_filters = NULL;
 }
 
-static inline bool valid_kernel_ip(unsigned long ip)
+#ifdef CONFIG_X86_64
+static u64 canonical_address(u64 vaddr, u8 vaddr_bits)
 {
-	return virt_addr_valid(ip) && kernel_ip(ip);
+	return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
 }
+
+static u64 is_canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+	return canonical_address(vaddr, vaddr_bits) == vaddr;
+}
+
+/* Clamp to a canonical address greater-than-or-equal-to the address given */
+static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits)
+{
+	return is_canonical_address(vaddr, vaddr_bits) ?
+	       vaddr :
+	       -BIT_ULL(vaddr_bits - 1);
+}
+
+/* Clamp to a canonical address less-than-or-equal-to the address given */
+static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits)
+{
+	return is_canonical_address(vaddr, vaddr_bits) ?
+	       vaddr :
+	       BIT_ULL(vaddr_bits - 1) - 1;
+}
+#else
+#define clamp_to_ge_canonical_addr(x, y) (x)
+#define clamp_to_le_canonical_addr(x, y) (x)
+#endif
 
 static int pt_event_addr_filters_validate(struct list_head *filters)
 {
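Note: canonical_address() sign-extends from the CPU's virtual-address width, and the two clamp helpers snap a non-canonical bound to the nearest canonical address in the safe direction; since nothing can execute inside the non-canonical hole, the filter result is unchanged. A userspace check with the usual 48-bit width (boot_cpu_data.x86_virt_bits is typically 48):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t canonical(uint64_t vaddr, uint8_t bits)
    {
        return (uint64_t)(((int64_t)vaddr << (64 - bits)) >> (64 - bits));
    }

    int main(void)
    {
        /* bit 47 set: sign-extends into bits 63:48 */
        assert(canonical(0x0000800000000000ULL, 48) == 0xffff800000000000ULL);
        /* already canonical: unchanged */
        assert(canonical(0x00007fffffffffffULL, 48) == 0x00007fffffffffffULL);
        return 0;
    }

A non-canonical start therefore clamps up to 0xffff800000000000 (-BIT_ULL(47)) and a non-canonical end clamps down to 0x00007fffffffffff (BIT_ULL(47) - 1).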
@@ -1195,15 +1404,7 @@
 		    filter->action == PERF_ADDR_FILTER_ACTION_START)
 			return -EOPNOTSUPP;
 
-		if (!filter->path.dentry) {
-			if (!valid_kernel_ip(filter->offset))
-				return -EINVAL;
-
-			if (!valid_kernel_ip(filter->offset + filter->size))
-				return -EINVAL;
-		}
-
-		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+		if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
 			return -EOPNOTSUPP;
 	}
 
@@ -1226,9 +1427,26 @@
 		if (filter->path.dentry && !fr[range].start) {
 			msr_a = msr_b = 0;
 		} else {
-			/* apply the offset */
-			msr_a = fr[range].start;
-			msr_b = msr_a + fr[range].size - 1;
+			unsigned long n = fr[range].size - 1;
+			unsigned long a = fr[range].start;
+			unsigned long b;
+
+			if (a > ULONG_MAX - n)
+				b = ULONG_MAX;
+			else
+				b = a + n;
+			/*
			 * Apply the offset. 64-bit addresses written to the
			 * MSRs must be canonical, but the range can encompass
			 * non-canonical addresses. Since software cannot
			 * execute at non-canonical addresses, adjusting to
			 * canonical addresses does not affect the result of the
			 * address filter.
			 */
+			msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits);
+			msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits);
+			if (msr_b < msr_a)
+				msr_a = msr_b = 0;
 		}
 
 		filters->filter[range].msr_a = msr_a;
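Note: the b = a + n computation above saturates instead of wrapping, so a filter whose size runs past the top of the address space ends at ULONG_MAX rather than at a small wrapped value (which would otherwise be zeroed out by the msr_b < msr_a check). Standalone illustration:

    #include <assert.h>
    #include <limits.h>

    static unsigned long range_end(unsigned long a, unsigned long n)
    {
        return (a > ULONG_MAX - n) ? ULONG_MAX : a + n; /* n is size - 1 */
    }

    int main(void)
    {
        assert(range_end(0x1000, 0xfff) == 0x1fff);
        assert(range_end(ULONG_MAX - 2, 10) == ULONG_MAX); /* saturates */
        return 0;
    }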
@@ -1294,9 +1512,8 @@
 			return;
 		}
 
-		pt_config_buffer(buf->cur->table, buf->cur_idx,
-				 buf->output_off);
-		pt_config(event);
+		pt_config_buffer(buf);
+		pt_config_start(event);
 	}
 }
 
@@ -1359,8 +1576,7 @@
 	WRITE_ONCE(pt->handle_nmi, 1);
 	hwc->state = 0;
 
-	pt_config_buffer(buf->cur->table, buf->cur_idx,
-			 buf->output_off);
+	pt_config_buffer(buf);
 	pt_config(event);
 
 	return;
@@ -1409,6 +1625,52 @@
 				buf->nr_pages << PAGE_SHIFT);
 		perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
 	}
+}
+
+static long pt_event_snapshot_aux(struct perf_event *event,
+				  struct perf_output_handle *handle,
+				  unsigned long size)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+	unsigned long from = 0, to;
+	long ret;
+
+	if (WARN_ON_ONCE(!buf))
+		return 0;
+
+	/*
	 * Sampling is only allowed on snapshot events;
	 * see pt_buffer_setup_aux().
	 */
+	if (WARN_ON_ONCE(!buf->snapshot))
+		return 0;
+
+	/*
	 * Here, handle_nmi tells us if the tracing is on
	 */
+	if (READ_ONCE(pt->handle_nmi))
+		pt_config_stop(event);
+
+	pt_read_offset(buf);
+	pt_update_head(pt);
+
+	to = local_read(&buf->data_size);
+	if (to < size)
+		from = buf->nr_pages << PAGE_SHIFT;
+	from += to - size;
+
+	ret = perf_output_copy_aux(&pt->handle, handle, from, to);
+
+	/*
	 * If the tracing was on when we turned up, restart it.
	 * Compiler barrier not needed as we couldn't have been
	 * preempted by anything that touches pt->handle_nmi.
	 */
+	if (pt->handle_nmi)
+		pt_config_start(event);
+
+	return ret;
 }
 
 static void pt_event_del(struct perf_event *event, int mode)
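Note: in pt_event_snapshot_aux(), data_size marks where tracing stopped; when fewer than size bytes precede it, the copy window wraps around from the end of the AUX buffer. The from/to arithmetic in isolation (standalone C, illustrative values):

    #include <assert.h>

    int main(void)
    {
        unsigned long buf_bytes = 16 * 4096; /* nr_pages << PAGE_SHIFT */
        unsigned long to = 0x1000;           /* bytes traced so far */
        unsigned long size = 0x3000;         /* requested sample size */
        unsigned long from = 0;

        if (to < size)
            from = buf_bytes;
        from += to - size; /* unsigned wraparound is intended here */

        assert(from == buf_bytes - 0x2000); /* window wraps behind `to` */
        return 0;
    }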
@@ -1479,6 +1741,11 @@
 	pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
 }
 
+int is_intel_pt_event(struct perf_event *event)
+{
+	return event->pmu == &pt_pmu.pmu;
+}
+
 static __init int pt_init(void)
 {
 	int ret, cpu, prior_warn = 0;
@@ -1509,14 +1776,13 @@
 	if (ret)
 		return ret;
 
-	if (!pt_cap_get(PT_CAP_topa_output)) {
+	if (!intel_pt_validate_hw_cap(PT_CAP_topa_output)) {
 		pr_warn("ToPA output is not supported on this CPU\n");
 		return -ENODEV;
 	}
 
-	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
-		pt_pmu.pmu.capabilities =
-			PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
+	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
+		pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
 
 	pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
 	pt_pmu.pmu.attr_groups = pt_attr_groups;
@@ -1526,13 +1792,14 @@
 	pt_pmu.pmu.del = pt_event_del;
 	pt_pmu.pmu.start = pt_event_start;
 	pt_pmu.pmu.stop = pt_event_stop;
+	pt_pmu.pmu.snapshot_aux = pt_event_snapshot_aux;
 	pt_pmu.pmu.read = pt_event_read;
 	pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
 	pt_pmu.pmu.free_aux = pt_buffer_free_aux;
 	pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync;
 	pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
 	pt_pmu.pmu.nr_addr_filters =
-		pt_cap_get(PT_CAP_num_address_ranges);
+		intel_pt_validate_hw_cap(PT_CAP_num_address_ranges);
 
 	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);