.. | .. |
---|
7 | 7 | #include <asm/perf_event.h> |
---|
8 | 8 | #include <asm/tlbflush.h> |
---|
9 | 9 | #include <asm/insn.h> |
---|
| 10 | +#include <asm/io.h> |
---|
10 | 11 | |
---|
11 | 12 | #include "../perf_event.h" |
---|
12 | 13 | |
---|
.. | .. |
---|
337 | 338 | struct debug_store *ds = hwev->ds; |
---|
338 | 339 | size_t bsiz = x86_pmu.pebs_buffer_size; |
---|
339 | 340 | int max, node = cpu_to_node(cpu); |
---|
340 | | - void *buffer, *ibuffer, *cea; |
---|
| 341 | + void *buffer, *insn_buff, *cea; |
---|
341 | 342 | |
---|
342 | 343 | if (!x86_pmu.pebs) |
---|
343 | 344 | return 0; |
---|
.. | .. |
---|
351 | 352 | * buffer then. |
---|
352 | 353 | */ |
---|
353 | 354 | if (x86_pmu.intel_cap.pebs_format < 2) { |
---|
354 | | - ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); |
---|
355 | | - if (!ibuffer) { |
---|
| 355 | + insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); |
---|
| 356 | + if (!insn_buff) { |
---|
356 | 357 | dsfree_pages(buffer, bsiz); |
---|
357 | 358 | return -ENOMEM; |
---|
358 | 359 | } |
---|
359 | | - per_cpu(insn_buffer, cpu) = ibuffer; |
---|
| 360 | + per_cpu(insn_buffer, cpu) = insn_buff; |
---|
360 | 361 | } |
---|
361 | 362 | hwev->ds_pebs_vaddr = buffer; |
---|
362 | 363 | /* Update the cpu entry area mapping */ |
---|
.. | .. |
---|
641 | 642 | rcu_read_lock(); |
---|
642 | 643 | perf_prepare_sample(&header, &data, event, ®s); |
---|
643 | 644 | |
---|
644 | | - if (perf_output_begin(&handle, event, header.size * |
---|
645 | | - (top - base - skip))) |
---|
| 645 | + if (perf_output_begin(&handle, &data, event, |
---|
| 646 | + header.size * (top - base - skip))) |
---|
646 | 647 | goto unlock; |
---|
647 | 648 | |
---|
648 | 649 | for (at = base; at < top; at++) { |
---|
.. | .. |
---|
669 | 670 | |
---|
670 | 671 | static inline void intel_pmu_drain_pebs_buffer(void) |
---|
671 | 672 | { |
---|
672 | | - struct pt_regs regs; |
---|
| 673 | + struct perf_sample_data data; |
---|
673 | 674 | |
---|
674 | | - x86_pmu.drain_pebs(®s); |
---|
| 675 | + x86_pmu.drain_pebs(NULL, &data); |
---|
675 | 676 | } |
---|
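The NULL pt_regs here is deliberate and is resolved later in this patch: __intel_pmu_pebs_event() substitutes a static dummy_iregs and, for such non-overflow drains (e.g. a large-PEBS flush on context switch), emits the final record through perf_event_output() rather than the overflow handler:

    if (!iregs)
            iregs = &dummy_iregs;   /* drained outside an overflow/NMI */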
676 | 677 | |
---|
677 | 678 | /* |
---|
.. | .. |
---|
849 | 850 | EVENT_CONSTRAINT_END |
---|
850 | 851 | }; |
---|
851 | 852 | |
---|
| 853 | +struct event_constraint intel_icl_pebs_event_constraints[] = { |
---|
| 854 | + INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ |
---|
| 855 | + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ |
---|
| 856 | + |
---|
| 857 | + INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
---|
| 858 | + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ |
---|
| 859 | + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ |
---|
| 860 | + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ |
---|
| 861 | + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ |
---|
| 862 | + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ |
---|
| 863 | + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ |
---|
| 864 | + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ |
---|
| 865 | + |
---|
| 866 | + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ |
---|
| 867 | + |
---|
| 868 | + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ |
---|
| 869 | + |
---|
| 870 | + /* |
---|
| 871 | + * Everything else is handled by PMU_FL_PEBS_ALL, because we |
---|
| 872 | + * need the full constraints from the main table. |
---|
| 873 | + */ |
---|
| 874 | + |
---|
| 875 | + EVENT_CONSTRAINT_END |
---|
| 876 | +}; |
---|
| 877 | + |
---|
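For readers decoding the constraint masks above (an editorial aid, not part of the patch): assuming INTEL_PMC_IDX_FIXED == 32 as in asm/perf_event.h, bits 32 and up of the counter mask select fixed counters, so the two fixed-counter-only entries decode as:

    /* 0x100000000ULL == 1ULL << 32 -> fixed counter 0 (INST_RETIRED.PREC_DIST) */
    /* 0x800000000ULL == 1ULL << 35 -> fixed counter 3 (SLOTS) */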
852 | 878 | struct event_constraint *intel_pebs_constraints(struct perf_event *event) |
---|
853 | 879 | { |
---|
854 | 880 | struct event_constraint *c; |
---|
.. | .. |
---|
858 | 884 | |
---|
859 | 885 | if (x86_pmu.pebs_constraints) { |
---|
860 | 886 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { |
---|
861 | | - if ((event->hw.config & c->cmask) == c->code) { |
---|
| 887 | + if (constraint_match(c, event->hw.config)) { |
---|
862 | 888 | event->hw.flags |= c->flags; |
---|
863 | 889 | return c; |
---|
864 | 890 | } |
---|
.. | .. |
---|
882 | 908 | */ |
---|
883 | 909 | static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc) |
---|
884 | 910 | { |
---|
| 911 | + if (cpuc->n_pebs == cpuc->n_pebs_via_pt) |
---|
| 912 | + return false; |
---|
| 913 | + |
---|
885 | 914 | return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs); |
---|
886 | 915 | } |
---|
887 | 916 | |
---|
.. | .. |
---|
899 | 928 | u64 threshold; |
---|
900 | 929 | int reserved; |
---|
901 | 930 | |
---|
| 931 | + if (cpuc->n_pebs_via_pt) |
---|
| 932 | + return; |
---|
| 933 | + |
---|
902 | 934 | if (x86_pmu.flags & PMU_FL_PEBS_ALL) |
---|
903 | 935 | reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed; |
---|
904 | 936 | else |
---|
.. | .. |
---|
906 | 938 | |
---|
907 | 939 | if (cpuc->n_pebs == cpuc->n_large_pebs) { |
---|
908 | 940 | threshold = ds->pebs_absolute_maximum - |
---|
909 | | - reserved * x86_pmu.pebs_record_size; |
---|
| 941 | + reserved * cpuc->pebs_record_size; |
---|
910 | 942 | } else { |
---|
911 | | - threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; |
---|
| 943 | + threshold = ds->pebs_buffer_base + cpuc->pebs_record_size; |
---|
912 | 944 | } |
---|
913 | 945 | |
---|
914 | 946 | ds->pebs_interrupt_threshold = threshold; |
---|
915 | 947 | } |
---|
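As a worked example of the two threshold cases (all numbers are illustrative assumptions: a 64KB DS buffer, reserved == 12 counters under PMU_FL_PEBS_ALL, and a 32-byte basic record):

    /* every event uses large PEBS: interrupt only when the buffer is nearly full */
    threshold = ds->pebs_absolute_maximum - 12 * 32;   /* roughly 64KB - 384 bytes */
    /* otherwise: interrupt after every single record */
    threshold = ds->pebs_buffer_base + 32;             /* one record past the base */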
916 | 948 | |
---|
917 | | -static void |
---|
918 | | -pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) |
---|
| 949 | +static void adaptive_pebs_record_size_update(void) |
---|
919 | 950 | { |
---|
| 951 | + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
| 952 | + u64 pebs_data_cfg = cpuc->pebs_data_cfg; |
---|
| 953 | + int sz = sizeof(struct pebs_basic); |
---|
| 954 | + |
---|
| 955 | + if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) |
---|
| 956 | + sz += sizeof(struct pebs_meminfo); |
---|
| 957 | + if (pebs_data_cfg & PEBS_DATACFG_GP) |
---|
| 958 | + sz += sizeof(struct pebs_gprs); |
---|
| 959 | + if (pebs_data_cfg & PEBS_DATACFG_XMMS) |
---|
| 960 | + sz += sizeof(struct pebs_xmm); |
---|
| 961 | + if (pebs_data_cfg & PEBS_DATACFG_LBRS) |
---|
| 962 | + sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry); |
---|
| 963 | + |
---|
| 964 | + cpuc->pebs_record_size = sz; |
---|
| 965 | +} |
---|
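The groups summed here always appear in the same order inside an adaptive record; the sizes below are taken from the struct definitions and are stated as assumptions for orientation only:

    /*
     * struct pebs_basic          always present            (32 bytes, assumed)
     * struct pebs_meminfo        if PEBS_DATACFG_MEMINFO   (32 bytes, assumed)
     * struct pebs_gprs           if PEBS_DATACFG_GP        (144 bytes, assumed)
     * struct pebs_xmm            if PEBS_DATACFG_XMMS      (256 bytes, assumed)
     * struct lbr_entry[lbr_nr]   if PEBS_DATACFG_LBRS      (24 bytes each, assumed)
     */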
| 966 | + |
---|
| 967 | +#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ |
---|
| 968 | + PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \ |
---|
| 969 | + PERF_SAMPLE_TRANSACTION) |
---|
| 970 | + |
---|
| 971 | +static u64 pebs_update_adaptive_cfg(struct perf_event *event) |
---|
| 972 | +{ |
---|
| 973 | + struct perf_event_attr *attr = &event->attr; |
---|
| 974 | + u64 sample_type = attr->sample_type; |
---|
| 975 | + u64 pebs_data_cfg = 0; |
---|
| 976 | + bool gprs, tsx_weight; |
---|
| 977 | + |
---|
| 978 | + if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) && |
---|
| 979 | + attr->precise_ip > 1) |
---|
| 980 | + return pebs_data_cfg; |
---|
| 981 | + |
---|
| 982 | + if (sample_type & PERF_PEBS_MEMINFO_TYPE) |
---|
| 983 | + pebs_data_cfg |= PEBS_DATACFG_MEMINFO; |
---|
| 984 | + |
---|
| 985 | + /* |
---|
| 986 | + * We need GPRs when: |
---|
| 987 | + * + the user requested them |
---|
| 988 | + * + precise_ip < 2, to supply the non-eventing IP |
---|
| 989 | + * + RTM TSX weight is requested, since the abort code is read from AX |
---|
| 990 | + */ |
---|
| 991 | + gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && |
---|
| 992 | + (attr->sample_regs_intr & PEBS_GP_REGS); |
---|
| 993 | + |
---|
| 994 | + tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) && |
---|
| 995 | + ((attr->config & INTEL_ARCH_EVENT_MASK) == |
---|
| 996 | + x86_pmu.rtm_abort_event); |
---|
| 997 | + |
---|
| 998 | + if (gprs || (attr->precise_ip < 2) || tsx_weight) |
---|
| 999 | + pebs_data_cfg |= PEBS_DATACFG_GP; |
---|
| 1000 | + |
---|
| 1001 | + if ((sample_type & PERF_SAMPLE_REGS_INTR) && |
---|
| 1002 | + (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK)) |
---|
| 1003 | + pebs_data_cfg |= PEBS_DATACFG_XMMS; |
---|
| 1004 | + |
---|
| 1005 | + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { |
---|
| 1006 | + /* |
---|
| 1007 | + * For now always log all LBRs. Could configure this |
---|
| 1008 | + * later. |
---|
| 1009 | + */ |
---|
| 1010 | + pebs_data_cfg |= PEBS_DATACFG_LBRS | |
---|
| 1011 | + ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT); |
---|
| 1012 | + } |
---|
| 1013 | + |
---|
| 1014 | + return pebs_data_cfg; |
---|
| 1015 | +} |
---|
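One encoding detail worth calling out: the LBR count is stored minus one in the data-cfg word and the parser adds the one back (see setup_pebs_adaptive_sample_data further down). Assuming 32 LBR entries:

    pebs_data_cfg |= PEBS_DATACFG_LBRS | ((32 - 1) << PEBS_DATACFG_LBR_SHIFT); /* field holds 31 */
    num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT) & 0xff) + 1;            /* reads back 32 */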
| 1016 | + |
---|
| 1017 | +static void |
---|
| 1018 | +pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, |
---|
| 1019 | + struct perf_event *event, bool add) |
---|
| 1020 | +{ |
---|
| 1021 | + struct pmu *pmu = event->ctx->pmu; |
---|
920 | 1022 | /* |
---|
921 | 1023 | * Make sure we get updated with the first PEBS |
---|
922 | 1024 | * event. It will trigger also during removal, but |
---|
.. | .. |
---|
933 | 1035 | update = true; |
---|
934 | 1036 | } |
---|
935 | 1037 | |
---|
| 1038 | + /* |
---|
| 1039 | + * The PEBS record doesn't shrink on pmu::del(). Doing so would require |
---|
| 1040 | + * iterating all remaining PEBS events to reconstruct the config. |
---|
| 1041 | + */ |
---|
| 1042 | + if (x86_pmu.intel_cap.pebs_baseline && add) { |
---|
| 1043 | + u64 pebs_data_cfg; |
---|
| 1044 | + |
---|
| 1045 | + /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */ |
---|
| 1046 | + if (cpuc->n_pebs == 1) { |
---|
| 1047 | + cpuc->pebs_data_cfg = 0; |
---|
| 1048 | + cpuc->pebs_record_size = sizeof(struct pebs_basic); |
---|
| 1049 | + } |
---|
| 1050 | + |
---|
| 1051 | + pebs_data_cfg = pebs_update_adaptive_cfg(event); |
---|
| 1052 | + |
---|
| 1053 | + /* Update pebs_record_size if new event requires more data. */ |
---|
| 1054 | + if (pebs_data_cfg & ~cpuc->pebs_data_cfg) { |
---|
| 1055 | + cpuc->pebs_data_cfg |= pebs_data_cfg; |
---|
| 1056 | + adaptive_pebs_record_size_update(); |
---|
| 1057 | + update = true; |
---|
| 1058 | + } |
---|
| 1059 | + } |
---|
| 1060 | + |
---|
936 | 1061 | if (update) |
---|
937 | 1062 | pebs_update_threshold(cpuc); |
---|
938 | 1063 | } |
---|
.. | .. |
---|
946 | 1071 | cpuc->n_pebs++; |
---|
947 | 1072 | if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) |
---|
948 | 1073 | cpuc->n_large_pebs++; |
---|
| 1074 | + if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) |
---|
| 1075 | + cpuc->n_pebs_via_pt++; |
---|
949 | 1076 | |
---|
950 | | - pebs_update_state(needed_cb, cpuc, event->ctx->pmu); |
---|
| 1077 | + pebs_update_state(needed_cb, cpuc, event, true); |
---|
| 1078 | +} |
---|
| 1079 | + |
---|
| 1080 | +static void intel_pmu_pebs_via_pt_disable(struct perf_event *event) |
---|
| 1081 | +{ |
---|
| 1082 | + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
| 1083 | + |
---|
| 1084 | + if (!is_pebs_pt(event)) |
---|
| 1085 | + return; |
---|
| 1086 | + |
---|
| 1087 | + if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK)) |
---|
| 1088 | + cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK; |
---|
| 1089 | +} |
---|
| 1090 | + |
---|
| 1091 | +static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) |
---|
| 1092 | +{ |
---|
| 1093 | + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
| 1094 | + struct hw_perf_event *hwc = &event->hw; |
---|
| 1095 | + struct debug_store *ds = cpuc->ds; |
---|
| 1096 | + |
---|
| 1097 | + if (!is_pebs_pt(event)) |
---|
| 1098 | + return; |
---|
| 1099 | + |
---|
| 1100 | + if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) |
---|
| 1101 | + cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD; |
---|
| 1102 | + |
---|
| 1103 | + cpuc->pebs_enabled |= PEBS_OUTPUT_PT; |
---|
| 1104 | + |
---|
| 1105 | + wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]); |
---|
951 | 1106 | } |
---|
952 | 1107 | |
---|
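For context on when is_pebs_pt() is true: userspace asks for PEBS-via-PT by grouping a precise event under an intel_pt leader with perf_event_attr.aux_output set. A sketch of the tool-side usage (the exact event syntax is an assumption and may differ between perf versions):

    /*
     *   perf record -e '{intel_pt//,cycles/aux-output/pp}' -- <workload>
     *
     * aux-output sets attr.aux_output on the cycles event, which ends up as the
     * PERF_X86_EVENT_PEBS_VIA_PT flag tested here (per the rest of this series).
     */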
953 | 1108 | void intel_pmu_pebs_enable(struct perf_event *event) |
---|
.. | .. |
---|
960 | 1115 | |
---|
961 | 1116 | cpuc->pebs_enabled |= 1ULL << hwc->idx; |
---|
962 | 1117 | |
---|
963 | | - if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) |
---|
| 1118 | + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) |
---|
964 | 1119 | cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); |
---|
965 | 1120 | else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) |
---|
966 | 1121 | cpuc->pebs_enabled |= 1ULL << 63; |
---|
| 1122 | + |
---|
| 1123 | + if (x86_pmu.intel_cap.pebs_baseline) { |
---|
| 1124 | + hwc->config |= ICL_EVENTSEL_ADAPTIVE; |
---|
| 1125 | + if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) { |
---|
| 1126 | + wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg); |
---|
| 1127 | + cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg; |
---|
| 1128 | + } |
---|
| 1129 | + } |
---|
967 | 1130 | |
---|
968 | 1131 | /* |
---|
969 | 1132 | * Use auto-reload if possible to save a MSR write in the PMI. |
---|
.. | .. |
---|
979 | 1142 | } else { |
---|
980 | 1143 | ds->pebs_event_reset[hwc->idx] = 0; |
---|
981 | 1144 | } |
---|
| 1145 | + |
---|
| 1146 | + intel_pmu_pebs_via_pt_enable(event); |
---|
982 | 1147 | } |
---|
983 | 1148 | |
---|
984 | 1149 | void intel_pmu_pebs_del(struct perf_event *event) |
---|
.. | .. |
---|
990 | 1155 | cpuc->n_pebs--; |
---|
991 | 1156 | if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) |
---|
992 | 1157 | cpuc->n_large_pebs--; |
---|
| 1158 | + if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) |
---|
| 1159 | + cpuc->n_pebs_via_pt--; |
---|
993 | 1160 | |
---|
994 | | - pebs_update_state(needed_cb, cpuc, event->ctx->pmu); |
---|
| 1161 | + pebs_update_state(needed_cb, cpuc, event, false); |
---|
995 | 1162 | } |
---|
996 | 1163 | |
---|
997 | 1164 | void intel_pmu_pebs_disable(struct perf_event *event) |
---|
.. | .. |
---|
999 | 1166 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
1000 | 1167 | struct hw_perf_event *hwc = &event->hw; |
---|
1001 | 1168 | |
---|
1002 | | - if (cpuc->n_pebs == cpuc->n_large_pebs) |
---|
| 1169 | + if (cpuc->n_pebs == cpuc->n_large_pebs && |
---|
| 1170 | + cpuc->n_pebs != cpuc->n_pebs_via_pt) |
---|
1003 | 1171 | intel_pmu_drain_pebs_buffer(); |
---|
1004 | 1172 | |
---|
1005 | 1173 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); |
---|
1006 | 1174 | |
---|
1007 | | - if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) |
---|
| 1175 | + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && |
---|
| 1176 | + (x86_pmu.version < 5)) |
---|
1008 | 1177 | cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); |
---|
1009 | 1178 | else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) |
---|
1010 | 1179 | cpuc->pebs_enabled &= ~(1ULL << 63); |
---|
| 1180 | + |
---|
| 1181 | + intel_pmu_pebs_via_pt_disable(event); |
---|
1011 | 1182 | |
---|
1012 | 1183 | if (cpuc->enabled) |
---|
1013 | 1184 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); |
---|
.. | .. |
---|
1125 | 1296 | return 0; |
---|
1126 | 1297 | } |
---|
1127 | 1298 | |
---|
1128 | | -static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs) |
---|
| 1299 | +static inline u64 intel_get_tsx_weight(u64 tsx_tuning) |
---|
1129 | 1300 | { |
---|
1130 | | - if (pebs->tsx_tuning) { |
---|
1131 | | - union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning }; |
---|
| 1301 | + if (tsx_tuning) { |
---|
| 1302 | + union hsw_tsx_tuning tsx = { .value = tsx_tuning }; |
---|
1132 | 1303 | return tsx.cycles_last_block; |
---|
1133 | 1304 | } |
---|
1134 | 1305 | return 0; |
---|
1135 | 1306 | } |
---|
1136 | 1307 | |
---|
1137 | | -static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs) |
---|
| 1308 | +static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax) |
---|
1138 | 1309 | { |
---|
1139 | | - u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; |
---|
| 1310 | + u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; |
---|
1140 | 1311 | |
---|
1141 | 1312 | /* For RTM XABORTs also log the abort code from AX */ |
---|
1142 | | - if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1)) |
---|
1143 | | - txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; |
---|
| 1313 | + if ((txn & PERF_TXN_TRANSACTION) && (ax & 1)) |
---|
| 1314 | + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; |
---|
1144 | 1315 | return txn; |
---|
1145 | 1316 | } |
---|
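A worked example of the abort-code path (the AX value is invented for illustration): with ax == 0x12000001, bit 0 marks an RTM abort and bits 31:24 carry the XABORT code, so:

    /* ((0x12000001 >> 24) & 0xff) == 0x12, shifted into place by PERF_TXN_ABORT_SHIFT */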
1146 | 1317 | |
---|
1147 | | -static void setup_pebs_sample_data(struct perf_event *event, |
---|
1148 | | - struct pt_regs *iregs, void *__pebs, |
---|
1149 | | - struct perf_sample_data *data, |
---|
1150 | | - struct pt_regs *regs) |
---|
| 1318 | +static inline u64 get_pebs_status(void *n) |
---|
1151 | 1319 | { |
---|
| 1320 | + if (x86_pmu.intel_cap.pebs_format < 4) |
---|
| 1321 | + return ((struct pebs_record_nhm *)n)->status; |
---|
| 1322 | + return ((struct pebs_basic *)n)->applicable_counters; |
---|
| 1323 | +} |
---|
| 1324 | + |
---|
1152 | 1325 | #define PERF_X86_EVENT_PEBS_HSW_PREC \ |
---|
1153 | 1326 | (PERF_X86_EVENT_PEBS_ST_HSW | \ |
---|
1154 | 1327 | PERF_X86_EVENT_PEBS_LD_HSW | \ |
---|
1155 | 1328 | PERF_X86_EVENT_PEBS_NA_HSW) |
---|
| 1329 | + |
---|
| 1330 | +static u64 get_data_src(struct perf_event *event, u64 aux) |
---|
| 1331 | +{ |
---|
| 1332 | + u64 val = PERF_MEM_NA; |
---|
| 1333 | + int fl = event->hw.flags; |
---|
| 1334 | + bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); |
---|
| 1335 | + |
---|
| 1336 | + if (fl & PERF_X86_EVENT_PEBS_LDLAT) |
---|
| 1337 | + val = load_latency_data(aux); |
---|
| 1338 | + else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) |
---|
| 1339 | + val = precise_datala_hsw(event, aux); |
---|
| 1340 | + else if (fst) |
---|
| 1341 | + val = precise_store_data(aux); |
---|
| 1342 | + return val; |
---|
| 1343 | +} |
---|
| 1344 | + |
---|
| 1345 | +static void setup_pebs_fixed_sample_data(struct perf_event *event, |
---|
| 1346 | + struct pt_regs *iregs, void *__pebs, |
---|
| 1347 | + struct perf_sample_data *data, |
---|
| 1348 | + struct pt_regs *regs) |
---|
| 1349 | +{ |
---|
1156 | 1350 | /* |
---|
1157 | 1351 | * We cast to the biggest pebs_record but are careful not to |
---|
1158 | 1352 | * unconditionally access the 'extra' entries. |
---|
.. | .. |
---|
1160 | 1354 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
1161 | 1355 | struct pebs_record_skl *pebs = __pebs; |
---|
1162 | 1356 | u64 sample_type; |
---|
1163 | | - int fll, fst, dsrc; |
---|
1164 | | - int fl = event->hw.flags; |
---|
| 1357 | + int fll; |
---|
1165 | 1358 | |
---|
1166 | 1359 | if (pebs == NULL) |
---|
1167 | 1360 | return; |
---|
1168 | 1361 | |
---|
1169 | 1362 | sample_type = event->attr.sample_type; |
---|
1170 | | - dsrc = sample_type & PERF_SAMPLE_DATA_SRC; |
---|
1171 | | - |
---|
1172 | | - fll = fl & PERF_X86_EVENT_PEBS_LDLAT; |
---|
1173 | | - fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); |
---|
| 1363 | + fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; |
---|
1174 | 1364 | |
---|
1175 | 1365 | perf_sample_data_init(data, 0, event->hw.last_period); |
---|
1176 | 1366 | |
---|
.. | .. |
---|
1185 | 1375 | /* |
---|
1186 | 1376 | * data.data_src encodes the data source |
---|
1187 | 1377 | */ |
---|
1188 | | - if (dsrc) { |
---|
1189 | | - u64 val = PERF_MEM_NA; |
---|
1190 | | - if (fll) |
---|
1191 | | - val = load_latency_data(pebs->dse); |
---|
1192 | | - else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) |
---|
1193 | | - val = precise_datala_hsw(event, pebs->dse); |
---|
1194 | | - else if (fst) |
---|
1195 | | - val = precise_store_data(pebs->dse); |
---|
1196 | | - data->data_src.val = val; |
---|
1197 | | - } |
---|
| 1378 | + if (sample_type & PERF_SAMPLE_DATA_SRC) |
---|
| 1379 | + data->data_src.val = get_data_src(event, pebs->dse); |
---|
1198 | 1380 | |
---|
1199 | 1381 | /* |
---|
1200 | 1382 | * We must however always use iregs for the unwinder to stay sane; the |
---|
1201 | 1383 | * record BP,SP,IP can point into thin air when the record is from a |
---|
1202 | | - * previous PMI context or an (I)RET happend between the record and |
---|
| 1384 | + * previous PMI context or an (I)RET happened between the record and |
---|
1203 | 1385 | * PMI. |
---|
1204 | 1386 | */ |
---|
1205 | 1387 | if (sample_type & PERF_SAMPLE_CALLCHAIN) |
---|
.. | .. |
---|
1281 | 1463 | if (x86_pmu.intel_cap.pebs_format >= 2) { |
---|
1282 | 1464 | /* Only set the TSX weight when no memory weight. */ |
---|
1283 | 1465 | if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) |
---|
1284 | | - data->weight = intel_hsw_weight(pebs); |
---|
| 1466 | + data->weight = intel_get_tsx_weight(pebs->tsx_tuning); |
---|
1285 | 1467 | |
---|
1286 | 1468 | if (sample_type & PERF_SAMPLE_TRANSACTION) |
---|
1287 | | - data->txn = intel_hsw_transaction(pebs); |
---|
| 1469 | + data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, |
---|
| 1470 | + pebs->ax); |
---|
1288 | 1471 | } |
---|
1289 | 1472 | |
---|
1290 | 1473 | /* |
---|
.. | .. |
---|
1299 | 1482 | |
---|
1300 | 1483 | if (has_branch_stack(event)) |
---|
1301 | 1484 | data->br_stack = &cpuc->lbr_stack; |
---|
| 1485 | +} |
---|
| 1486 | + |
---|
| 1487 | +static void adaptive_pebs_save_regs(struct pt_regs *regs, |
---|
| 1488 | + struct pebs_gprs *gprs) |
---|
| 1489 | +{ |
---|
| 1490 | + regs->ax = gprs->ax; |
---|
| 1491 | + regs->bx = gprs->bx; |
---|
| 1492 | + regs->cx = gprs->cx; |
---|
| 1493 | + regs->dx = gprs->dx; |
---|
| 1494 | + regs->si = gprs->si; |
---|
| 1495 | + regs->di = gprs->di; |
---|
| 1496 | + regs->bp = gprs->bp; |
---|
| 1497 | + regs->sp = gprs->sp; |
---|
| 1498 | +#ifndef CONFIG_X86_32 |
---|
| 1499 | + regs->r8 = gprs->r8; |
---|
| 1500 | + regs->r9 = gprs->r9; |
---|
| 1501 | + regs->r10 = gprs->r10; |
---|
| 1502 | + regs->r11 = gprs->r11; |
---|
| 1503 | + regs->r12 = gprs->r12; |
---|
| 1504 | + regs->r13 = gprs->r13; |
---|
| 1505 | + regs->r14 = gprs->r14; |
---|
| 1506 | + regs->r15 = gprs->r15; |
---|
| 1507 | +#endif |
---|
| 1508 | +} |
---|
| 1509 | + |
---|
| 1510 | +/* |
---|
| 1511 | + * With adaptive PEBS the layout depends on what fields are configured. |
---|
| 1512 | + */ |
---|
| 1513 | + |
---|
| 1514 | +static void setup_pebs_adaptive_sample_data(struct perf_event *event, |
---|
| 1515 | + struct pt_regs *iregs, void *__pebs, |
---|
| 1516 | + struct perf_sample_data *data, |
---|
| 1517 | + struct pt_regs *regs) |
---|
| 1518 | +{ |
---|
| 1519 | + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
| 1520 | + struct pebs_basic *basic = __pebs; |
---|
| 1521 | + void *next_record = basic + 1; |
---|
| 1522 | + u64 sample_type; |
---|
| 1523 | + u64 format_size; |
---|
| 1524 | + struct pebs_meminfo *meminfo = NULL; |
---|
| 1525 | + struct pebs_gprs *gprs = NULL; |
---|
| 1526 | + struct x86_perf_regs *perf_regs; |
---|
| 1527 | + |
---|
| 1528 | + if (basic == NULL) |
---|
| 1529 | + return; |
---|
| 1530 | + |
---|
| 1531 | + perf_regs = container_of(regs, struct x86_perf_regs, regs); |
---|
| 1532 | + perf_regs->xmm_regs = NULL; |
---|
| 1533 | + |
---|
| 1534 | + sample_type = event->attr.sample_type; |
---|
| 1535 | + format_size = basic->format_size; |
---|
| 1536 | + perf_sample_data_init(data, 0, event->hw.last_period); |
---|
| 1537 | + data->period = event->hw.last_period; |
---|
| 1538 | + |
---|
| 1539 | + if (event->attr.use_clockid == 0) |
---|
| 1540 | + data->time = native_sched_clock_from_tsc(basic->tsc); |
---|
| 1541 | + |
---|
| 1542 | + /* |
---|
| 1543 | + * We must however always use iregs for the unwinder to stay sane; the |
---|
| 1544 | + * record BP,SP,IP can point into thin air when the record is from a |
---|
| 1545 | + * previous PMI context or an (I)RET happened between the record and |
---|
| 1546 | + * PMI. |
---|
| 1547 | + */ |
---|
| 1548 | + if (sample_type & PERF_SAMPLE_CALLCHAIN) |
---|
| 1549 | + data->callchain = perf_callchain(event, iregs); |
---|
| 1550 | + |
---|
| 1551 | + *regs = *iregs; |
---|
| 1552 | + /* The ip in basic is EventingIP */ |
---|
| 1553 | + set_linear_ip(regs, basic->ip); |
---|
| 1554 | + regs->flags = PERF_EFLAGS_EXACT; |
---|
| 1555 | + |
---|
| 1556 | + /* |
---|
| 1557 | + * The MEMINFO group comes before the GP group, but |
---|
| 1558 | + * PERF_SAMPLE_TRANSACTION needs gprs->ax. |
---|
| 1559 | + * Save the pointer here and process it later. |
---|
| 1560 | + */ |
---|
| 1561 | + if (format_size & PEBS_DATACFG_MEMINFO) { |
---|
| 1562 | + meminfo = next_record; |
---|
| 1563 | + next_record = meminfo + 1; |
---|
| 1564 | + } |
---|
| 1565 | + |
---|
| 1566 | + if (format_size & PEBS_DATACFG_GP) { |
---|
| 1567 | + gprs = next_record; |
---|
| 1568 | + next_record = gprs + 1; |
---|
| 1569 | + |
---|
| 1570 | + if (event->attr.precise_ip < 2) { |
---|
| 1571 | + set_linear_ip(regs, gprs->ip); |
---|
| 1572 | + regs->flags &= ~PERF_EFLAGS_EXACT; |
---|
| 1573 | + } |
---|
| 1574 | + |
---|
| 1575 | + if (sample_type & PERF_SAMPLE_REGS_INTR) |
---|
| 1576 | + adaptive_pebs_save_regs(regs, gprs); |
---|
| 1577 | + } |
---|
| 1578 | + |
---|
| 1579 | + if (format_size & PEBS_DATACFG_MEMINFO) { |
---|
| 1580 | + if (sample_type & PERF_SAMPLE_WEIGHT) |
---|
| 1581 | + data->weight = meminfo->latency ?: |
---|
| 1582 | + intel_get_tsx_weight(meminfo->tsx_tuning); |
---|
| 1583 | + |
---|
| 1584 | + if (sample_type & PERF_SAMPLE_DATA_SRC) |
---|
| 1585 | + data->data_src.val = get_data_src(event, meminfo->aux); |
---|
| 1586 | + |
---|
| 1587 | + if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) |
---|
| 1588 | + data->addr = meminfo->address; |
---|
| 1589 | + |
---|
| 1590 | + if (sample_type & PERF_SAMPLE_TRANSACTION) |
---|
| 1591 | + data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, |
---|
| 1592 | + gprs ? gprs->ax : 0); |
---|
| 1593 | + } |
---|
| 1594 | + |
---|
| 1595 | + if (format_size & PEBS_DATACFG_XMMS) { |
---|
| 1596 | + struct pebs_xmm *xmm = next_record; |
---|
| 1597 | + |
---|
| 1598 | + next_record = xmm + 1; |
---|
| 1599 | + perf_regs->xmm_regs = xmm->xmm; |
---|
| 1600 | + } |
---|
| 1601 | + |
---|
| 1602 | + if (format_size & PEBS_DATACFG_LBRS) { |
---|
| 1603 | + struct lbr_entry *lbr = next_record; |
---|
| 1604 | + int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT) |
---|
| 1605 | + & 0xff) + 1; |
---|
| 1606 | + next_record = next_record + num_lbr * sizeof(struct lbr_entry); |
---|
| 1607 | + |
---|
| 1608 | + if (has_branch_stack(event)) { |
---|
| 1609 | + intel_pmu_store_pebs_lbrs(lbr); |
---|
| 1610 | + data->br_stack = &cpuc->lbr_stack; |
---|
| 1611 | + } |
---|
| 1612 | + } |
---|
| 1613 | + |
---|
| 1614 | + WARN_ONCE(next_record != __pebs + (format_size >> 48), |
---|
| 1615 | + "PEBS record size %llu, expected %llu, config %llx\n", |
---|
| 1616 | + format_size >> 48, |
---|
| 1617 | + (u64)(next_record - __pebs), |
---|
| 1618 | + basic->format_size); |
---|
1302 | 1619 | } |
---|
1303 | 1620 | |
---|
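The WARN_ONCE above doubles as documentation of the header: the low bits of basic->format_size mirror the PEBS_DATACFG_* selection, while the total record length in bytes sits in the top 16 bits, so the end of a record can be located without re-deriving the layout:

    next = (void *)basic + (basic->format_size >> 48);   /* hardware-reported record length */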
1304 | 1621 | static inline void * |
---|
.. | .. |
---|
1318 | 1635 | if (base == NULL) |
---|
1319 | 1636 | return NULL; |
---|
1320 | 1637 | |
---|
1321 | | - for (at = base; at < top; at += x86_pmu.pebs_record_size) { |
---|
1322 | | - struct pebs_record_nhm *p = at; |
---|
| 1638 | + for (at = base; at < top; at += cpuc->pebs_record_size) { |
---|
| 1639 | + unsigned long status = get_pebs_status(at); |
---|
1323 | 1640 | |
---|
1324 | | - if (test_bit(bit, (unsigned long *)&p->status)) { |
---|
| 1641 | + if (test_bit(bit, (unsigned long *)&status)) { |
---|
1325 | 1642 | /* PEBS v3 has accurate status bits */ |
---|
1326 | 1643 | if (x86_pmu.intel_cap.pebs_format >= 3) |
---|
1327 | 1644 | return at; |
---|
1328 | 1645 | |
---|
1329 | | - if (p->status == (1 << bit)) |
---|
| 1646 | + if (status == (1 << bit)) |
---|
1330 | 1647 | return at; |
---|
1331 | 1648 | |
---|
1332 | 1649 | /* clear non-PEBS bit and re-check */ |
---|
1333 | | - pebs_status = p->status & cpuc->pebs_enabled; |
---|
| 1650 | + pebs_status = status & cpuc->pebs_enabled; |
---|
1334 | 1651 | pebs_status &= PEBS_COUNTER_MASK; |
---|
1335 | 1652 | if (pebs_status == (1 << bit)) |
---|
1336 | 1653 | return at; |
---|
.. | .. |
---|
1409 | 1726 | return 0; |
---|
1410 | 1727 | } |
---|
1411 | 1728 | |
---|
1412 | | -static void __intel_pmu_pebs_event(struct perf_event *event, |
---|
1413 | | - struct pt_regs *iregs, |
---|
1414 | | - void *base, void *top, |
---|
1415 | | - int bit, int count) |
---|
| 1729 | +static __always_inline void |
---|
| 1730 | +__intel_pmu_pebs_event(struct perf_event *event, |
---|
| 1731 | + struct pt_regs *iregs, |
---|
| 1732 | + struct perf_sample_data *data, |
---|
| 1733 | + void *base, void *top, |
---|
| 1734 | + int bit, int count, |
---|
| 1735 | + void (*setup_sample)(struct perf_event *, |
---|
| 1736 | + struct pt_regs *, |
---|
| 1737 | + void *, |
---|
| 1738 | + struct perf_sample_data *, |
---|
| 1739 | + struct pt_regs *)) |
---|
1416 | 1740 | { |
---|
| 1741 | + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
1417 | 1742 | struct hw_perf_event *hwc = &event->hw; |
---|
1418 | | - struct perf_sample_data data; |
---|
1419 | | - struct pt_regs regs; |
---|
| 1743 | + struct x86_perf_regs perf_regs; |
---|
| 1744 | + struct pt_regs *regs = &perf_regs.regs; |
---|
1420 | 1745 | void *at = get_next_pebs_record_by_bit(base, top, bit); |
---|
| 1746 | + static struct pt_regs dummy_iregs; |
---|
1421 | 1747 | |
---|
1422 | 1748 | if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { |
---|
1423 | 1749 | /* |
---|
.. | .. |
---|
1430 | 1756 | } else if (!intel_pmu_save_and_restart(event)) |
---|
1431 | 1757 | return; |
---|
1432 | 1758 | |
---|
| 1759 | + if (!iregs) |
---|
| 1760 | + iregs = &dummy_iregs; |
---|
| 1761 | + |
---|
1433 | 1762 | while (count > 1) { |
---|
1434 | | - setup_pebs_sample_data(event, iregs, at, &data, ®s); |
---|
1435 | | - perf_event_output(event, &data, ®s); |
---|
1436 | | - at += x86_pmu.pebs_record_size; |
---|
| 1763 | + setup_sample(event, iregs, at, data, regs); |
---|
| 1764 | + perf_event_output(event, data, regs); |
---|
| 1765 | + at += cpuc->pebs_record_size; |
---|
1437 | 1766 | at = get_next_pebs_record_by_bit(at, top, bit); |
---|
1438 | 1767 | count--; |
---|
1439 | 1768 | } |
---|
1440 | 1769 | |
---|
1441 | | - setup_pebs_sample_data(event, iregs, at, &data, ®s); |
---|
1442 | | - |
---|
1443 | | - /* |
---|
1444 | | - * All but the last records are processed. |
---|
1445 | | - * The last one is left to be able to call the overflow handler. |
---|
1446 | | - */ |
---|
1447 | | - if (perf_event_overflow(event, &data, ®s)) { |
---|
1448 | | - x86_pmu_stop(event, 0); |
---|
1449 | | - return; |
---|
| 1770 | + setup_sample(event, iregs, at, data, regs); |
---|
| 1771 | + if (iregs == &dummy_iregs) { |
---|
| 1772 | + /* |
---|
| 1773 | + * The PEBS records may be drained in the non-overflow context, |
---|
| 1774 | + * e.g., large PEBS + context switch. Perf should treat the |
---|
| 1775 | + * last record the same as other PEBS records, and doesn't |
---|
| 1776 | + * invoke the generic overflow handler. |
---|
| 1777 | + */ |
---|
| 1778 | + perf_event_output(event, data, regs); |
---|
| 1779 | + } else { |
---|
| 1780 | + /* |
---|
| 1781 | + * All but the last records are processed. |
---|
| 1782 | + * The last one is left to be able to call the overflow handler. |
---|
| 1783 | + */ |
---|
| 1784 | + if (perf_event_overflow(event, data, regs)) |
---|
| 1785 | + x86_pmu_stop(event, 0); |
---|
1450 | 1786 | } |
---|
1451 | | - |
---|
1452 | 1787 | } |
---|
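Turning __intel_pmu_pebs_event() into an __always_inline function that takes the sample-setup routine as a parameter presumably lets the compiler specialize each drain path into a direct call (the pointer is a compile-time constant at every call site). The callers below pass the matching routine, e.g. the adaptive drain:

    __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit],
                           setup_pebs_adaptive_sample_data);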
1453 | 1788 | |
---|
1454 | | -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) |
---|
| 1789 | +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) |
---|
1455 | 1790 | { |
---|
1456 | 1791 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
1457 | 1792 | struct debug_store *ds = cpuc->ds; |
---|
.. | .. |
---|
1485 | 1820 | return; |
---|
1486 | 1821 | } |
---|
1487 | 1822 | |
---|
1488 | | - __intel_pmu_pebs_event(event, iregs, at, top, 0, n); |
---|
| 1823 | + __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, |
---|
| 1824 | + setup_pebs_fixed_sample_data); |
---|
1489 | 1825 | } |
---|
1490 | 1826 | |
---|
1491 | | -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) |
---|
| 1827 | +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) |
---|
| 1828 | +{ |
---|
| 1829 | + struct perf_event *event; |
---|
| 1830 | + int bit; |
---|
| 1831 | + |
---|
| 1832 | + /* |
---|
| 1833 | + * drain_pebs() can be called twice in a short period, e.g. for an |
---|
| 1834 | + * auto-reload event in pmu::read(), with no overflow having |
---|
| 1835 | + * happened in between. |
---|
| 1836 | + * intel_pmu_save_and_restart_reload() still needs to be called to |
---|
| 1837 | + * update the event->count for this case. |
---|
| 1838 | + */ |
---|
| 1839 | + for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { |
---|
| 1840 | + event = cpuc->events[bit]; |
---|
| 1841 | + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) |
---|
| 1842 | + intel_pmu_save_and_restart_reload(event, 0); |
---|
| 1843 | + } |
---|
| 1844 | +} |
---|
| 1845 | + |
---|
| 1846 | +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data) |
---|
1492 | 1847 | { |
---|
1493 | 1848 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
1494 | 1849 | struct debug_store *ds = cpuc->ds; |
---|
.. | .. |
---|
1515 | 1870 | } |
---|
1516 | 1871 | |
---|
1517 | 1872 | if (unlikely(base >= top)) { |
---|
1518 | | - /* |
---|
1519 | | - * The drain_pebs() could be called twice in a short period |
---|
1520 | | - * for auto-reload event in pmu::read(). There are no |
---|
1521 | | - * overflows have happened in between. |
---|
1522 | | - * It needs to call intel_pmu_save_and_restart_reload() to |
---|
1523 | | - * update the event->count for this case. |
---|
1524 | | - */ |
---|
1525 | | - for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, |
---|
1526 | | - size) { |
---|
1527 | | - event = cpuc->events[bit]; |
---|
1528 | | - if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) |
---|
1529 | | - intel_pmu_save_and_restart_reload(event, 0); |
---|
1530 | | - } |
---|
| 1873 | + intel_pmu_pebs_event_update_no_drain(cpuc, size); |
---|
1531 | 1874 | return; |
---|
1532 | 1875 | } |
---|
1533 | 1876 | |
---|
.. | .. |
---|
1540 | 1883 | |
---|
1541 | 1884 | /* PEBS v3 has more accurate status bits */ |
---|
1542 | 1885 | if (x86_pmu.intel_cap.pebs_format >= 3) { |
---|
1543 | | - for_each_set_bit(bit, (unsigned long *)&pebs_status, |
---|
1544 | | - size) |
---|
| 1886 | + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) |
---|
1545 | 1887 | counts[bit]++; |
---|
1546 | 1888 | |
---|
1547 | 1889 | continue; |
---|
.. | .. |
---|
1579 | 1921 | * that caused the PEBS record. It's called collision. |
---|
1580 | 1922 | * If collision happened, the record will be dropped. |
---|
1581 | 1923 | */ |
---|
1582 | | - if (p->status != (1ULL << bit)) { |
---|
1583 | | - for_each_set_bit(i, (unsigned long *)&pebs_status, |
---|
1584 | | - x86_pmu.max_pebs_events) |
---|
| 1924 | + if (pebs_status != (1ULL << bit)) { |
---|
| 1925 | + for_each_set_bit(i, (unsigned long *)&pebs_status, size) |
---|
1585 | 1926 | error[i]++; |
---|
1586 | 1927 | continue; |
---|
1587 | 1928 | } |
---|
.. | .. |
---|
1589 | 1930 | counts[bit]++; |
---|
1590 | 1931 | } |
---|
1591 | 1932 | |
---|
1592 | | - for (bit = 0; bit < size; bit++) { |
---|
| 1933 | + for_each_set_bit(bit, (unsigned long *)&mask, size) { |
---|
1593 | 1934 | if ((counts[bit] == 0) && (error[bit] == 0)) |
---|
1594 | 1935 | continue; |
---|
1595 | 1936 | |
---|
.. | .. |
---|
1604 | 1945 | if (error[bit]) { |
---|
1605 | 1946 | perf_log_lost_samples(event, error[bit]); |
---|
1606 | 1947 | |
---|
1607 | | - if (perf_event_account_interrupt(event)) |
---|
| 1948 | + if (iregs && perf_event_account_interrupt(event)) |
---|
1608 | 1949 | x86_pmu_stop(event, 0); |
---|
1609 | 1950 | } |
---|
1610 | 1951 | |
---|
1611 | 1952 | if (counts[bit]) { |
---|
1612 | | - __intel_pmu_pebs_event(event, iregs, base, |
---|
1613 | | - top, bit, counts[bit]); |
---|
| 1953 | + __intel_pmu_pebs_event(event, iregs, data, base, |
---|
| 1954 | + top, bit, counts[bit], |
---|
| 1955 | + setup_pebs_fixed_sample_data); |
---|
1614 | 1956 | } |
---|
| 1957 | + } |
---|
| 1958 | +} |
---|
| 1959 | + |
---|
| 1960 | +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) |
---|
| 1961 | +{ |
---|
| 1962 | + short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; |
---|
| 1963 | + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
---|
| 1964 | + struct debug_store *ds = cpuc->ds; |
---|
| 1965 | + struct perf_event *event; |
---|
| 1966 | + void *base, *at, *top; |
---|
| 1967 | + int bit, size; |
---|
| 1968 | + u64 mask; |
---|
| 1969 | + |
---|
| 1970 | + if (!x86_pmu.pebs_active) |
---|
| 1971 | + return; |
---|
| 1972 | + |
---|
| 1973 | + base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base; |
---|
| 1974 | + top = (struct pebs_basic *)(unsigned long)ds->pebs_index; |
---|
| 1975 | + |
---|
| 1976 | + ds->pebs_index = ds->pebs_buffer_base; |
---|
| 1977 | + |
---|
| 1978 | + mask = ((1ULL << x86_pmu.max_pebs_events) - 1) | |
---|
| 1979 | + (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); |
---|
| 1980 | + size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; |
---|
| 1981 | + |
---|
| 1982 | + if (unlikely(base >= top)) { |
---|
| 1983 | + intel_pmu_pebs_event_update_no_drain(cpuc, size); |
---|
| 1984 | + return; |
---|
| 1985 | + } |
---|
| 1986 | + |
---|
| 1987 | + for (at = base; at < top; at += cpuc->pebs_record_size) { |
---|
| 1988 | + u64 pebs_status; |
---|
| 1989 | + |
---|
| 1990 | + pebs_status = get_pebs_status(at) & cpuc->pebs_enabled; |
---|
| 1991 | + pebs_status &= mask; |
---|
| 1992 | + |
---|
| 1993 | + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) |
---|
| 1994 | + counts[bit]++; |
---|
| 1995 | + } |
---|
| 1996 | + |
---|
| 1997 | + for_each_set_bit(bit, (unsigned long *)&mask, size) { |
---|
| 1998 | + if (counts[bit] == 0) |
---|
| 1999 | + continue; |
---|
| 2000 | + |
---|
| 2001 | + event = cpuc->events[bit]; |
---|
| 2002 | + if (WARN_ON_ONCE(!event)) |
---|
| 2003 | + continue; |
---|
| 2004 | + |
---|
| 2005 | + if (WARN_ON_ONCE(!event->attr.precise_ip)) |
---|
| 2006 | + continue; |
---|
| 2007 | + |
---|
| 2008 | + __intel_pmu_pebs_event(event, iregs, data, base, |
---|
| 2009 | + top, bit, counts[bit], |
---|
| 2010 | + setup_pebs_adaptive_sample_data); |
---|
1615 | 2011 | } |
---|
1616 | 2012 | } |
---|
1617 | 2013 | |
---|
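Sanity-checking the counter mask built in this function, with assumed Icelake counts of 8 general-purpose and 4 fixed counters (the real values come from x86_pmu, and INTEL_PMC_IDX_FIXED is assumed to be 32):

    mask = ((1ULL << 8) - 1) | (((1ULL << 4) - 1) << 32);   /* == 0xf000000ffULL */
    size = 32 + 4;                                          /* scan bits 0-35 */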
.. | .. |
---|
1630 | 2026 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); |
---|
1631 | 2027 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); |
---|
1632 | 2028 | x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; |
---|
| 2029 | + if (x86_pmu.version <= 4) |
---|
| 2030 | + x86_pmu.pebs_no_isolation = 1; |
---|
| 2031 | + |
---|
1633 | 2032 | if (x86_pmu.pebs) { |
---|
1634 | 2033 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; |
---|
| 2034 | + char *pebs_qual = ""; |
---|
1635 | 2035 | int format = x86_pmu.intel_cap.pebs_format; |
---|
| 2036 | + |
---|
| 2037 | + if (format < 4) |
---|
| 2038 | + x86_pmu.intel_cap.pebs_baseline = 0; |
---|
1636 | 2039 | |
---|
1637 | 2040 | switch (format) { |
---|
1638 | 2041 | case 0: |
---|
.. | .. |
---|
1669 | 2072 | x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; |
---|
1670 | 2073 | break; |
---|
1671 | 2074 | |
---|
| 2075 | + case 4: |
---|
| 2076 | + x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; |
---|
| 2077 | + x86_pmu.pebs_record_size = sizeof(struct pebs_basic); |
---|
| 2078 | + if (x86_pmu.intel_cap.pebs_baseline) { |
---|
| 2079 | + x86_pmu.large_pebs_flags |= |
---|
| 2080 | + PERF_SAMPLE_BRANCH_STACK | |
---|
| 2081 | + PERF_SAMPLE_TIME; |
---|
| 2082 | + x86_pmu.flags |= PMU_FL_PEBS_ALL; |
---|
| 2083 | + pebs_qual = "-baseline"; |
---|
| 2084 | + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; |
---|
| 2085 | + } else { |
---|
| 2086 | + /* Only basic record supported */ |
---|
| 2087 | + x86_pmu.large_pebs_flags &= |
---|
| 2088 | + ~(PERF_SAMPLE_ADDR | |
---|
| 2089 | + PERF_SAMPLE_TIME | |
---|
| 2090 | + PERF_SAMPLE_DATA_SRC | |
---|
| 2091 | + PERF_SAMPLE_TRANSACTION | |
---|
| 2092 | + PERF_SAMPLE_REGS_USER | |
---|
| 2093 | + PERF_SAMPLE_REGS_INTR); |
---|
| 2094 | + } |
---|
| 2095 | + pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); |
---|
| 2096 | + |
---|
| 2097 | + if (x86_pmu.intel_cap.pebs_output_pt_available) { |
---|
| 2098 | + pr_cont("PEBS-via-PT, "); |
---|
| 2099 | + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; |
---|
| 2100 | + } |
---|
| 2101 | + |
---|
| 2102 | + break; |
---|
| 2103 | + |
---|
1672 | 2104 | default: |
---|
1673 | 2105 | pr_cont("no PEBS fmt%d%c, ", format, pebs_type); |
---|
1674 | 2106 | x86_pmu.pebs = 0; |
---|