forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
--- a/kernel/arch/x86/events/intel/ds.c
+++ b/kernel/arch/x86/events/intel/ds.c
@@ -7,6 +7,7 @@
 #include <asm/perf_event.h>
 #include <asm/tlbflush.h>
 #include <asm/insn.h>
+#include <asm/io.h>

 #include "../perf_event.h"

@@ -337,7 +338,7 @@
 struct debug_store *ds = hwev->ds;
 size_t bsiz = x86_pmu.pebs_buffer_size;
 int max, node = cpu_to_node(cpu);
- void *buffer, *ibuffer, *cea;
+ void *buffer, *insn_buff, *cea;

 if (!x86_pmu.pebs)
 return 0;
@@ -351,12 +352,12 @@
 * buffer then.
 */
 if (x86_pmu.intel_cap.pebs_format < 2) {
- ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
- if (!ibuffer) {
+ insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!insn_buff) {
 dsfree_pages(buffer, bsiz);
 return -ENOMEM;
 }
- per_cpu(insn_buffer, cpu) = ibuffer;
+ per_cpu(insn_buffer, cpu) = insn_buff;
 }
 hwev->ds_pebs_vaddr = buffer;
 /* Update the cpu entry area mapping */
@@ -641,8 +642,8 @@
 rcu_read_lock();
 perf_prepare_sample(&header, &data, event, &regs);

- if (perf_output_begin(&handle, event, header.size *
- (top - base - skip)))
+ if (perf_output_begin(&handle, &data, event,
+ header.size * (top - base - skip)))
 goto unlock;

 for (at = base; at < top; at++) {
@@ -669,9 +670,9 @@

 static inline void intel_pmu_drain_pebs_buffer(void)
 {
- struct pt_regs regs;
+ struct perf_sample_data data;

- x86_pmu.drain_pebs(&regs);
+ x86_pmu.drain_pebs(NULL, &data);
 }

 /*
@@ -849,6 +850,31 @@
 EVENT_CONSTRAINT_END
 };

+struct event_constraint intel_icl_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
+
+ INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
+
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
+
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
+
+ /*
+ * Everything else is handled by PMU_FL_PEBS_ALL, because we
+ * need the full constraints from the main table.
+ */
+
+ EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 struct event_constraint *c;
@@ -858,7 +884,7 @@

 if (x86_pmu.pebs_constraints) {
 for_each_event_constraint(c, x86_pmu.pebs_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
+ if (constraint_match(c, event->hw.config)) {
 event->hw.flags |= c->flags;
 return c;
 }
@@ -882,6 +908,9 @@
 */
 static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 {
+ if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
+ return false;
+
 return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }

@@ -899,6 +928,9 @@
 u64 threshold;
 int reserved;

+ if (cpuc->n_pebs_via_pt)
+ return;
+
 if (x86_pmu.flags & PMU_FL_PEBS_ALL)
 reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
 else
@@ -906,17 +938,87 @@

 if (cpuc->n_pebs == cpuc->n_large_pebs) {
 threshold = ds->pebs_absolute_maximum -
- reserved * x86_pmu.pebs_record_size;
+ reserved * cpuc->pebs_record_size;
 } else {
- threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+ threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
 }

 ds->pebs_interrupt_threshold = threshold;
 }

-static void
-pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+static void adaptive_pebs_record_size_update(void)
 {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 pebs_data_cfg = cpuc->pebs_data_cfg;
+ int sz = sizeof(struct pebs_basic);
+
+ if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
+ sz += sizeof(struct pebs_meminfo);
+ if (pebs_data_cfg & PEBS_DATACFG_GP)
+ sz += sizeof(struct pebs_gprs);
+ if (pebs_data_cfg & PEBS_DATACFG_XMMS)
+ sz += sizeof(struct pebs_xmm);
+ if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+ sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
+
+ cpuc->pebs_record_size = sz;
+}
+
+#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
+ PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \
+ PERF_SAMPLE_TRANSACTION)
+
+static u64 pebs_update_adaptive_cfg(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ u64 sample_type = attr->sample_type;
+ u64 pebs_data_cfg = 0;
+ bool gprs, tsx_weight;
+
+ if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
+ attr->precise_ip > 1)
+ return pebs_data_cfg;
+
+ if (sample_type & PERF_PEBS_MEMINFO_TYPE)
+ pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
+
+ /*
+ * We need GPRs when:
+ * + user requested them
+ * + precise_ip < 2 for the non event IP
+ * + For RTM TSX weight we need GPRs for the abort code.
+ */
+ gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
+ (attr->sample_regs_intr & PEBS_GP_REGS);
+
+ tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
+ ((attr->config & INTEL_ARCH_EVENT_MASK) ==
+ x86_pmu.rtm_abort_event);
+
+ if (gprs || (attr->precise_ip < 2) || tsx_weight)
+ pebs_data_cfg |= PEBS_DATACFG_GP;
+
+ if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
+ (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
+ pebs_data_cfg |= PEBS_DATACFG_XMMS;
+
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ /*
+ * For now always log all LBRs. Could configure this
+ * later.
+ */
+ pebs_data_cfg |= PEBS_DATACFG_LBRS |
+ ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
+ }
+
+ return pebs_data_cfg;
+}
+
+static void
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
+ struct perf_event *event, bool add)
+{
+ struct pmu *pmu = event->ctx->pmu;
 /*
 * Make sure we get updated with the first PEBS
 * event. It will trigger also during removal, but
@@ -933,6 +1035,29 @@
 update = true;
 }

+ /*
+ * The PEBS record doesn't shrink on pmu::del(). Doing so would require
+ * iterating all remaining PEBS events to reconstruct the config.
+ */
+ if (x86_pmu.intel_cap.pebs_baseline && add) {
+ u64 pebs_data_cfg;
+
+ /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
+ if (cpuc->n_pebs == 1) {
+ cpuc->pebs_data_cfg = 0;
+ cpuc->pebs_record_size = sizeof(struct pebs_basic);
+ }
+
+ pebs_data_cfg = pebs_update_adaptive_cfg(event);
+
+ /* Update pebs_record_size if new event requires more data. */
+ if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
+ cpuc->pebs_data_cfg |= pebs_data_cfg;
+ adaptive_pebs_record_size_update();
+ update = true;
+ }
+ }
+
 if (update)
 pebs_update_threshold(cpuc);
 }
@@ -946,8 +1071,38 @@
 cpuc->n_pebs++;
 if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
 cpuc->n_large_pebs++;
+ if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
+ cpuc->n_pebs_via_pt++;

- pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
+ pebs_update_state(needed_cb, cpuc, event, true);
+}
+
+static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (!is_pebs_pt(event))
+ return;
+
+ if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
+ cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
+}
+
+static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ struct debug_store *ds = cpuc->ds;
+
+ if (!is_pebs_pt(event))
+ return;
+
+ if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
+ cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
+
+ cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
+
+ wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
 }

 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -960,10 +1115,18 @@

 cpuc->pebs_enabled |= 1ULL << hwc->idx;

- if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+ if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
 cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
 else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 cpuc->pebs_enabled |= 1ULL << 63;
+
+ if (x86_pmu.intel_cap.pebs_baseline) {
+ hwc->config |= ICL_EVENTSEL_ADAPTIVE;
+ if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
+ wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
+ cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
+ }
+ }

 /*
 * Use auto-reload if possible to save a MSR write in the PMI.
@@ -979,6 +1142,8 @@
 } else {
 ds->pebs_event_reset[hwc->idx] = 0;
 }
+
+ intel_pmu_pebs_via_pt_enable(event);
 }

 void intel_pmu_pebs_del(struct perf_event *event)
@@ -990,8 +1155,10 @@
 cpuc->n_pebs--;
 if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
 cpuc->n_large_pebs--;
+ if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
+ cpuc->n_pebs_via_pt--;

- pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
+ pebs_update_state(needed_cb, cpuc, event, false);
 }

 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -999,15 +1166,19 @@
 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 struct hw_perf_event *hwc = &event->hw;

- if (cpuc->n_pebs == cpuc->n_large_pebs)
+ if (cpuc->n_pebs == cpuc->n_large_pebs &&
+ cpuc->n_pebs != cpuc->n_pebs_via_pt)
 intel_pmu_drain_pebs_buffer();

 cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

- if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+ if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
+ (x86_pmu.version < 5))
 cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
 else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 cpuc->pebs_enabled &= ~(1ULL << 63);
+
+ intel_pmu_pebs_via_pt_disable(event);

 if (cpuc->enabled)
 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -1125,34 +1296,57 @@
 return 0;
 }

-static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
+static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
 {
- if (pebs->tsx_tuning) {
- union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+ if (tsx_tuning) {
+ union hsw_tsx_tuning tsx = { .value = tsx_tuning };
 return tsx.cycles_last_block;
 }
 return 0;
 }

-static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
+static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
 {
- u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+ u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

 /* For RTM XABORTs also log the abort code from AX */
- if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
- txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+ if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
+ txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
 return txn;
 }

-static void setup_pebs_sample_data(struct perf_event *event,
- struct pt_regs *iregs, void *__pebs,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static inline u64 get_pebs_status(void *n)
 {
+ if (x86_pmu.intel_cap.pebs_format < 4)
+ return ((struct pebs_record_nhm *)n)->status;
+ return ((struct pebs_basic *)n)->applicable_counters;
+}
+
 #define PERF_X86_EVENT_PEBS_HSW_PREC \
 (PERF_X86_EVENT_PEBS_ST_HSW | \
 PERF_X86_EVENT_PEBS_LD_HSW | \
 PERF_X86_EVENT_PEBS_NA_HSW)
+
+static u64 get_data_src(struct perf_event *event, u64 aux)
+{
+ u64 val = PERF_MEM_NA;
+ int fl = event->hw.flags;
+ bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
+
+ if (fl & PERF_X86_EVENT_PEBS_LDLAT)
+ val = load_latency_data(aux);
+ else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
+ val = precise_datala_hsw(event, aux);
+ else if (fst)
+ val = precise_store_data(aux);
+ return val;
+}
+
+static void setup_pebs_fixed_sample_data(struct perf_event *event,
+ struct pt_regs *iregs, void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
 /*
 * We cast to the biggest pebs_record but are careful not to
 * unconditionally access the 'extra' entries.
@@ -1160,17 +1354,13 @@
 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 struct pebs_record_skl *pebs = __pebs;
 u64 sample_type;
- int fll, fst, dsrc;
- int fl = event->hw.flags;
+ int fll;

 if (pebs == NULL)
 return;

 sample_type = event->attr.sample_type;
- dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
-
- fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
- fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
+ fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;

 perf_sample_data_init(data, 0, event->hw.last_period);

@@ -1185,21 +1375,13 @@
 /*
 * data.data_src encodes the data source
 */
- if (dsrc) {
- u64 val = PERF_MEM_NA;
- if (fll)
- val = load_latency_data(pebs->dse);
- else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
- val = precise_datala_hsw(event, pebs->dse);
- else if (fst)
- val = precise_store_data(pebs->dse);
- data->data_src.val = val;
- }
+ if (sample_type & PERF_SAMPLE_DATA_SRC)
+ data->data_src.val = get_data_src(event, pebs->dse);

 /*
 * We must however always use iregs for the unwinder to stay sane; the
 * record BP,SP,IP can point into thin air when the record is from a
- * previous PMI context or an (I)RET happend between the record and
+ * previous PMI context or an (I)RET happened between the record and
 * PMI.
 */
 if (sample_type & PERF_SAMPLE_CALLCHAIN)
@@ -1281,10 +1463,11 @@
 if (x86_pmu.intel_cap.pebs_format >= 2) {
 /* Only set the TSX weight when no memory weight. */
 if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
- data->weight = intel_hsw_weight(pebs);
+ data->weight = intel_get_tsx_weight(pebs->tsx_tuning);

 if (sample_type & PERF_SAMPLE_TRANSACTION)
- data->txn = intel_hsw_transaction(pebs);
+ data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
+ pebs->ax);
 }

 /*
@@ -1299,6 +1482,140 @@

 if (has_branch_stack(event))
 data->br_stack = &cpuc->lbr_stack;
+}
+
+static void adaptive_pebs_save_regs(struct pt_regs *regs,
+ struct pebs_gprs *gprs)
+{
+ regs->ax = gprs->ax;
+ regs->bx = gprs->bx;
+ regs->cx = gprs->cx;
+ regs->dx = gprs->dx;
+ regs->si = gprs->si;
+ regs->di = gprs->di;
+ regs->bp = gprs->bp;
+ regs->sp = gprs->sp;
+#ifndef CONFIG_X86_32
+ regs->r8 = gprs->r8;
+ regs->r9 = gprs->r9;
+ regs->r10 = gprs->r10;
+ regs->r11 = gprs->r11;
+ regs->r12 = gprs->r12;
+ regs->r13 = gprs->r13;
+ regs->r14 = gprs->r14;
+ regs->r15 = gprs->r15;
+#endif
+}
+
+/*
+ * With adaptive PEBS the layout depends on what fields are configured.
+ */
+
+static void setup_pebs_adaptive_sample_data(struct perf_event *event,
+ struct pt_regs *iregs, void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct pebs_basic *basic = __pebs;
+ void *next_record = basic + 1;
+ u64 sample_type;
+ u64 format_size;
+ struct pebs_meminfo *meminfo = NULL;
+ struct pebs_gprs *gprs = NULL;
+ struct x86_perf_regs *perf_regs;
+
+ if (basic == NULL)
+ return;
+
+ perf_regs = container_of(regs, struct x86_perf_regs, regs);
+ perf_regs->xmm_regs = NULL;
+
+ sample_type = event->attr.sample_type;
+ format_size = basic->format_size;
+ perf_sample_data_init(data, 0, event->hw.last_period);
+ data->period = event->hw.last_period;
+
+ if (event->attr.use_clockid == 0)
+ data->time = native_sched_clock_from_tsc(basic->tsc);
+
+ /*
+ * We must however always use iregs for the unwinder to stay sane; the
+ * record BP,SP,IP can point into thin air when the record is from a
+ * previous PMI context or an (I)RET happened between the record and
+ * PMI.
+ */
+ if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ data->callchain = perf_callchain(event, iregs);
+
+ *regs = *iregs;
+ /* The ip in basic is EventingIP */
+ set_linear_ip(regs, basic->ip);
+ regs->flags = PERF_EFLAGS_EXACT;
+
+ /*
+ * The record for MEMINFO is in front of GP
+ * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
+ * Save the pointer here but process later.
+ */
+ if (format_size & PEBS_DATACFG_MEMINFO) {
+ meminfo = next_record;
+ next_record = meminfo + 1;
+ }
+
+ if (format_size & PEBS_DATACFG_GP) {
+ gprs = next_record;
+ next_record = gprs + 1;
+
+ if (event->attr.precise_ip < 2) {
+ set_linear_ip(regs, gprs->ip);
+ regs->flags &= ~PERF_EFLAGS_EXACT;
+ }
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR)
+ adaptive_pebs_save_regs(regs, gprs);
+ }
+
+ if (format_size & PEBS_DATACFG_MEMINFO) {
+ if (sample_type & PERF_SAMPLE_WEIGHT)
+ data->weight = meminfo->latency ?:
+ intel_get_tsx_weight(meminfo->tsx_tuning);
+
+ if (sample_type & PERF_SAMPLE_DATA_SRC)
+ data->data_src.val = get_data_src(event, meminfo->aux);
+
+ if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
+ data->addr = meminfo->address;
+
+ if (sample_type & PERF_SAMPLE_TRANSACTION)
+ data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
+ gprs ? gprs->ax : 0);
+ }
+
+ if (format_size & PEBS_DATACFG_XMMS) {
+ struct pebs_xmm *xmm = next_record;
+
+ next_record = xmm + 1;
+ perf_regs->xmm_regs = xmm->xmm;
+ }
+
+ if (format_size & PEBS_DATACFG_LBRS) {
+ struct lbr_entry *lbr = next_record;
+ int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
+ & 0xff) + 1;
+ next_record = next_record + num_lbr * sizeof(struct lbr_entry);
+
+ if (has_branch_stack(event)) {
+ intel_pmu_store_pebs_lbrs(lbr);
+ data->br_stack = &cpuc->lbr_stack;
+ }
+ }
+
+ WARN_ONCE(next_record != __pebs + (format_size >> 48),
+ "PEBS record size %llu, expected %llu, config %llx\n",
+ format_size >> 48,
+ (u64)(next_record - __pebs),
+ basic->format_size);
 }

 static inline void *
@@ -1318,19 +1635,19 @@
 if (base == NULL)
 return NULL;

- for (at = base; at < top; at += x86_pmu.pebs_record_size) {
- struct pebs_record_nhm *p = at;
+ for (at = base; at < top; at += cpuc->pebs_record_size) {
+ unsigned long status = get_pebs_status(at);

- if (test_bit(bit, (unsigned long *)&p->status)) {
+ if (test_bit(bit, (unsigned long *)&status)) {
 /* PEBS v3 has accurate status bits */
 if (x86_pmu.intel_cap.pebs_format >= 3)
 return at;

- if (p->status == (1 << bit))
+ if (status == (1 << bit))
 return at;

 /* clear non-PEBS bit and re-check */
- pebs_status = p->status & cpuc->pebs_enabled;
+ pebs_status = status & cpuc->pebs_enabled;
 pebs_status &= PEBS_COUNTER_MASK;
 if (pebs_status == (1 << bit))
 return at;
@@ -1409,15 +1726,24 @@
 return 0;
 }

-static void __intel_pmu_pebs_event(struct perf_event *event,
- struct pt_regs *iregs,
- void *base, void *top,
- int bit, int count)
+static __always_inline void
+__intel_pmu_pebs_event(struct perf_event *event,
+ struct pt_regs *iregs,
+ struct perf_sample_data *data,
+ void *base, void *top,
+ int bit, int count,
+ void (*setup_sample)(struct perf_event *,
+ struct pt_regs *,
+ void *,
+ struct perf_sample_data *,
+ struct pt_regs *))
 {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 struct hw_perf_event *hwc = &event->hw;
- struct perf_sample_data data;
- struct pt_regs regs;
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
 void *at = get_next_pebs_record_by_bit(base, top, bit);
+ static struct pt_regs dummy_iregs;

 if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 /*
@@ -1430,28 +1756,37 @@
 } else if (!intel_pmu_save_and_restart(event))
 return;

+ if (!iregs)
+ iregs = &dummy_iregs;
+
 while (count > 1) {
- setup_pebs_sample_data(event, iregs, at, &data, &regs);
- perf_event_output(event, &data, &regs);
- at += x86_pmu.pebs_record_size;
+ setup_sample(event, iregs, at, data, regs);
+ perf_event_output(event, data, regs);
+ at += cpuc->pebs_record_size;
 at = get_next_pebs_record_by_bit(at, top, bit);
 count--;
 }

- setup_pebs_sample_data(event, iregs, at, &data, &regs);
-
- /*
- * All but the last records are processed.
- * The last one is left to be able to call the overflow handler.
- */
- if (perf_event_overflow(event, &data, &regs)) {
- x86_pmu_stop(event, 0);
- return;
+ setup_sample(event, iregs, at, data, regs);
+ if (iregs == &dummy_iregs) {
+ /*
+ * The PEBS records may be drained in the non-overflow context,
+ * e.g., large PEBS + context switch. Perf should treat the
+ * last record the same as other PEBS records, and doesn't
+ * invoke the generic overflow handler.
+ */
+ perf_event_output(event, data, regs);
+ } else {
+ /*
+ * All but the last records are processed.
+ * The last one is left to be able to call the overflow handler.
+ */
+ if (perf_event_overflow(event, data, regs))
+ x86_pmu_stop(event, 0);
 }
-
 }

-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
 {
 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 struct debug_store *ds = cpuc->ds;
@@ -1485,10 +1820,30 @@
 return;
 }

- __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
+ __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
+ setup_pebs_fixed_sample_data);
 }

-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
+{
+ struct perf_event *event;
+ int bit;
+
+ /*
+ * The drain_pebs() could be called twice in a short period
+ * for auto-reload event in pmu::read(). There are no
+ * overflows have happened in between.
+ * It needs to call intel_pmu_save_and_restart_reload() to
+ * update the event->count for this case.
+ */
+ for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
+ event = cpuc->events[bit];
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ intel_pmu_save_and_restart_reload(event, 0);
+ }
+}
+
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
 {
 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 struct debug_store *ds = cpuc->ds;
@@ -1515,19 +1870,7 @@
 }

 if (unlikely(base >= top)) {
- /*
- * The drain_pebs() could be called twice in a short period
- * for auto-reload event in pmu::read(). There are no
- * overflows have happened in between.
- * It needs to call intel_pmu_save_and_restart_reload() to
- * update the event->count for this case.
- */
- for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
- size) {
- event = cpuc->events[bit];
- if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
- intel_pmu_save_and_restart_reload(event, 0);
- }
+ intel_pmu_pebs_event_update_no_drain(cpuc, size);
 return;
 }

@@ -1540,8 +1883,7 @@

 /* PEBS v3 has more accurate status bits */
 if (x86_pmu.intel_cap.pebs_format >= 3) {
- for_each_set_bit(bit, (unsigned long *)&pebs_status,
- size)
+ for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
 counts[bit]++;

 continue;
@@ -1579,9 +1921,8 @@
 * that caused the PEBS record. It's called collision.
 * If collision happened, the record will be dropped.
 */
- if (p->status != (1ULL << bit)) {
- for_each_set_bit(i, (unsigned long *)&pebs_status,
- x86_pmu.max_pebs_events)
+ if (pebs_status != (1ULL << bit)) {
+ for_each_set_bit(i, (unsigned long *)&pebs_status, size)
 error[i]++;
 continue;
 }
@@ -1589,7 +1930,7 @@
 counts[bit]++;
 }

- for (bit = 0; bit < size; bit++) {
+ for_each_set_bit(bit, (unsigned long *)&mask, size) {
 if ((counts[bit] == 0) && (error[bit] == 0))
 continue;

@@ -1604,14 +1945,69 @@
 if (error[bit]) {
 perf_log_lost_samples(event, error[bit]);

- if (perf_event_account_interrupt(event))
+ if (iregs && perf_event_account_interrupt(event))
 x86_pmu_stop(event, 0);
 }

 if (counts[bit]) {
- __intel_pmu_pebs_event(event, iregs, base,
- top, bit, counts[bit]);
+ __intel_pmu_pebs_event(event, iregs, data, base,
+ top, bit, counts[bit],
+ setup_pebs_fixed_sample_data);
 }
+ }
+}
+
+static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
+{
+ short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct perf_event *event;
+ void *base, *at, *top;
+ int bit, size;
+ u64 mask;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
+
+ ds->pebs_index = ds->pebs_buffer_base;
+
+ mask = ((1ULL << x86_pmu.max_pebs_events) - 1) |
+ (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
+ size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
+
+ if (unlikely(base >= top)) {
+ intel_pmu_pebs_event_update_no_drain(cpuc, size);
+ return;
+ }
+
+ for (at = base; at < top; at += cpuc->pebs_record_size) {
+ u64 pebs_status;
+
+ pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
+ pebs_status &= mask;
+
+ for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
+ counts[bit]++;
+ }
+
+ for_each_set_bit(bit, (unsigned long *)&mask, size) {
+ if (counts[bit] == 0)
+ continue;
+
+ event = cpuc->events[bit];
+ if (WARN_ON_ONCE(!event))
+ continue;
+
+ if (WARN_ON_ONCE(!event->attr.precise_ip))
+ continue;
+
+ __intel_pmu_pebs_event(event, iregs, data, base,
+ top, bit, counts[bit],
+ setup_pebs_adaptive_sample_data);
 }
 }

@@ -1630,9 +2026,16 @@
 x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
 x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
 x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+ if (x86_pmu.version <= 4)
+ x86_pmu.pebs_no_isolation = 1;
+
 if (x86_pmu.pebs) {
 char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
+ char *pebs_qual = "";
 int format = x86_pmu.intel_cap.pebs_format;
+
+ if (format < 4)
+ x86_pmu.intel_cap.pebs_baseline = 0;

 switch (format) {
 case 0:
@@ -1669,6 +2072,35 @@
 x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
 break;

+ case 4:
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
+ x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
+ if (x86_pmu.intel_cap.pebs_baseline) {
+ x86_pmu.large_pebs_flags |=
+ PERF_SAMPLE_BRANCH_STACK |
+ PERF_SAMPLE_TIME;
+ x86_pmu.flags |= PMU_FL_PEBS_ALL;
+ pebs_qual = "-baseline";
+ x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
+ } else {
+ /* Only basic record supported */
+ x86_pmu.large_pebs_flags &=
+ ~(PERF_SAMPLE_ADDR |
+ PERF_SAMPLE_TIME |
+ PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_TRANSACTION |
+ PERF_SAMPLE_REGS_USER |
+ PERF_SAMPLE_REGS_INTR);
+ }
+ pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
+
+ if (x86_pmu.intel_cap.pebs_output_pt_available) {
+ pr_cont("PEBS-via-PT, ");
+ x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
+ }
+
+ break;
+
 default:
 pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
 x86_pmu.pebs = 0;