forked from ~ljy/RK356X_SDK_RELEASE

Author: hc
Date:   2023-12-11
Commit: 072de836f53be56a70cecf70b43ae43b7ce17376

kernel/arch/powerpc/perf/imc-pmu.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * In-Memory Collection (IMC) Performance Monitor counter support.
  *
  * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
  *           (C) 2017 Anju T Sudhakar, IBM Corporation.
  *           (C) 2017 Hemant K Shaw, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or later version.
  */
 #include <linux/perf_event.h>
 #include <linux/slab.h>
@@ -28,13 +24,13 @@
 static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
 static struct imc_pmu **per_nest_pmu_arr;
 static cpumask_t nest_imc_cpumask;
-struct imc_pmu_ref *nest_imc_refc;
+static struct imc_pmu_ref *nest_imc_refc;
 static int nest_pmus;
 
 /* Core IMC data structures and variables */
 
 static cpumask_t core_imc_cpumask;
-struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu_ref *core_imc_refc;
 static struct imc_pmu *core_imc_pmu;
 
 /* Thread IMC data structures and variables */
@@ -43,12 +39,27 @@
 static struct imc_pmu *thread_imc_pmu;
 static int thread_imc_mem_size;
 
-struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+/* Trace IMC data structures */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem);
+static struct imc_pmu_ref *trace_imc_refc;
+static int trace_imc_mem_size;
+
+/*
+ * Global data structure used to avoid races between thread,
+ * core and trace-imc
+ */
+static struct imc_pmu_ref imc_global_refc = {
+	.lock = __MUTEX_INITIALIZER(imc_global_refc.lock),
+	.id = 0,
+	.refc = 0,
+};
+
+static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
 	return container_of(event->pmu, struct imc_pmu, pmu);
 }
 
-PMU_FORMAT_ATTR(event, "config:0-40");
+PMU_FORMAT_ATTR(event, "config:0-61");
 PMU_FORMAT_ATTR(offset, "config:0-31");
 PMU_FORMAT_ATTR(rvalue, "config:32");
 PMU_FORMAT_ATTR(mode, "config:33-40");
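A note on the imc_global_refc structure added above: the hardware can run IMC either in accumulation mode (nest/core/thread) or in trace mode, but not both at once, so every event_init path later in this patch claims the global domain under imc_global_refc.lock and bails out with -EBUSY if a different domain already owns it. A minimal sketch of that claim pattern follows (the helper name is illustrative; the patch open-codes this logic in each event_init and releases it through reset_global_refc):

	/* Sketch only, assuming the imc_global_refc definition from the hunk above. */
	static int imc_claim_global_domain(int domain)
	{
		int ret = 0;

		mutex_lock(&imc_global_refc.lock);
		if (imc_global_refc.id == 0 || imc_global_refc.id == domain) {
			imc_global_refc.id = domain;	/* first user records the owner */
			imc_global_refc.refc++;
		} else {
			ret = -EBUSY;			/* another IMC mode is already active */
		}
		mutex_unlock(&imc_global_refc.lock);
		return ret;
	}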
@@ -63,6 +74,25 @@
 static struct attribute_group imc_format_group = {
 	.name = "format",
 	.attrs = imc_format_attrs,
+};
+
+/* Format attribute for imc trace-mode */
+PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
+PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
+PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
+PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
+static struct attribute *trace_imc_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_cpmc_reserved.attr,
+	&format_attr_cpmc_event.attr,
+	&format_attr_cpmc_samplesel.attr,
+	&format_attr_cpmc_load.attr,
+	NULL,
+};
+
+static struct attribute_group trace_imc_format_group = {
+.name = "format",
+.attrs = trace_imc_format_attrs,
 };
 
 /* Get the cpumask printed to a buffer "buf" */
@@ -342,7 +372,14 @@
 	 */
 	nid = cpu_to_node(cpu);
 	l_cpumask = cpumask_of_node(nid);
-	target = cpumask_any_but(l_cpumask, cpu);
+	target = cpumask_last(l_cpumask);
+
+	/*
+	 * If this(target) is the last cpu in the cpumask for this chip,
+	 * check for any possible online cpu in the chip.
+	 */
+	if (unlikely(target == cpu))
+		target = cpumask_any_but(l_cpumask, cpu);
 
 	/*
 	 * Update the cpumask with the target cpu and
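The rework above (repeated for core-imc in a later hunk) avoids a full mask scan in the common case: cpumask_last() picks a candidate cheaply, and cpumask_any_but() is only used as a fallback when the CPU going offline happens to be the last one in its chip's mask. In isolation the pattern is (sketch, using the same kernel cpumask helpers):

	target = cpumask_last(l_cpumask);
	if (unlikely(target == cpu))		/* offlining CPU was the last in the mask */
		target = cpumask_any_but(l_cpumask, cpu);
	/* callers still validate target against nr_cpu_ids before using it */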
@@ -473,15 +510,6 @@
 	if (event->hw.sample_period)
 		return -EINVAL;
 
-	/* unsupported modes and filters */
-	if (event->attr.exclude_user ||
-	    event->attr.exclude_kernel ||
-	    event->attr.exclude_hv ||
-	    event->attr.exclude_idle ||
-	    event->attr.exclude_host ||
-	    event->attr.exclude_guest)
-		return -EINVAL;
-
 	if (event->cpu < 0)
 		return -EINVAL;
 
@@ -559,6 +587,7 @@
 {
 	int nid, rc = 0, core_id = (cpu / threads_per_core);
 	struct imc_mem_info *mem_info;
+	struct page *page;
 
 	/*
 	 * alloc_pages_node() will allocate memory for core in the
@@ -569,11 +598,12 @@
 	mem_info->id = core_id;
 
 	/* We need only vbase for core counters */
-	mem_info->vbase = page_address(alloc_pages_node(nid,
-			  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
-			  __GFP_NOWARN, get_order(size)));
-	if (!mem_info->vbase)
+	page = alloc_pages_node(nid,
+			  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+			  __GFP_NOWARN, get_order(size));
+	if (!page)
 		return -ENOMEM;
+	mem_info->vbase = page_address(page);
 
 	/* Init the mutex */
 	core_imc_refc[core_id].id = core_id;
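The hunk above tightens the allocation error handling: page_address() was previously applied to the result of alloc_pages_node() before the failure check, so the check tested the derived address rather than the allocation itself. The corrected shape, in general form (sketch):

	struct page *page;

	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
				__GFP_NOWARN, get_order(size));
	if (!page)
		return -ENOMEM;			/* fail before deriving any address */
	mem_info->vbase = page_address(page);	/* safe: page is known to be valid here */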
@@ -656,7 +686,10 @@
 		return 0;
 
 	/* Find any online cpu in that core except the current "cpu" */
-	ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+	ncpu = cpumask_last(cpu_sibling_mask(cpu));
+
+	if (unlikely(ncpu == cpu))
+		ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
 
 	if (ncpu >= 0 && ncpu < nr_cpu_ids) {
 		cpumask_set_cpu(ncpu, &core_imc_cpumask);
@@ -675,6 +708,16 @@
 			return -EINVAL;
 
 		ref->refc = 0;
+		/*
+		 * Reduce the global reference count, if this is the
+		 * last cpu in this core and core-imc event running
+		 * in this cpu.
+		 */
+		mutex_lock(&imc_global_refc.lock);
+		if (imc_global_refc.id == IMC_DOMAIN_CORE)
+			imc_global_refc.refc--;
+
+		mutex_unlock(&imc_global_refc.lock);
 	}
 	return 0;
 }
@@ -685,6 +728,23 @@
 				 "perf/powerpc/imc_core:online",
 				 ppc_core_imc_cpu_online,
 				 ppc_core_imc_cpu_offline);
+}
+
+static void reset_global_refc(struct perf_event *event)
+{
+	mutex_lock(&imc_global_refc.lock);
+	imc_global_refc.refc--;
+
+	/*
+	 * If no other thread is running any
+	 * event for this domain(thread/core/trace),
+	 * set the global id to zero.
+	 */
+	if (imc_global_refc.refc <= 0) {
+		imc_global_refc.refc = 0;
+		imc_global_refc.id = 0;
+	}
+	mutex_unlock(&imc_global_refc.lock);
 }
 
 static void core_imc_counters_release(struct perf_event *event)
@@ -736,6 +796,8 @@
 		ref->refc = 0;
 	}
 	mutex_unlock(&ref->lock);
+
+	reset_global_refc(event);
 }
 
 static int core_imc_event_init(struct perf_event *event)
@@ -751,15 +813,6 @@
 
 	/* Sampling not supported */
 	if (event->hw.sample_period)
-		return -EINVAL;
-
-	/* unsupported modes and filters */
-	if (event->attr.exclude_user ||
-	    event->attr.exclude_kernel ||
-	    event->attr.exclude_hv ||
-	    event->attr.exclude_idle ||
-	    event->attr.exclude_host ||
-	    event->attr.exclude_guest)
 		return -EINVAL;
 
 	if (event->cpu < 0)
@@ -805,14 +858,40 @@
 	++ref->refc;
 	mutex_unlock(&ref->lock);
 
+	/*
+	 * Since the system can run either in accumulation or trace-mode
+	 * of IMC at a time, core-imc events are allowed only if no other
+	 * trace/thread imc events are enabled/monitored.
+	 *
+	 * Take the global lock, and check the refc.id
+	 * to know whether any other trace/thread imc
+	 * events are running.
+	 */
+	mutex_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
+		/*
+		 * No other trace/thread imc events are running in
+		 * the system, so set the refc.id to core-imc.
+		 */
+		imc_global_refc.id = IMC_DOMAIN_CORE;
+		imc_global_refc.refc++;
+	} else {
+		mutex_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	mutex_unlock(&imc_global_refc.lock);
+
 	event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
 	event->destroy = core_imc_counters_release;
 	return 0;
 }
 
 /*
- * Allocates a page of memory for each of the online cpus, and write the
- * physical base address of that page to the LDBAR for that cpu.
+ * Allocates a page of memory for each of the online cpus, and load
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
  *
  * LDBAR Register Layout:
  *
@@ -830,26 +909,26 @@
  */
 static int thread_imc_mem_alloc(int cpu_id, int size)
 {
-	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
+	u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
 	int nid = cpu_to_node(cpu_id);
 
 	if (!local_mem) {
+		struct page *page;
 		/*
 		 * This case could happen only once at start, since we dont
 		 * free the memory in cpu offline path.
 		 */
-		local_mem = page_address(alloc_pages_node(nid,
+		page = alloc_pages_node(nid,
				  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
-				  __GFP_NOWARN, get_order(size)));
-		if (!local_mem)
+				  __GFP_NOWARN, get_order(size));
+		if (!page)
 			return -ENOMEM;
+		local_mem = page_address(page);
 
 		per_cpu(thread_imc_mem, cpu_id) = local_mem;
 	}
 
-	ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
-
-	mtspr(SPRN_LDBAR, ldbar_value);
+	mtspr(SPRN_LDBAR, 0);
 	return 0;
 }
 
@@ -860,7 +939,23 @@
 
 static int ppc_thread_imc_cpu_offline(unsigned int cpu)
 {
-	mtspr(SPRN_LDBAR, 0);
+	/*
+	 * Set the bit 0 of LDBAR to zero.
+	 *
+	 * If bit 0 of LDBAR is unset, it will stop posting
+	 * the counter data to memory.
+	 * For thread-imc, bit 0 of LDBAR will be set to 1 in the
+	 * event_add function. So reset this bit here, to stop the updates
+	 * to memory in the cpu_offline path.
+	 */
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+
+	/* Reduce the refc if thread-imc event running on this cpu */
+	mutex_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_THREAD)
+		imc_global_refc.refc--;
+	mutex_unlock(&imc_global_refc.lock);
+
 	return 0;
 }
 
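A note on the LDBAR arithmetic used above and in several later hunks: the comment's "bit 0" follows the Power ISA's big-endian bit numbering, where bit 0 is the most significant bit of the 64-bit register. Clearing it therefore masks off 1UL << 63, which stops the hardware posting counter data to memory while leaving the buffer address bits intact (sketch; the helper name is illustrative, the patch open-codes the expression):

	static inline void ldbar_stop_posting(void)
	{
		/* ISA bit 0 == conventional bit 63: the enable bit of LDBAR */
		mtspr(SPRN_LDBAR, mfspr(SPRN_LDBAR) & ~(1UL << 63));
	}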
@@ -881,6 +976,9 @@
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
+	if (!perfmon_capable())
+		return -EACCES;
+
 	/* Sampling not supported */
 	if (event->hw.sample_period)
 		return -EINVAL;
@@ -896,7 +994,22 @@
 	if (!target)
 		return -EINVAL;
 
+	mutex_lock(&imc_global_refc.lock);
+	/*
+	 * Check if any other trace/core imc events are running in the
+	 * system, if not set the global id to thread-imc.
+	 */
+	if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) {
+		imc_global_refc.id = IMC_DOMAIN_THREAD;
+		imc_global_refc.refc++;
+	} else {
+		mutex_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	mutex_unlock(&imc_global_refc.lock);
+
 	event->pmu->task_ctx_nr = perf_sw_context;
+	event->destroy = reset_global_refc;
 	return 0;
 }
 
@@ -1000,6 +1113,7 @@
 {
 	int core_id;
 	struct imc_pmu_ref *ref;
+	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
 
 	if (flags & PERF_EF_START)
 		imc_event_start(event, flags);
@@ -1008,6 +1122,9 @@
 		return -EINVAL;
 
 	core_id = smp_processor_id() / threads_per_core;
+	ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
+	mtspr(SPRN_LDBAR, ldbar_value);
+
 	/*
 	 * imc pmus are enabled only when it is used.
 	 * See if this is triggered for the first time.
@@ -1039,14 +1156,12 @@
 	int core_id;
 	struct imc_pmu_ref *ref;
 
-	/*
-	 * Take a snapshot and calculate the delta and update
-	 * the event counter values.
-	 */
-	imc_event_update(event);
-
 	core_id = smp_processor_id() / threads_per_core;
 	ref = &core_imc_refc[core_id];
+	if (!ref) {
+		pr_debug("imc: Failed to get event reference count\n");
+		return;
+	}
 
 	mutex_lock(&ref->lock);
 	ref->refc--;
@@ -1062,6 +1177,293 @@
 		ref->refc = 0;
 	}
 	mutex_unlock(&ref->lock);
+
+	/* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+
+	/*
+	 * Take a snapshot and calculate the delta and update
+	 * the event counter values.
+	 */
+	imc_event_update(event);
+}
+
+/*
+ * Allocate a page of memory for each cpu, and load LDBAR with 0.
+ */
+static int trace_imc_mem_alloc(int cpu_id, int size)
+{
+	u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
+	int phys_id = cpu_to_node(cpu_id), rc = 0;
+	int core_id = (cpu_id / threads_per_core);
+
+	if (!local_mem) {
+		struct page *page;
+
+		page = alloc_pages_node(phys_id,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+				__GFP_NOWARN, get_order(size));
+		if (!page)
+			return -ENOMEM;
+		local_mem = page_address(page);
+		per_cpu(trace_imc_mem, cpu_id) = local_mem;
+
+		/* Initialise the counters for trace mode */
+		rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
+					    get_hard_smp_processor_id(cpu_id));
+		if (rc) {
+			pr_info("IMC:opal init failed for trace imc\n");
+			return rc;
+		}
+	}
+
+	/* Init the mutex, if not already */
+	trace_imc_refc[core_id].id = core_id;
+	mutex_init(&trace_imc_refc[core_id].lock);
+
+	mtspr(SPRN_LDBAR, 0);
+	return 0;
+}
+
+static int ppc_trace_imc_cpu_online(unsigned int cpu)
+{
+	return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+}
+
+static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+{
+	/*
+	 * No need to set bit 0 of LDBAR to zero, as
+	 * it is set to zero for imc trace-mode
+	 *
+	 * Reduce the refc if any trace-imc event running
+	 * on this cpu.
+	 */
+	mutex_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_TRACE)
+		imc_global_refc.refc--;
+	mutex_unlock(&imc_global_refc.lock);
+
+	return 0;
+}
+
+static int trace_imc_cpu_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+			  "perf/powerpc/imc_trace:online",
+			  ppc_trace_imc_cpu_online,
+			  ppc_trace_imc_cpu_offline);
+}
+
+static u64 get_trace_imc_event_base_addr(void)
+{
+	return (u64)per_cpu(trace_imc_mem, smp_processor_id());
+}
+
+/*
+ * Function to parse trace-imc data obtained
+ * and to prepare the perf sample.
+ */
+static int trace_imc_prepare_sample(struct trace_imc_data *mem,
+				    struct perf_sample_data *data,
+				    u64 *prev_tb,
+				    struct perf_event_header *header,
+				    struct perf_event *event)
+{
+	/* Sanity checks for a valid record */
+	if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
+		*prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
+	else
+		return -EINVAL;
+
+	if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
+		       be64_to_cpu(READ_ONCE(mem->tb2)))
+		return -EINVAL;
+
+	/* Prepare perf sample */
+	data->ip = be64_to_cpu(READ_ONCE(mem->ip));
+	data->period = event->hw.last_period;
+
+	header->type = PERF_RECORD_SAMPLE;
+	header->size = sizeof(*header) + event->header_size;
+	header->misc = 0;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) {
+		case 0:/* when MSR HV and PR not set in the trace-record */
+			header->misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+			break;
+		case 1: /* MSR HV is 0 and PR is 1 */
+			header->misc |= PERF_RECORD_MISC_GUEST_USER;
+			break;
+		case 2: /* MSR HV is 1 and PR is 0 */
+			header->misc |= PERF_RECORD_MISC_KERNEL;
+			break;
+		case 3: /* MSR HV is 1 and PR is 1 */
+			header->misc |= PERF_RECORD_MISC_USER;
+			break;
+		default:
+			pr_info("IMC: Unable to set the flag based on MSR bits\n");
+			break;
+		}
+	} else {
+		if (is_kernel_addr(data->ip))
+			header->misc |= PERF_RECORD_MISC_KERNEL;
+		else
+			header->misc |= PERF_RECORD_MISC_USER;
+	}
+	perf_event_header__init_id(header, data, event);
+
+	return 0;
+}
+
+static void dump_trace_imc_data(struct perf_event *event)
+{
+	struct trace_imc_data *mem;
+	int i, ret;
+	u64 prev_tb = 0;
+
+	mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
+	for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
+		i++, mem++) {
+		struct perf_sample_data data;
+		struct perf_event_header header;
+
+		ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
+		if (ret) /* Exit, if not a valid record */
+			break;
+		else {
+			/* If this is a valid record, create the sample */
+			struct perf_output_handle handle;
+
+			if (perf_output_begin(&handle, &data, event, header.size))
+				return;
+
+			perf_output_sample(&handle, &header, &data, event);
+			perf_output_end(&handle);
+		}
+	}
+}
+
+static int trace_imc_event_add(struct perf_event *event, int flags)
+{
+	int core_id = smp_processor_id() / threads_per_core;
+	struct imc_pmu_ref *ref = NULL;
+	u64 local_mem, ldbar_value;
+
+	/* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */
+	local_mem = get_trace_imc_event_base_addr();
+	ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
+
+	/* trace-imc reference count */
+	if (trace_imc_refc)
+		ref = &trace_imc_refc[core_id];
+	if (!ref) {
+		pr_debug("imc: Failed to get the event reference count\n");
+		return -EINVAL;
+	}
+
+	mtspr(SPRN_LDBAR, ldbar_value);
+	mutex_lock(&ref->lock);
+	if (ref->refc == 0) {
+		if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
+				get_hard_smp_processor_id(smp_processor_id()))) {
+			mutex_unlock(&ref->lock);
+			pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
+			return -EINVAL;
+		}
+	}
+	++ref->refc;
+	mutex_unlock(&ref->lock);
+	return 0;
+}
+
+static void trace_imc_event_read(struct perf_event *event)
+{
+	return;
+}
+
+static void trace_imc_event_stop(struct perf_event *event, int flags)
+{
+	u64 local_mem = get_trace_imc_event_base_addr();
+	dump_trace_imc_data(event);
+	memset((void *)local_mem, 0, sizeof(u64));
+}
+
+static void trace_imc_event_start(struct perf_event *event, int flags)
+{
+	return;
+}
+
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+	int core_id = smp_processor_id() / threads_per_core;
+	struct imc_pmu_ref *ref = NULL;
+
+	if (trace_imc_refc)
+		ref = &trace_imc_refc[core_id];
+	if (!ref) {
+		pr_debug("imc: Failed to get event reference count\n");
+		return;
+	}
+
+	mutex_lock(&ref->lock);
+	ref->refc--;
+	if (ref->refc == 0) {
+		if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
+				get_hard_smp_processor_id(smp_processor_id()))) {
+			mutex_unlock(&ref->lock);
+			pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		ref->refc = 0;
+	}
+	mutex_unlock(&ref->lock);
+
+	trace_imc_event_stop(event, flags);
+}
+
+static int trace_imc_event_init(struct perf_event *event)
+{
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Return if this is a couting event */
+	if (event->attr.sample_period == 0)
+		return -ENOENT;
+
+	/*
+	 * Take the global lock, and make sure
+	 * no other thread is running any core/thread imc
+	 * events
+	 */
+	mutex_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) {
+		/*
+		 * No core/thread imc events are running in the
+		 * system, so set the refc.id to trace-imc.
+		 */
+		imc_global_refc.id = IMC_DOMAIN_TRACE;
+		imc_global_refc.refc++;
+	} else {
+		mutex_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	mutex_unlock(&imc_global_refc.lock);
+
+	event->hw.idx = -1;
+
+	/*
+	 * There can only be a single PMU for perf_hw_context events which is assigned to
+	 * core PMU. Hence use "perf_sw_context" for trace_imc.
+	 */
+	event->pmu->task_ctx_nr = perf_sw_context;
+	event->destroy = reset_global_refc;
+	return 0;
 }
 
 /* update_pmu_ops : Populate the appropriate operations for "pmu" */
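For context on how the hooks above get exercised: trace_imc_event_init() rejects pure counting use (sample_period == 0), requires perfmon_capable(), and only accepts events whose attr.type matches the dynamically assigned PMU type. A user-space sketch of a matching perf_event_open() setup might look like the following; the sysfs path and the event encoding are assumptions for illustration, not values taken from this patch:

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>

	/* Sketch: open a sampling event on the trace-imc PMU for one CPU. */
	static int open_trace_imc_event(int pmu_type, int cpu)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = pmu_type;		/* e.g. read from /sys/bus/event_source/devices/<name>/type (assumed) */
		attr.config = 0;		/* placeholder event encoding */
		attr.sample_period = 1;		/* must be non-zero: counting mode is rejected */
		attr.sample_type = PERF_SAMPLE_IP;

		/* pid = -1, cpu = <cpu>: per-CPU sampling, needs perfmon capability */
		return syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
	}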
@@ -1074,6 +1476,7 @@
 	pmu->pmu.stop = imc_event_stop;
 	pmu->pmu.read = imc_event_update;
 	pmu->pmu.attr_groups = pmu->attr_groups;
+	pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
 	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
 
 	switch (pmu->domain) {
@@ -1093,6 +1496,14 @@
 		pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
 		pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
 		break;
+	case IMC_DOMAIN_TRACE:
+		pmu->pmu.event_init = trace_imc_event_init;
+		pmu->pmu.add = trace_imc_event_add;
+		pmu->pmu.del = trace_imc_event_del;
+		pmu->pmu.start = trace_imc_event_start;
+		pmu->pmu.stop = trace_imc_event_stop;
+		pmu->pmu.read = trace_imc_event_read;
+		pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
 	default:
 		break;
 	}
@@ -1163,10 +1574,10 @@
 static void thread_imc_ldbar_disable(void *dummy)
 {
 	/*
-	 * By Zeroing LDBAR, we disable thread-imc
-	 * updates.
+	 * By setting 0th bit of LDBAR to zero, we disable thread-imc
+	 * updates to memory.
 	 */
-	mtspr(SPRN_LDBAR, 0);
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
 }
 
 void thread_imc_disable(void)
@@ -1183,6 +1594,18 @@
 			free_pages((u64)per_cpu(thread_imc_mem, i), order);
 
 	}
+}
+
+static void cleanup_all_trace_imc_memory(void)
+{
+	int i, order = get_order(trace_imc_mem_size);
+
+	for_each_online_cpu(i) {
+		if (per_cpu(trace_imc_mem, i))
+			free_pages((u64)per_cpu(trace_imc_mem, i), order);
+
+	}
+	kfree(trace_imc_refc);
 }
 
 /* Function to free the attr_groups which are dynamically allocated */
@@ -1225,6 +1648,11 @@
 	if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
 		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
 		cleanup_all_thread_imc_memory();
+	}
+
+	if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+		cleanup_all_trace_imc_memory();
 	}
 }
 
@@ -1308,6 +1736,27 @@
 
 		thread_imc_pmu = pmu_ptr;
 		break;
+	case IMC_DOMAIN_TRACE:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+		trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+								GFP_KERNEL);
+		if (!trace_imc_refc)
+			return -ENOMEM;
+
+		trace_imc_mem_size = pmu_ptr->counter_mem_size;
+		for_each_online_cpu(cpu) {
+			res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+			if (res) {
+				cleanup_all_trace_imc_memory();
+				goto err;
+			}
+		}
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1381,6 +1830,14 @@
 		}
 
 		break;
+	case IMC_DOMAIN_TRACE:
+		ret = trace_imc_cpu_init();
+		if (ret) {
+			cleanup_all_trace_imc_memory();
+			goto err_free_mem;
+		}
+
+		break;
 	default:
 		return -EINVAL;	/* Unknown domain */
 	}
@@ -1397,7 +1854,7 @@
 	if (ret)
 		goto err_free_cpuhp_mem;
 
-	pr_info("%s performance monitor hardware support registered\n",
+	pr_debug("%s performance monitor hardware support registered\n",
 						pmu_ptr->pmu.name);
 
 	return 0;