.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Linux performance counter support for MIPS. |
---|
3 | 4 | * |
---|
.. | .. |
---|
9 | 10 | * based on the sparc64 perf event code and the x86 code. Performance |
---|
10 | 11 | * counter access is based on the MIPS Oprofile code. And the callchain |
---|
11 | 12 | * support references the code of MIPS stacktrace.c. |
---|
12 | | - * |
---|
13 | | - * This program is free software; you can redistribute it and/or modify |
---|
14 | | - * it under the terms of the GNU General Public License version 2 as |
---|
15 | | - * published by the Free Software Foundation. |
---|
16 | 13 | */ |
---|
17 | 14 | |
---|
18 | 15 | #include <linux/cpumask.h> |
---|
.. | .. |
---|
93 | 90 | unsigned int num_counters; |
---|
94 | 91 | }; |
---|
95 | 92 | |
---|
/*
 * Width in bits of the hardware performance counters.  Assigned in
 * init_hw_perf_events() and used to build the mask applied by the 64-bit
 * counter read/write accessors.
 */
static int counter_bits;
/* PMU description filled in by init_hw_perf_events(). */
static struct mips_pmu mipspmu;
---|
97 | 95 | |
---|
98 | 96 | #define M_PERFCTL_EVENT(event) (((event) << MIPS_PERFCTRL_EVENT_S) & \ |
---|
.. | .. |
---|
121 | 119 | #define M_PERFCTL_CONFIG_MASK 0x1f |
---|
122 | 120 | #endif |
---|
123 | 121 | |
---|
/*
 * Mask covering the low @n bits of a 64-bit counter value.
 *
 * Widths of 64 or more yield an all-ones mask: shifting a 64-bit value by
 * its full width (or beyond) is undefined behaviour in C, so those widths
 * must never reach the shift.  @n is evaluated twice and therefore has to
 * be free of side effects.
 */
#define CNTR_BIT_MASK(n) (((n) >= 64) ? ~0ULL : ((1ULL << (n)) - 1))
---|
124 | 123 | |
---|
125 | 124 | #ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS |
---|
126 | 125 | static DEFINE_RWLOCK(pmuint_rwlock); |
---|
.. | .. |
---|
157 | 156 | static irqreturn_t mipsxx_pmu_handle_irq(int, void *); |
---|
158 | 157 | static int mipsxx_pmu_handle_shared_irq(void); |
---|
159 | 158 | |
---|
/*
 * Loongson-3 PMU flavours, as reported by get_loongson3_pmu_type():
 * 0: Not Loongson-3
 * 1: Loongson-3A1000/3B1000/3B1500
 * 2: Loongson-3A2000/3A3000
 * 3: Loongson-3A4000+
 */

#define LOONGSON_PMU_TYPE0 0
#define LOONGSON_PMU_TYPE1 1
#define LOONGSON_PMU_TYPE2 2
#define LOONGSON_PMU_TYPE3 3
---|
| 169 | + |
---|
| 170 | +static inline int get_loongson3_pmu_type(void) |
---|
| 171 | +{ |
---|
| 172 | + if (boot_cpu_type() != CPU_LOONGSON64) |
---|
| 173 | + return LOONGSON_PMU_TYPE0; |
---|
| 174 | + if ((boot_cpu_data.processor_id & PRID_COMP_MASK) == PRID_COMP_LEGACY) |
---|
| 175 | + return LOONGSON_PMU_TYPE1; |
---|
| 176 | + if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64C) |
---|
| 177 | + return LOONGSON_PMU_TYPE2; |
---|
| 178 | + if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G) |
---|
| 179 | + return LOONGSON_PMU_TYPE3; |
---|
| 180 | + |
---|
| 181 | + return LOONGSON_PMU_TYPE0; |
---|
| 182 | +} |
---|
| 183 | + |
---|
160 | 184 | static unsigned int mipsxx_pmu_swizzle_perf_idx(unsigned int idx) |
---|
161 | 185 | { |
---|
162 | 186 | if (vpe_id() == 1) |
---|
.. | .. |
---|
189 | 213 | |
---|
190 | 214 | static u64 mipsxx_pmu_read_counter_64(unsigned int idx) |
---|
191 | 215 | { |
---|
| 216 | + u64 mask = CNTR_BIT_MASK(counter_bits); |
---|
192 | 217 | idx = mipsxx_pmu_swizzle_perf_idx(idx); |
---|
193 | 218 | |
---|
194 | 219 | switch (idx) { |
---|
195 | 220 | case 0: |
---|
196 | | - return read_c0_perfcntr0_64(); |
---|
| 221 | + return read_c0_perfcntr0_64() & mask; |
---|
197 | 222 | case 1: |
---|
198 | | - return read_c0_perfcntr1_64(); |
---|
| 223 | + return read_c0_perfcntr1_64() & mask; |
---|
199 | 224 | case 2: |
---|
200 | | - return read_c0_perfcntr2_64(); |
---|
| 225 | + return read_c0_perfcntr2_64() & mask; |
---|
201 | 226 | case 3: |
---|
202 | | - return read_c0_perfcntr3_64(); |
---|
| 227 | + return read_c0_perfcntr3_64() & mask; |
---|
203 | 228 | default: |
---|
204 | 229 | WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); |
---|
205 | 230 | return 0; |
---|
.. | .. |
---|
228 | 253 | |
---|
229 | 254 | static void mipsxx_pmu_write_counter_64(unsigned int idx, u64 val) |
---|
230 | 255 | { |
---|
| 256 | + val &= CNTR_BIT_MASK(counter_bits); |
---|
231 | 257 | idx = mipsxx_pmu_swizzle_perf_idx(idx); |
---|
232 | 258 | |
---|
233 | 259 | switch (idx) { |
---|
.. | .. |
---|
289 | 315 | struct hw_perf_event *hwc) |
---|
290 | 316 | { |
---|
291 | 317 | int i; |
---|
| 318 | + unsigned long cntr_mask; |
---|
292 | 319 | |
---|
293 | 320 | /* |
---|
294 | 321 | * We only need to care the counter mask. The range has been |
---|
295 | 322 | * checked definitely. |
---|
296 | 323 | */ |
---|
297 | | - unsigned long cntr_mask = (hwc->event_base >> 8) & 0xffff; |
---|
| 324 | + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) |
---|
| 325 | + cntr_mask = (hwc->event_base >> 10) & 0xffff; |
---|
| 326 | + else |
---|
| 327 | + cntr_mask = (hwc->event_base >> 8) & 0xffff; |
---|
298 | 328 | |
---|
299 | 329 | for (i = mipspmu.num_counters - 1; i >= 0; i--) { |
---|
300 | 330 | /* |
---|
.. | .. |
---|
323 | 353 | |
---|
324 | 354 | WARN_ON(idx < 0 || idx >= mipspmu.num_counters); |
---|
325 | 355 | |
---|
326 | | - cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | |
---|
327 | | - (evt->config_base & M_PERFCTL_CONFIG_MASK) | |
---|
328 | | - /* Make sure interrupt enabled. */ |
---|
329 | | - MIPS_PERFCTRL_IE; |
---|
| 356 | + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) |
---|
| 357 | + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0x3ff) | |
---|
| 358 | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | |
---|
| 359 | + /* Make sure interrupt enabled. */ |
---|
| 360 | + MIPS_PERFCTRL_IE; |
---|
| 361 | + else |
---|
| 362 | + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | |
---|
| 363 | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | |
---|
| 364 | + /* Make sure interrupt enabled. */ |
---|
| 365 | + MIPS_PERFCTRL_IE; |
---|
330 | 366 | |
---|
331 | 367 | if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) { |
---|
332 | 368 | /* enable the counter for the calling thread */ |
---|
.. | .. |
---|
398 | 434 | } |
---|
399 | 435 | |
---|
400 | 436 | local64_set(&hwc->prev_count, mipspmu.overflow - left); |
---|
| 437 | + |
---|
| 438 | + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) |
---|
| 439 | + mipsxx_pmu_write_control(idx, |
---|
| 440 | + M_PERFCTL_EVENT(hwc->event_base & 0x3ff)); |
---|
401 | 441 | |
---|
402 | 442 | mipspmu.write_counter(idx, mipspmu.overflow - left); |
---|
403 | 443 | |
---|
.. | .. |
---|
670 | 710 | (pev->event_id & 0xff); |
---|
671 | 711 | else |
---|
672 | 712 | #endif /* CONFIG_MIPS_MT_SMP */ |
---|
673 | | - return ((pev->cntr_mask & 0xffff00) | |
---|
674 | | - (pev->event_id & 0xff)); |
---|
| 713 | + { |
---|
| 714 | + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) |
---|
| 715 | + return (pev->cntr_mask & 0xfffc00) | |
---|
| 716 | + (pev->event_id & 0x3ff); |
---|
| 717 | + else |
---|
| 718 | + return (pev->cntr_mask & 0xffff00) | |
---|
| 719 | + (pev->event_id & 0xff); |
---|
| 720 | + } |
---|
675 | 721 | } |
---|
676 | 722 | |
---|
677 | 723 | static const struct mips_perf_event *mipspmu_map_general_event(int idx) |
---|
.. | .. |
---|
786 | 832 | return counters; |
---|
787 | 833 | } |
---|
788 | 834 | |
---|
789 | | -static void reset_counters(void *arg) |
---|
| 835 | +static void loongson3_reset_counters(void *arg) |
---|
790 | 836 | { |
---|
791 | 837 | int counters = (int)(long)arg; |
---|
| 838 | + |
---|
792 | 839 | switch (counters) { |
---|
793 | 840 | case 4: |
---|
794 | 841 | mipsxx_pmu_write_control(3, 0); |
---|
795 | 842 | mipspmu.write_counter(3, 0); |
---|
| 843 | + mipsxx_pmu_write_control(3, 127<<5); |
---|
| 844 | + mipspmu.write_counter(3, 0); |
---|
| 845 | + mipsxx_pmu_write_control(3, 191<<5); |
---|
| 846 | + mipspmu.write_counter(3, 0); |
---|
| 847 | + mipsxx_pmu_write_control(3, 255<<5); |
---|
| 848 | + mipspmu.write_counter(3, 0); |
---|
| 849 | + mipsxx_pmu_write_control(3, 319<<5); |
---|
| 850 | + mipspmu.write_counter(3, 0); |
---|
| 851 | + mipsxx_pmu_write_control(3, 383<<5); |
---|
| 852 | + mipspmu.write_counter(3, 0); |
---|
| 853 | + mipsxx_pmu_write_control(3, 575<<5); |
---|
| 854 | + mipspmu.write_counter(3, 0); |
---|
| 855 | + fallthrough; |
---|
796 | 856 | case 3: |
---|
797 | 857 | mipsxx_pmu_write_control(2, 0); |
---|
798 | 858 | mipspmu.write_counter(2, 0); |
---|
| 859 | + mipsxx_pmu_write_control(2, 127<<5); |
---|
| 860 | + mipspmu.write_counter(2, 0); |
---|
| 861 | + mipsxx_pmu_write_control(2, 191<<5); |
---|
| 862 | + mipspmu.write_counter(2, 0); |
---|
| 863 | + mipsxx_pmu_write_control(2, 255<<5); |
---|
| 864 | + mipspmu.write_counter(2, 0); |
---|
| 865 | + mipsxx_pmu_write_control(2, 319<<5); |
---|
| 866 | + mipspmu.write_counter(2, 0); |
---|
| 867 | + mipsxx_pmu_write_control(2, 383<<5); |
---|
| 868 | + mipspmu.write_counter(2, 0); |
---|
| 869 | + mipsxx_pmu_write_control(2, 575<<5); |
---|
| 870 | + mipspmu.write_counter(2, 0); |
---|
| 871 | + fallthrough; |
---|
799 | 872 | case 2: |
---|
800 | 873 | mipsxx_pmu_write_control(1, 0); |
---|
801 | 874 | mipspmu.write_counter(1, 0); |
---|
| 875 | + mipsxx_pmu_write_control(1, 127<<5); |
---|
| 876 | + mipspmu.write_counter(1, 0); |
---|
| 877 | + mipsxx_pmu_write_control(1, 191<<5); |
---|
| 878 | + mipspmu.write_counter(1, 0); |
---|
| 879 | + mipsxx_pmu_write_control(1, 255<<5); |
---|
| 880 | + mipspmu.write_counter(1, 0); |
---|
| 881 | + mipsxx_pmu_write_control(1, 319<<5); |
---|
| 882 | + mipspmu.write_counter(1, 0); |
---|
| 883 | + mipsxx_pmu_write_control(1, 383<<5); |
---|
| 884 | + mipspmu.write_counter(1, 0); |
---|
| 885 | + mipsxx_pmu_write_control(1, 575<<5); |
---|
| 886 | + mipspmu.write_counter(1, 0); |
---|
| 887 | + fallthrough; |
---|
802 | 888 | case 1: |
---|
803 | 889 | mipsxx_pmu_write_control(0, 0); |
---|
804 | 890 | mipspmu.write_counter(0, 0); |
---|
| 891 | + mipsxx_pmu_write_control(0, 127<<5); |
---|
| 892 | + mipspmu.write_counter(0, 0); |
---|
| 893 | + mipsxx_pmu_write_control(0, 191<<5); |
---|
| 894 | + mipspmu.write_counter(0, 0); |
---|
| 895 | + mipsxx_pmu_write_control(0, 255<<5); |
---|
| 896 | + mipspmu.write_counter(0, 0); |
---|
| 897 | + mipsxx_pmu_write_control(0, 319<<5); |
---|
| 898 | + mipspmu.write_counter(0, 0); |
---|
| 899 | + mipsxx_pmu_write_control(0, 383<<5); |
---|
| 900 | + mipspmu.write_counter(0, 0); |
---|
| 901 | + mipsxx_pmu_write_control(0, 575<<5); |
---|
| 902 | + mipspmu.write_counter(0, 0); |
---|
| 903 | + break; |
---|
| 904 | + } |
---|
| 905 | +} |
---|
| 906 | + |
---|
| 907 | +static void reset_counters(void *arg) |
---|
| 908 | +{ |
---|
| 909 | + int counters = (int)(long)arg; |
---|
| 910 | + |
---|
| 911 | + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) { |
---|
| 912 | + loongson3_reset_counters(arg); |
---|
| 913 | + return; |
---|
| 914 | + } |
---|
| 915 | + |
---|
| 916 | + switch (counters) { |
---|
| 917 | + case 4: |
---|
| 918 | + mipsxx_pmu_write_control(3, 0); |
---|
| 919 | + mipspmu.write_counter(3, 0); |
---|
| 920 | + fallthrough; |
---|
| 921 | + case 3: |
---|
| 922 | + mipsxx_pmu_write_control(2, 0); |
---|
| 923 | + mipspmu.write_counter(2, 0); |
---|
| 924 | + fallthrough; |
---|
| 925 | + case 2: |
---|
| 926 | + mipsxx_pmu_write_control(1, 0); |
---|
| 927 | + mipspmu.write_counter(1, 0); |
---|
| 928 | + fallthrough; |
---|
| 929 | + case 1: |
---|
| 930 | + mipsxx_pmu_write_control(0, 0); |
---|
| 931 | + mipspmu.write_counter(0, 0); |
---|
| 932 | + break; |
---|
805 | 933 | } |
---|
806 | 934 | } |
---|
807 | 935 | |
---|
.. | .. |
---|
833 | 961 | [PERF_COUNT_HW_BRANCH_MISSES] = { 0x16, CNTR_EVEN | CNTR_ODD }, |
---|
834 | 962 | }; |
---|
835 | 963 | |
---|
836 | | -static const struct mips_perf_event loongson3_event_map[PERF_COUNT_HW_MAX] = { |
---|
/*
 * Generic hardware event map that init_hw_perf_events() installs for
 * LOONGSON_PMU_TYPE1 (Loongson-3A1000/3B1000/3B1500).
 * Entries read as { event number, CNTR_* counter mask } - field order is
 * presumed from how event_id/cntr_mask are used elsewhere in this file;
 * confirm against struct mips_perf_event.
 */
static const struct mips_perf_event loongson3_event_map1[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, CNTR_ODD },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x01, CNTR_EVEN },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x01, CNTR_ODD },
};
---|
| 970 | + |
---|
/*
 * Generic hardware event map that init_hw_perf_events() installs for
 * LOONGSON_PMU_TYPE2 (Loongson-3A2000/3A3000).  Every listed event uses
 * CNTR_ALL; PERF_COUNT_HW_CACHE_REFERENCES has no entry here.
 */
static const struct mips_perf_event loongson3_event_map2[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x80, CNTR_ALL },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x81, CNTR_ALL },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x18, CNTR_ALL },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x94, CNTR_ALL },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x9c, CNTR_ALL },
};
---|
| 978 | + |
---|
/*
 * Generic hardware event map that init_hw_perf_events() installs for
 * LOONGSON_PMU_TYPE3 (Loongson-3A4000+).  Every listed event uses CNTR_ALL.
 */
static const struct mips_perf_event loongson3_event_map3[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_ALL },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x01, CNTR_ALL },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x1c, CNTR_ALL },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x1d, CNTR_ALL },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02, CNTR_ALL },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x08, CNTR_ALL },
};
---|
842 | 987 | |
---|
843 | 988 | static const struct mips_perf_event octeon_event_map[PERF_COUNT_HW_MAX] = { |
---|
.. | .. |
---|
1063 | 1208 | }, |
---|
1064 | 1209 | }; |
---|
1065 | 1210 | |
---|
1066 | | -static const struct mips_perf_event loongson3_cache_map |
---|
| 1211 | +static const struct mips_perf_event loongson3_cache_map1 |
---|
1067 | 1212 | [PERF_COUNT_HW_CACHE_MAX] |
---|
1068 | 1213 | [PERF_COUNT_HW_CACHE_OP_MAX] |
---|
1069 | 1214 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { |
---|
.. | .. |
---|
1108 | 1253 | [C(BPU)] = { |
---|
1109 | 1254 | /* Using the same code for *HW_BRANCH* */ |
---|
1110 | 1255 | [C(OP_READ)] = { |
---|
1111 | | - [C(RESULT_ACCESS)] = { 0x02, CNTR_EVEN }, |
---|
1112 | | - [C(RESULT_MISS)] = { 0x02, CNTR_ODD }, |
---|
| 1256 | + [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN }, |
---|
| 1257 | + [C(RESULT_MISS)] = { 0x01, CNTR_ODD }, |
---|
1113 | 1258 | }, |
---|
1114 | 1259 | [C(OP_WRITE)] = { |
---|
1115 | | - [C(RESULT_ACCESS)] = { 0x02, CNTR_EVEN }, |
---|
1116 | | - [C(RESULT_MISS)] = { 0x02, CNTR_ODD }, |
---|
| 1260 | + [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN }, |
---|
| 1261 | + [C(RESULT_MISS)] = { 0x01, CNTR_ODD }, |
---|
| 1262 | + }, |
---|
| 1263 | +}, |
---|
| 1264 | +}; |
---|
| 1265 | + |
---|
/*
 * Cache event map that init_hw_perf_events() installs for
 * LOONGSON_PMU_TYPE2 (Loongson-3A2000/3A3000).  Combinations that are not
 * listed stay zero-initialised.
 *
 * Several event numbers here exceed 0xff; the LOONGSON_PMU_TYPE2 paths in
 * this file mask event numbers with 0x3ff instead of 0xff.
 */
static const struct mips_perf_event loongson3_cache_map2
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	/*
	 * Like some other architectures (e.g. ARM), the performance
	 * counters don't differentiate between read and write
	 * accesses/misses, so this isn't strictly correct, but it's the
	 * best we can do. Writes and reads get combined.
	 *
	 * NOTE(review): the L1D and LL rows below use different event
	 * numbers for reads and writes, so the remark above may be a
	 * leftover from the map this table was derived from - confirm.
	 */
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x156, CNTR_ALL },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x155, CNTR_ALL },
		[C(RESULT_MISS)] = { 0x153, CNTR_ALL },
	},
},
[C(L1I)] = {
	/* Read and write misses share one event number. */
	[C(OP_READ)] = {
		[C(RESULT_MISS)] = { 0x18, CNTR_ALL },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_MISS)] = { 0x18, CNTR_ALL },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x1b6, CNTR_ALL },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x1b7, CNTR_ALL },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { 0x1bf, CNTR_ALL },
	},
},
[C(DTLB)] = {
	/* Read and write misses share one event number. */
	[C(OP_READ)] = {
		[C(RESULT_MISS)] = { 0x92, CNTR_ALL },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_MISS)] = { 0x92, CNTR_ALL },
	},
},
[C(ITLB)] = {
	/* Read and write misses share one event number. */
	[C(OP_READ)] = {
		[C(RESULT_MISS)] = { 0x1a, CNTR_ALL },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_MISS)] = { 0x1a, CNTR_ALL },
	},
},
[C(BPU)] = {
	/* Using the same code for *HW_BRANCH* */
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x94, CNTR_ALL },
		[C(RESULT_MISS)] = { 0x9c, CNTR_ALL },
	},
},
};
---|
| 1328 | + |
---|
/*
 * Cache event map that init_hw_perf_events() installs for
 * LOONGSON_PMU_TYPE3 (Loongson-3A4000+).  Combinations that are not listed
 * stay zero-initialised; no OP_WRITE rows are defined in this table.
 */
static const struct mips_perf_event loongson3_cache_map3
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	/*
	 * Like some other architectures (e.g. ARM), the performance
	 * counters don't differentiate between read and write
	 * accesses/misses, so this isn't strictly correct, but it's the
	 * best we can do. Writes and reads get combined.
	 *
	 * NOTE(review): only OP_READ and OP_PREFETCH rows exist below;
	 * presumably the OP_READ events cover writes as well - confirm.
	 */
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x1e, CNTR_ALL },
		[C(RESULT_MISS)] = { 0x1f, CNTR_ALL },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { 0xaa, CNTR_ALL },
		[C(RESULT_MISS)] = { 0xa9, CNTR_ALL },
	},
},
[C(L1I)] = {
	/* Same event numbers as the CACHE_REFERENCES/CACHE_MISSES generic events. */
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x1c, CNTR_ALL },
		[C(RESULT_MISS)] = { 0x1d, CNTR_ALL },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x2e, CNTR_ALL },
		[C(RESULT_MISS)] = { 0x2f, CNTR_ALL },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x14, CNTR_ALL },
		[C(RESULT_MISS)] = { 0x1b, CNTR_ALL },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_MISS)] = { 0x1a, CNTR_ALL },
	},
},
[C(BPU)] = {
	/* Using the same code for *HW_BRANCH* */
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x02, CNTR_ALL },
		[C(RESULT_MISS)] = { 0x08, CNTR_ALL },
	},
},
};
---|
.. | .. |
---|
1176 | 1436 | }, |
---|
1177 | 1437 | }, |
---|
1178 | 1438 | }; |
---|
1179 | | - |
---|
1180 | 1439 | |
---|
1181 | 1440 | static const struct mips_perf_event octeon_cache_map |
---|
1182 | 1441 | [PERF_COUNT_HW_CACHE_MAX] |
---|
.. | .. |
---|
1383 | 1642 | struct perf_sample_data data; |
---|
1384 | 1643 | unsigned int counters = mipspmu.num_counters; |
---|
1385 | 1644 | u64 counter; |
---|
1386 | | - int handled = IRQ_NONE; |
---|
| 1645 | + int n, handled = IRQ_NONE; |
---|
1387 | 1646 | struct pt_regs *regs; |
---|
1388 | 1647 | |
---|
1389 | 1648 | if (cpu_has_perf_cntr_intr_bit && !(read_c0_cause() & CAUSEF_PCI)) |
---|
.. | .. |
---|
1404 | 1663 | |
---|
1405 | 1664 | perf_sample_data_init(&data, 0, 0); |
---|
1406 | 1665 | |
---|
1407 | | - switch (counters) { |
---|
1408 | | -#define HANDLE_COUNTER(n) \ |
---|
1409 | | - case n + 1: \ |
---|
1410 | | - if (test_bit(n, cpuc->used_mask)) { \ |
---|
1411 | | - counter = mipspmu.read_counter(n); \ |
---|
1412 | | - if (counter & mipspmu.overflow) { \ |
---|
1413 | | - handle_associated_event(cpuc, n, &data, regs); \ |
---|
1414 | | - handled = IRQ_HANDLED; \ |
---|
1415 | | - } \ |
---|
1416 | | - } |
---|
1417 | | - HANDLE_COUNTER(3) |
---|
1418 | | - HANDLE_COUNTER(2) |
---|
1419 | | - HANDLE_COUNTER(1) |
---|
1420 | | - HANDLE_COUNTER(0) |
---|
| 1666 | + for (n = counters - 1; n >= 0; n--) { |
---|
| 1667 | + if (!test_bit(n, cpuc->used_mask)) |
---|
| 1668 | + continue; |
---|
| 1669 | + |
---|
| 1670 | + counter = mipspmu.read_counter(n); |
---|
| 1671 | + if (!(counter & mipspmu.overflow)) |
---|
| 1672 | + continue; |
---|
| 1673 | + |
---|
| 1674 | + handle_associated_event(cpuc, n, &data, regs); |
---|
| 1675 | + handled = IRQ_HANDLED; |
---|
1421 | 1676 | } |
---|
1422 | 1677 | |
---|
1423 | 1678 | #ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS |
---|
.. | .. |
---|
1515 | 1770 | static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) |
---|
1516 | 1771 | { |
---|
1517 | 1772 | /* currently most cores have 7-bit event numbers */ |
---|
| 1773 | + int pmu_type; |
---|
1518 | 1774 | unsigned int raw_id = config & 0xff; |
---|
1519 | 1775 | unsigned int base_id = raw_id & 0x7f; |
---|
1520 | 1776 | |
---|
.. | .. |
---|
1626 | 1882 | raw_event.cntr_mask = |
---|
1627 | 1883 | raw_id > 127 ? CNTR_ODD : CNTR_EVEN; |
---|
1628 | 1884 | break; |
---|
1629 | | - case CPU_LOONGSON3: |
---|
1630 | | - raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN; |
---|
1631 | | - break; |
---|
| 1885 | + case CPU_LOONGSON64: |
---|
| 1886 | + pmu_type = get_loongson3_pmu_type(); |
---|
| 1887 | + |
---|
| 1888 | + switch (pmu_type) { |
---|
| 1889 | + case LOONGSON_PMU_TYPE1: |
---|
| 1890 | + raw_event.cntr_mask = |
---|
| 1891 | + raw_id > 127 ? CNTR_ODD : CNTR_EVEN; |
---|
| 1892 | + break; |
---|
| 1893 | + case LOONGSON_PMU_TYPE2: |
---|
| 1894 | + base_id = config & 0x3ff; |
---|
| 1895 | + raw_event.cntr_mask = CNTR_ALL; |
---|
| 1896 | + |
---|
| 1897 | + if ((base_id >= 1 && base_id < 28) || |
---|
| 1898 | + (base_id >= 64 && base_id < 90) || |
---|
| 1899 | + (base_id >= 128 && base_id < 164) || |
---|
| 1900 | + (base_id >= 192 && base_id < 200) || |
---|
| 1901 | + (base_id >= 256 && base_id < 275) || |
---|
| 1902 | + (base_id >= 320 && base_id < 361) || |
---|
| 1903 | + (base_id >= 384 && base_id < 574)) |
---|
| 1904 | + break; |
---|
| 1905 | + |
---|
| 1906 | + return ERR_PTR(-EOPNOTSUPP); |
---|
| 1907 | + case LOONGSON_PMU_TYPE3: |
---|
| 1908 | + base_id = raw_id; |
---|
| 1909 | + raw_event.cntr_mask = CNTR_ALL; |
---|
| 1910 | + break; |
---|
| 1911 | + } |
---|
| 1912 | + break; |
---|
1632 | 1913 | } |
---|
1633 | 1914 | |
---|
1634 | 1915 | raw_event.event_id = base_id; |
---|
.. | .. |
---|
1686 | 1967 | static int __init |
---|
1687 | 1968 | init_hw_perf_events(void) |
---|
1688 | 1969 | { |
---|
1689 | | - int counters, irq; |
---|
1690 | | - int counter_bits; |
---|
| 1970 | + int counters, irq, pmu_type; |
---|
1691 | 1971 | |
---|
1692 | 1972 | pr_info("Performance counters: "); |
---|
1693 | 1973 | |
---|
.. | .. |
---|
1767 | 2047 | mipspmu.general_event_map = &mipsxxcore_event_map; |
---|
1768 | 2048 | mipspmu.cache_event_map = &mipsxxcore_cache_map; |
---|
1769 | 2049 | break; |
---|
1770 | | - case CPU_LOONGSON1: |
---|
| 2050 | + case CPU_LOONGSON32: |
---|
1771 | 2051 | mipspmu.name = "mips/loongson1"; |
---|
1772 | 2052 | mipspmu.general_event_map = &mipsxxcore_event_map; |
---|
1773 | 2053 | mipspmu.cache_event_map = &mipsxxcore_cache_map; |
---|
1774 | 2054 | break; |
---|
1775 | | - case CPU_LOONGSON3: |
---|
| 2055 | + case CPU_LOONGSON64: |
---|
1776 | 2056 | mipspmu.name = "mips/loongson3"; |
---|
1777 | | - mipspmu.general_event_map = &loongson3_event_map; |
---|
1778 | | - mipspmu.cache_event_map = &loongson3_cache_map; |
---|
| 2057 | + pmu_type = get_loongson3_pmu_type(); |
---|
| 2058 | + |
---|
| 2059 | + switch (pmu_type) { |
---|
| 2060 | + case LOONGSON_PMU_TYPE1: |
---|
| 2061 | + counters = 2; |
---|
| 2062 | + mipspmu.general_event_map = &loongson3_event_map1; |
---|
| 2063 | + mipspmu.cache_event_map = &loongson3_cache_map1; |
---|
| 2064 | + break; |
---|
| 2065 | + case LOONGSON_PMU_TYPE2: |
---|
| 2066 | + counters = 4; |
---|
| 2067 | + mipspmu.general_event_map = &loongson3_event_map2; |
---|
| 2068 | + mipspmu.cache_event_map = &loongson3_cache_map2; |
---|
| 2069 | + break; |
---|
| 2070 | + case LOONGSON_PMU_TYPE3: |
---|
| 2071 | + counters = 4; |
---|
| 2072 | + mipspmu.general_event_map = &loongson3_event_map3; |
---|
| 2073 | + mipspmu.cache_event_map = &loongson3_cache_map3; |
---|
| 2074 | + break; |
---|
| 2075 | + } |
---|
1779 | 2076 | break; |
---|
1780 | 2077 | case CPU_CAVIUM_OCTEON: |
---|
1781 | 2078 | case CPU_CAVIUM_OCTEON_PLUS: |
---|
.. | .. |
---|
1806 | 2103 | mipspmu.irq = irq; |
---|
1807 | 2104 | |
---|
1808 | 2105 | if (read_c0_perfctrl0() & MIPS_PERFCTRL_W) { |
---|
1809 | | - mipspmu.max_period = (1ULL << 63) - 1; |
---|
1810 | | - mipspmu.valid_count = (1ULL << 63) - 1; |
---|
1811 | | - mipspmu.overflow = 1ULL << 63; |
---|
| 2106 | + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) { |
---|
| 2107 | + counter_bits = 48; |
---|
| 2108 | + mipspmu.max_period = (1ULL << 47) - 1; |
---|
| 2109 | + mipspmu.valid_count = (1ULL << 47) - 1; |
---|
| 2110 | + mipspmu.overflow = 1ULL << 47; |
---|
| 2111 | + } else { |
---|
| 2112 | + counter_bits = 64; |
---|
| 2113 | + mipspmu.max_period = (1ULL << 63) - 1; |
---|
| 2114 | + mipspmu.valid_count = (1ULL << 63) - 1; |
---|
| 2115 | + mipspmu.overflow = 1ULL << 63; |
---|
| 2116 | + } |
---|
1812 | 2117 | mipspmu.read_counter = mipsxx_pmu_read_counter_64; |
---|
1813 | 2118 | mipspmu.write_counter = mipsxx_pmu_write_counter_64; |
---|
1814 | | - counter_bits = 64; |
---|
1815 | 2119 | } else { |
---|
| 2120 | + counter_bits = 32; |
---|
1816 | 2121 | mipspmu.max_period = (1ULL << 31) - 1; |
---|
1817 | 2122 | mipspmu.valid_count = (1ULL << 31) - 1; |
---|
1818 | 2123 | mipspmu.overflow = 1ULL << 31; |
---|
1819 | 2124 | mipspmu.read_counter = mipsxx_pmu_read_counter; |
---|
1820 | 2125 | mipspmu.write_counter = mipsxx_pmu_write_counter; |
---|
1821 | | - counter_bits = 32; |
---|
1822 | 2126 | } |
---|
1823 | 2127 | |
---|
1824 | 2128 | on_each_cpu(reset_counters, (void *)(long)counters, 1); |
---|