@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013 Advanced Micro Devices, Inc.
  *
  * Author: Jacob Shin <jacob.shin@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/perf_event.h>
|---|
@@ -183,6 +180,31 @@
 	hwc->idx = -1;
 }
 
+/*
+ * Return a full thread and slice mask unless user
+ * has provided them
+ */
+static u64 l3_thread_slice_mask(u64 config)
+{
+	if (boot_cpu_data.x86 <= 0x18)
+		return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
+		       ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
+
+	/*
+	 * If the user doesn't specify a threadmask, they're not trying to
+	 * count core 0, so we enable all cores & threads.
+	 * We'll also assume that they want to count slice 0 if they specify
+	 * a threadmask and leave sliceid and enallslices unpopulated.
+	 */
+	if (!(config & AMD64_L3_F19H_THREAD_MASK))
+		return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
+		       AMD64_L3_EN_ALL_CORES;
+
+	return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
+			 AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
+			 AMD64_L3_COREID_MASK);
+}
+
 static int amd_uncore_event_init(struct perf_event *event)
 {
 	struct amd_uncore *uncore;
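The `? :` with its middle operand omitted is the GNU "elvis" extension: `(config & MASK) ? : MASK` keeps the user's bits when any are set and substitutes the full mask otherwise. A minimal userspace sketch of the pre-F19h path, using stand-in mask values read off the `slicemask`/`threadmask` format strings later in this patch (the constants are assumptions, not the kernel's headers; build with GCC or Clang, since `? :` is a GNU extension):

```c
#include <stdio.h>
#include <stdint.h>

#define L3_SLICE_MASK	(0xFULL << 48)	/* stand-in, per "config:48-51" */
#define L3_THREAD_MASK	(0xFFULL << 56)	/* stand-in, per "config:56-63" */

static uint64_t thread_slice_mask(uint64_t config)
{
	/* GNU "x ? : y" yields x when x is nonzero, else y */
	return ((config & L3_SLICE_MASK) ? : L3_SLICE_MASK) |
	       ((config & L3_THREAD_MASK) ? : L3_THREAD_MASK);
}

int main(void)
{
	/* no masks given: defaults to all slices and all threads */
	printf("%#llx\n", (unsigned long long)thread_slice_mask(0));
	/* threadmask given: slicemask is defaulted, user threadmask kept */
	printf("%#llx\n", (unsigned long long)thread_slice_mask(0x3ULL << 56));
	return 0;
}
```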
|---|
@@ -199,12 +221,6 @@
 	 * out. So we do not support sampling and per-thread events via
 	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
 	 */
-
-	/* NB and Last level cache counters do not have usr/os/guest/host bits */
-	if (event->attr.exclude_user || event->attr.exclude_kernel ||
-	    event->attr.exclude_host || event->attr.exclude_guest)
-		return -EINVAL;
-
 	hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
 	hwc->idx = -1;
 
@@ -212,18 +228,11 @@
 		return -EINVAL;
 
 	/*
-	 * SliceMask and ThreadMask need to be set for certain L3 events in
-	 * Family 17h. For other events, the two fields do not affect the count.
+	 * SliceMask and ThreadMask need to be set for certain L3 events.
+	 * For other events, the two fields do not affect the count.
 	 */
-	if (l3_mask && is_llc_event(event)) {
-		int thread = 2 * (cpu_data(event->cpu).cpu_core_id % 4);
-
-		if (smp_num_siblings > 1)
-			thread += cpu_data(event->cpu).apicid & 1;
-
-		hwc->config |= (1ULL << (AMD64_L3_THREAD_SHIFT + thread) &
-				AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK;
-	}
+	if (l3_mask && is_llc_event(event))
+		hwc->config |= l3_thread_slice_mask(event->attr.config);
 
 	uncore = event_to_amd_uncore(event);
 	if (!uncore)
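The open-coded `exclude_*` rejection deleted above is superseded by `PERF_PMU_CAP_NO_EXCLUDE` in the pmu definitions further down: the perf core now refuses exclusion bits for this PMU before `event_init` ever runs. A rough userspace illustration of what the core now rejects (the PMU `type` value is a placeholder; a real tool would read it from `/sys/bus/event_source/devices/amd_l3/type`):

```c
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = 11;			/* placeholder PMU type */
	attr.config = 0x01;
	attr.exclude_kernel = 1;	/* rejected by the core, not the driver */

	/* pid = -1, cpu = 0: uncore events are per-CPU, never per-thread */
	if (syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0) < 0)
		perror("perf_event_open");	/* expect EINVAL */
	return 0;
}
```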
|---|
@@ -265,65 +274,92 @@
 	.attrs = amd_uncore_attrs,
 };
 
-/*
- * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based
- * on family
- */
-#define AMD_FORMAT_ATTR(_dev, _name, _format)				\
-static ssize_t								\
-_dev##_show##_name(struct device *dev,					\
-		   struct device_attribute *attr,			\
-		   char *page)						\
-{									\
-	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
-	return sprintf(page, _format "\n");				\
-}									\
-static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)		\
+static ssize_t __uncore_##_var##_show(struct device *dev,		\
+				      struct device_attribute *attr,	\
+				      char *page)			\
+{									\
+	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
+	return sprintf(page, _format "\n");				\
+}									\
+static struct device_attribute format_attr_##_var =			\
+	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
 
-/* Used for each uncore counter type */
-#define AMD_ATTRIBUTE(_name)						\
-static struct attribute *amd_uncore_format_attr_##_name[] = {		\
-	&format_attr_event_##_name.attr,				\
-	&format_attr_umask.attr,					\
-	NULL,								\
-};									\
-static struct attribute_group amd_uncore_format_group_##_name = {	\
-	.name = "format",						\
-	.attrs = amd_uncore_format_attr_##_name,			\
-};									\
-static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \
-	&amd_uncore_attr_group,						\
-	&amd_uncore_format_group_##_name,				\
-	NULL,								\
+DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
+DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
+DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7");              /* F17h+ L3 */
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44");           /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51");     /* F17h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63");  /* F17h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57");  /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46");    /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47");      /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50");         /* F19h L3 */
+
+static struct attribute *amd_uncore_df_format_attr[] = {
+	&format_attr_event12.attr, /* event14 if F17h+ */
+	&format_attr_umask.attr,
+	NULL,
 };
 
-AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
-AMD_FORMAT_ATTR(umask, , "config:8-15");
-AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
-AMD_FORMAT_ATTR(event, _l3, "config:0-7");
-AMD_ATTRIBUTE(df);
-AMD_ATTRIBUTE(l3);
+static struct attribute *amd_uncore_l3_format_attr[] = {
+	&format_attr_event12.attr, /* event8 if F17h+ */
+	&format_attr_umask.attr,
+	NULL, /* slicemask if F17h, coreid if F19h */
+	NULL, /* threadmask8 if F17h, enallslices if F19h */
+	NULL, /* enallcores if F19h */
+	NULL, /* sliceid if F19h */
+	NULL, /* threadmask2 if F19h */
+	NULL,
+};
+
+static struct attribute_group amd_uncore_df_format_group = {
+	.name = "format",
+	.attrs = amd_uncore_df_format_attr,
+};
+
+static struct attribute_group amd_uncore_l3_format_group = {
+	.name = "format",
+	.attrs = amd_uncore_l3_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_df_attr_groups[] = {
+	&amd_uncore_attr_group,
+	&amd_uncore_df_format_group,
+	NULL,
+};
+
+static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
+	&amd_uncore_attr_group,
+	&amd_uncore_l3_format_group,
+	NULL,
+};
 
 static struct pmu amd_nb_pmu = {
 	.task_ctx_nr	= perf_invalid_context,
+	.attr_groups	= amd_uncore_df_attr_groups,
+	.name		= "amd_nb",
 	.event_init	= amd_uncore_event_init,
 	.add		= amd_uncore_add,
 	.del		= amd_uncore_del,
 	.start		= amd_uncore_start,
 	.stop		= amd_uncore_stop,
 	.read		= amd_uncore_read,
-	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
 };
 
 static struct pmu amd_llc_pmu = {
 	.task_ctx_nr	= perf_invalid_context,
+	.attr_groups	= amd_uncore_l3_attr_groups,
+	.name		= "amd_l2",
 	.event_init	= amd_uncore_event_init,
 	.add		= amd_uncore_add,
 	.del		= amd_uncore_del,
 	.start		= amd_uncore_start,
 	.stop		= amd_uncore_stop,
 	.read		= amd_uncore_read,
-	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
 };
 
 static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
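Unlike the old `AMD_FORMAT_ATTR`, the new macro separates the C identifier (`_var`) from the sysfs file name (`_name`), which is what lets `threadmask8` and `threadmask2` both publish a `threadmask` file with family-specific bit ranges. For reference, `DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7")` expands mechanically to:

```c
static ssize_t __uncore_event8_show(struct device *dev,
				    struct device_attribute *attr,
				    char *page)
{
	BUILD_BUG_ON(sizeof("config:0-7") >= PAGE_SIZE);
	return sprintf(page, "config:0-7" "\n");
}
static struct device_attribute format_attr_event8 =
	__ATTR(event, 0444, __uncore_event8_show, NULL);
```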
|---|
@@ -520,41 +556,36 @@
 
 static int __init amd_uncore_init(void)
 {
+	struct attribute **df_attr = amd_uncore_df_format_attr;
+	struct attribute **l3_attr = amd_uncore_l3_format_attr;
 	int ret = -ENODEV;
 
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
 		return -ENODEV;
 
 	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
 		return -ENODEV;
 
-	if (boot_cpu_data.x86 == 0x17) {
+	num_counters_nb = NUM_COUNTERS_NB;
+	num_counters_llc = NUM_COUNTERS_L2;
+	if (boot_cpu_data.x86 >= 0x17) {
 		/*
-		 * For F17h, the Northbridge counters are repurposed as Data
-		 * Fabric counters. Also, L3 counters are supported too. The PMUs
-		 * are exported based on family as either L2 or L3 and NB or DF.
+		 * For F17h and above, the Northbridge counters are
+		 * repurposed as Data Fabric counters. Also, L3
+		 * counters are supported too. The PMUs are exported
+		 * based on family as either L2 or L3 and NB or DF.
 		 */
-		num_counters_nb = NUM_COUNTERS_NB;
 		num_counters_llc = NUM_COUNTERS_L3;
 		amd_nb_pmu.name = "amd_df";
 		amd_llc_pmu.name = "amd_l3";
-		format_attr_event_df.show = &event_show_df;
-		format_attr_event_l3.show = &event_show_l3;
 		l3_mask = true;
-	} else {
-		num_counters_nb = NUM_COUNTERS_NB;
-		num_counters_llc = NUM_COUNTERS_L2;
-		amd_nb_pmu.name = "amd_nb";
-		amd_llc_pmu.name = "amd_l2";
-		format_attr_event_df = format_attr_event;
-		format_attr_event_l3 = format_attr_event;
-		l3_mask = false;
 	}
 
-	amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
-	amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
-
 	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
+		if (boot_cpu_data.x86 >= 0x17)
+			*df_attr = &format_attr_event14.attr;
+
 		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
 		if (!amd_uncore_nb) {
 			ret = -ENOMEM;
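The `df_attr`/`l3_attr` cursors patch the statically sized format arrays in place: the arrays reserve `NULL` slots for the largest family-specific layout, and init code overwrites only the slots it needs, leaving the rest as the terminator. A userspace sketch of the idiom, with illustrative stand-in names rather than the kernel's attribute objects:

```c
#include <stdio.h>

/* mirrors amd_uncore_l3_format_attr: NULL slots reserved for later */
static const char *l3_format_attr[] = {
	"event12",	/* event8 if F17h+ */
	"umask",
	NULL,		/* slicemask if F17h */
	NULL,		/* threadmask8 if F17h */
	NULL,		/* terminator */
};

int main(void)
{
	const char **l3_attr = l3_format_attr;
	int family = 0x17;	/* stand-in for boot_cpu_data.x86 */

	if (family >= 0x17) {
		*l3_attr++ = "event8";
		*l3_attr++ = "umask";
		*l3_attr++ = "slicemask";
		*l3_attr++ = "threadmask8";
	}

	for (const char **p = l3_format_attr; *p; p++)
		printf("format/%s\n", *p);	/* what ends up in sysfs */
	return 0;
}
```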
|---|
@@ -564,11 +595,29 @@
 		if (ret)
 			goto fail_nb;
 
-		pr_info("AMD NB counters detected\n");
+		pr_info("%d %s %s counters detected\n", num_counters_nb,
+			boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
+			amd_nb_pmu.name);
+
 		ret = 0;
 	}
 
 	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
+		if (boot_cpu_data.x86 >= 0x19) {
+			*l3_attr++ = &format_attr_event8.attr;
+			*l3_attr++ = &format_attr_umask.attr;
+			*l3_attr++ = &format_attr_coreid.attr;
+			*l3_attr++ = &format_attr_enallslices.attr;
+			*l3_attr++ = &format_attr_enallcores.attr;
+			*l3_attr++ = &format_attr_sliceid.attr;
+			*l3_attr++ = &format_attr_threadmask2.attr;
+		} else if (boot_cpu_data.x86 >= 0x17) {
+			*l3_attr++ = &format_attr_event8.attr;
+			*l3_attr++ = &format_attr_umask.attr;
+			*l3_attr++ = &format_attr_slicemask.attr;
+			*l3_attr++ = &format_attr_threadmask8.attr;
+		}
+
 		amd_uncore_llc = alloc_percpu(struct amd_uncore *);
 		if (!amd_uncore_llc) {
 			ret = -ENOMEM;
@@ -578,7 +627,9 @@
 		if (ret)
 			goto fail_llc;
 
-		pr_info("AMD LLC counters detected\n");
+		pr_info("%d %s %s counters detected\n", num_counters_llc,
+			boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
+			amd_llc_pmu.name);
 		ret = 0;
 	}
 
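With the new format files in place, a consumer encodes an F19h L3 event by packing fields at the bit positions advertised above (event = config:0-7, umask = config:8-15, coreid = config:42-44, enallslices = config:46, enallcores = config:47, sliceid = config:48-50, threadmask = config:56-57). A small sketch; the event and umask values are placeholders, not real event encodings, and the result is roughly what perf would pass as `attr.config` for something like `amd_l3/event=0x04,umask=0xff,enallslices=1,enallcores=1,threadmask=0x3/`:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t config = 0;

	config |= 0x04;			/* event: config:0-7 (placeholder) */
	config |= 0xffULL << 8;		/* umask: config:8-15 (placeholder) */
	config |= 1ULL << 46;		/* enallslices: config:46 */
	config |= 1ULL << 47;		/* enallcores: config:47 */
	config |= 0x3ULL << 56;		/* threadmask: config:56-57, both threads */

	printf("attr.config = %#llx\n", (unsigned long long)config);
	return 0;
}
```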
|---|