.. | .. |
---|
202 | 202 | } |
---|
203 | 203 | |
---|
204 | 204 | /* |
---|
205 | | - * Use IBS for precise event sampling: |
---|
| 205 | + * core pmu config -> IBS config |
---|
206 | 206 | * |
---|
207 | 207 | * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count |
---|
208 | 208 | * perf record -a -e r076:p ... # same as -e cpu-cycles:p |
---|
.. | .. |
---|
211 | 211 | * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, |
---|
212 | 212 | * MSRC001_1033) is used to select either cycle or micro-ops counting |
---|
213 | 213 | * mode. |
---|
214 | | - * |
---|
215 | | - * The rip of IBS samples has skid 0. Thus, IBS supports precise |
---|
216 | | - * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the |
---|
217 | | - * rip is invalid when IBS was not able to record the rip correctly. |
---|
218 | | - * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. |
---|
219 | | - * |
---|
220 | 214 | */ |
---|
221 | | -static int perf_ibs_precise_event(struct perf_event *event, u64 *config) |
---|
| 215 | +static int core_pmu_ibs_config(struct perf_event *event, u64 *config) |
---|
222 | 216 | { |
---|
223 | | - switch (event->attr.precise_ip) { |
---|
224 | | - case 0: |
---|
225 | | - return -ENOENT; |
---|
226 | | - case 1: |
---|
227 | | - case 2: |
---|
228 | | - break; |
---|
229 | | - default: |
---|
230 | | - return -EOPNOTSUPP; |
---|
231 | | - } |
---|
232 | | - |
---|
233 | 217 | switch (event->attr.type) { |
---|
234 | 218 | case PERF_TYPE_HARDWARE: |
---|
235 | 219 | switch (event->attr.config) { |
---|
.. | .. |
---|
255 | 239 | return -EOPNOTSUPP; |
---|
256 | 240 | } |
---|
257 | 241 | |
---|
258 | | -static const struct perf_event_attr ibs_notsupp = { |
---|
259 | | - .exclude_user = 1, |
---|
260 | | - .exclude_kernel = 1, |
---|
261 | | - .exclude_hv = 1, |
---|
262 | | - .exclude_idle = 1, |
---|
263 | | - .exclude_host = 1, |
---|
264 | | - .exclude_guest = 1, |
---|
265 | | -}; |
---|
| 242 | +/* |
---|
| 243 | + * The rip of IBS samples has skid 0. Thus, IBS supports precise |
---|
| 244 | + * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the |
---|
| 245 | + * rip is invalid when IBS was not able to record the rip correctly. |
---|
| 246 | + * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. |
---|
| 247 | + */ |
---|
| 248 | +int forward_event_to_ibs(struct perf_event *event) |
---|
| 249 | +{ |
---|
| 250 | + u64 config = 0; |
---|
| 251 | + |
---|
| 252 | + if (!event->attr.precise_ip || event->attr.precise_ip > 2) |
---|
| 253 | + return -EOPNOTSUPP; |
---|
| 254 | + |
---|
| 255 | + if (!core_pmu_ibs_config(event, &config)) { |
---|
| 256 | + event->attr.type = perf_ibs_op.pmu.type; |
---|
| 257 | + event->attr.config = config; |
---|
| 258 | + } |
---|
| 259 | + return -ENOENT; |
---|
| 260 | +} |
---|
266 | 261 | |
---|
267 | 262 | static int perf_ibs_init(struct perf_event *event) |
---|
268 | 263 | { |
---|
269 | 264 | struct hw_perf_event *hwc = &event->hw; |
---|
270 | 265 | struct perf_ibs *perf_ibs; |
---|
271 | 266 | u64 max_cnt, config; |
---|
272 | | - int ret; |
---|
273 | 267 | |
---|
274 | 268 | perf_ibs = get_ibs_pmu(event->attr.type); |
---|
275 | | - if (perf_ibs) { |
---|
276 | | - config = event->attr.config; |
---|
277 | | - } else { |
---|
278 | | - perf_ibs = &perf_ibs_op; |
---|
279 | | - ret = perf_ibs_precise_event(event, &config); |
---|
280 | | - if (ret) |
---|
281 | | - return ret; |
---|
282 | | - } |
---|
| 269 | + if (!perf_ibs) |
---|
| 270 | + return -ENOENT; |
---|
| 271 | + |
---|
| 272 | + config = event->attr.config; |
---|
283 | 273 | |
---|
284 | 274 | if (event->pmu != &perf_ibs->pmu) |
---|
285 | 275 | return -ENOENT; |
---|
286 | | - |
---|
287 | | - if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp)) |
---|
288 | | - return -EINVAL; |
---|
289 | 276 | |
---|
290 | 277 | if (config & ~perf_ibs->config_mask) |
---|
291 | 278 | return -EINVAL; |
---|
.. | .. |
---|
324 | 311 | hwc->config_base = perf_ibs->msr; |
---|
325 | 312 | hwc->config = config; |
---|
326 | 313 | |
---|
| 314 | + /* |
---|
| 315 | + * rip recorded by IbsOpRip will not be consistent with rsp and rbp |
---|
| 316 | + * recorded as part of interrupt regs. Thus we need to use rip from |
---|
| 317 | + * interrupt regs while unwinding call stack. Setting _EARLY flag |
---|
| 318 | + * makes sure we unwind call-stack before perf sample rip is set to |
---|
| 319 | + * IbsOpRip. |
---|
| 320 | + */ |
---|
| 321 | + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) |
---|
| 322 | + event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; |
---|
| 323 | + |
---|
327 | 324 | return 0; |
---|
328 | 325 | } |
---|
329 | 326 | |
---|
.. | .. |
---|
353 | 350 | * and the lower 7 bits of CurCnt are randomized. |
---|
354 | 351 | * Otherwise CurCnt has the full 27-bit current counter value. |
---|
355 | 352 | */ |
---|
356 | | - if (config & IBS_OP_VAL) |
---|
| 353 | + if (config & IBS_OP_VAL) { |
---|
357 | 354 | count = (config & IBS_OP_MAX_CNT) << 4; |
---|
358 | | - else if (ibs_caps & IBS_CAPS_RDWROPCNT) |
---|
| 355 | + if (ibs_caps & IBS_CAPS_OPCNTEXT) |
---|
| 356 | + count += config & IBS_OP_MAX_CNT_EXT_MASK; |
---|
| 357 | + } else if (ibs_caps & IBS_CAPS_RDWROPCNT) { |
---|
359 | 358 | count = (config & IBS_OP_CUR_CNT) >> 32; |
---|
| 359 | + } |
---|
360 | 360 | |
---|
361 | 361 | return count; |
---|
362 | 362 | } |
---|
.. | .. |
---|
417 | 417 | struct hw_perf_event *hwc = &event->hw; |
---|
418 | 418 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); |
---|
419 | 419 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); |
---|
420 | | - u64 period; |
---|
| 420 | + u64 period, config = 0; |
---|
421 | 421 | |
---|
422 | 422 | if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) |
---|
423 | 423 | return; |
---|
.. | .. |
---|
426 | 426 | hwc->state = 0; |
---|
427 | 427 | |
---|
428 | 428 | perf_ibs_set_period(perf_ibs, hwc, &period); |
---|
| 429 | + if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) { |
---|
| 430 | + config |= period & IBS_OP_MAX_CNT_EXT_MASK; |
---|
| 431 | + period &= ~IBS_OP_MAX_CNT_EXT_MASK; |
---|
| 432 | + } |
---|
| 433 | + config |= period >> 4; |
---|
| 434 | + |
---|
429 | 435 | /* |
---|
430 | 436 | * Set STARTED before enabling the hardware, such that a subsequent NMI |
---|
431 | 437 | * must observe it. |
---|
432 | 438 | */ |
---|
433 | 439 | set_bit(IBS_STARTED, pcpu->state); |
---|
434 | 440 | clear_bit(IBS_STOPPING, pcpu->state); |
---|
435 | | - perf_ibs_enable_event(perf_ibs, hwc, period >> 4); |
---|
| 441 | + perf_ibs_enable_event(perf_ibs, hwc, config); |
---|
436 | 442 | |
---|
437 | 443 | perf_event_update_userpage(event); |
---|
438 | 444 | } |
---|
.. | .. |
---|
549 | 555 | .start = perf_ibs_start, |
---|
550 | 556 | .stop = perf_ibs_stop, |
---|
551 | 557 | .read = perf_ibs_read, |
---|
| 558 | + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, |
---|
552 | 559 | }, |
---|
553 | 560 | .msr = MSR_AMD64_IBSFETCHCTL, |
---|
554 | 561 | .config_mask = IBS_FETCH_CONFIG_MASK, |
---|
.. | .. |
---|
573 | 580 | .start = perf_ibs_start, |
---|
574 | 581 | .stop = perf_ibs_stop, |
---|
575 | 582 | .read = perf_ibs_read, |
---|
| 583 | + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, |
---|
576 | 584 | }, |
---|
577 | 585 | .msr = MSR_AMD64_IBSOPCTL, |
---|
578 | 586 | .config_mask = IBS_OP_CONFIG_MASK, |
---|
.. | .. |
---|
599 | 607 | struct perf_ibs_data ibs_data; |
---|
600 | 608 | int offset, size, check_rip, offset_max, throttle = 0; |
---|
601 | 609 | unsigned int msr; |
---|
602 | | - u64 *buf, *config, period; |
---|
| 610 | + u64 *buf, *config, period, new_config = 0; |
---|
603 | 611 | |
---|
604 | 612 | if (!test_bit(IBS_STARTED, pcpu->state)) { |
---|
605 | 613 | fail: |
---|
.. | .. |
---|
693 | 701 | data.raw = &raw; |
---|
694 | 702 | } |
---|
695 | 703 | |
---|
| 704 | + /* |
---|
| 705 | + * rip recorded by IbsOpRip will not be consistent with rsp and rbp |
---|
| 706 | + * recorded as part of interrupt regs. Thus we need to use rip from |
---|
| 707 | + * interrupt regs while unwinding call stack. |
---|
| 708 | + */ |
---|
| 709 | + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) |
---|
| 710 | + data.callchain = perf_callchain(event, iregs); |
---|
| 711 | + |
---|
696 | 712 | throttle = perf_event_overflow(event, &data, &regs); |
---|
697 | 713 | out: |
---|
698 | 714 | if (throttle) { |
---|
699 | 715 | perf_ibs_stop(event, 0); |
---|
700 | 716 | } else { |
---|
701 | | - period >>= 4; |
---|
| 717 | + if (perf_ibs == &perf_ibs_op) { |
---|
| 718 | + if (ibs_caps & IBS_CAPS_OPCNTEXT) { |
---|
| 719 | + new_config = period & IBS_OP_MAX_CNT_EXT_MASK; |
---|
| 720 | + period &= ~IBS_OP_MAX_CNT_EXT_MASK; |
---|
| 721 | + } |
---|
| 722 | + if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL)) |
---|
| 723 | + new_config |= *config & IBS_OP_CUR_CNT_RAND; |
---|
| 724 | + } |
---|
| 725 | + new_config |= period >> 4; |
---|
702 | 726 | |
---|
703 | | - if ((ibs_caps & IBS_CAPS_RDWROPCNT) && |
---|
704 | | - (*config & IBS_OP_CNT_CTL)) |
---|
705 | | - period |= *config & IBS_OP_CUR_CNT_RAND; |
---|
706 | | - |
---|
707 | | - perf_ibs_enable_event(perf_ibs, hwc, period); |
---|
| 727 | + perf_ibs_enable_event(perf_ibs, hwc, new_config); |
---|
708 | 728 | } |
---|
709 | 729 | |
---|
710 | 730 | perf_event_update_userpage(event); |
---|
.. | .. |
---|
761 | 781 | return ret; |
---|
762 | 782 | } |
---|
763 | 783 | |
---|
764 | | -static __init void perf_event_ibs_init(void) |
---|
| 784 | +static __init int perf_event_ibs_init(void) |
---|
765 | 785 | { |
---|
766 | 786 | struct attribute **attr = ibs_op_format_attrs; |
---|
| 787 | + int ret; |
---|
767 | 788 | |
---|
768 | 789 | /* |
---|
769 | 790 | * Some chips fail to reset the fetch count when it is written; instead |
---|
.. | .. |
---|
775 | 796 | if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) |
---|
776 | 797 | perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; |
---|
777 | 798 | |
---|
778 | | - perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); |
---|
| 799 | + ret = perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); |
---|
| 800 | + if (ret) |
---|
| 801 | + return ret; |
---|
779 | 802 | |
---|
780 | 803 | if (ibs_caps & IBS_CAPS_OPCNT) { |
---|
781 | 804 | perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; |
---|
782 | 805 | *attr++ = &format_attr_cnt_ctl.attr; |
---|
783 | 806 | } |
---|
784 | | - perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); |
---|
785 | 807 | |
---|
786 | | - register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); |
---|
| 808 | + if (ibs_caps & IBS_CAPS_OPCNTEXT) { |
---|
| 809 | + perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK; |
---|
| 810 | + perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK; |
---|
| 811 | + perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; |
---|
| 812 | + } |
---|
| 813 | + |
---|
| 814 | + ret = perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); |
---|
| 815 | + if (ret) |
---|
| 816 | + goto err_op; |
---|
| 817 | + |
---|
| 818 | + ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); |
---|
| 819 | + if (ret) |
---|
| 820 | + goto err_nmi; |
---|
| 821 | + |
---|
787 | 822 | pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); |
---|
| 823 | + return 0; |
---|
| 824 | + |
---|
| 825 | +err_nmi: |
---|
| 826 | + perf_pmu_unregister(&perf_ibs_op.pmu); |
---|
| 827 | + free_percpu(perf_ibs_op.pcpu); |
---|
| 828 | + perf_ibs_op.pcpu = NULL; |
---|
| 829 | +err_op: |
---|
| 830 | + perf_pmu_unregister(&perf_ibs_fetch.pmu); |
---|
| 831 | + free_percpu(perf_ibs_fetch.pcpu); |
---|
| 832 | + perf_ibs_fetch.pcpu = NULL; |
---|
| 833 | + |
---|
| 834 | + return ret; |
---|
788 | 835 | } |
---|
789 | 836 | |
---|
790 | 837 | #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ |
---|
791 | 838 | |
---|
792 | | -static __init void perf_event_ibs_init(void) { } |
---|
| 839 | +static __init int perf_event_ibs_init(void) |
---|
| 840 | +{ |
---|
| 841 | + return 0; |
---|
| 842 | +} |
---|
793 | 843 | |
---|
794 | 844 | #endif |
---|
795 | 845 | |
---|
.. | .. |
---|
1059 | 1109 | x86_pmu_amd_ibs_starting_cpu, |
---|
1060 | 1110 | x86_pmu_amd_ibs_dying_cpu); |
---|
1061 | 1111 | |
---|
1062 | | - perf_event_ibs_init(); |
---|
1063 | | - |
---|
1064 | | - return 0; |
---|
| 1112 | + return perf_event_ibs_init(); |
---|
1065 | 1113 | } |
---|
1066 | 1114 | |
---|
1067 | 1115 | /* Since we need the pci subsystem to init ibs we can't do this earlier: */ |
---|