.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-only
1 | 2 | /*
2 | 3 | * Intel(R) Processor Trace PMU driver for perf
3 | 4 | * Copyright (c) 2013-2014, Intel Corporation.
4 | | - *
5 | | - * This program is free software; you can redistribute it and/or modify it
6 | | - * under the terms and conditions of the GNU General Public License,
7 | | - * version 2, as published by the Free Software Foundation.
8 | | - *
9 | | - * This program is distributed in the hope it will be useful, but WITHOUT
10 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 | | - * more details.
13 | 5 | *
14 | 6 | * Intel PT is specified in the Intel Architecture Instruction Set Extensions
15 | 7 | * Programming Reference:
.. | .. |
21 | 13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 | 14 |
23 | 15 | #include <linux/types.h>
| 16 | +#include <linux/bits.h>
| 17 | +#include <linux/limits.h>
24 | 18 | #include <linux/slab.h>
25 | 19 | #include <linux/device.h>
26 | 20 |
.. | .. |
68 | 62 | PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
69 | 63 | PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
70 | 64 | PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
| 65 | + PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)),
71 | 66 | PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
72 | 67 | PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7),
73 | 68 | PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
.. | .. |
75 | 70 | PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
76 | 71 | };
77 | 72 |
78 | | -static u32 pt_cap_get(enum pt_capabilities cap)
| 73 | +u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability)
79 | 74 | {
80 | | - struct pt_cap_desc *cd = &pt_caps[cap];
81 | | - u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
| 75 | + struct pt_cap_desc *cd = &pt_caps[capability];
| 76 | + u32 c = caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
82 | 77 | unsigned int shift = __ffs(cd->mask);
83 | 78 |
84 | 79 | return (c & cd->mask) >> shift;
85 | 80 | }
| 81 | +EXPORT_SYMBOL_GPL(intel_pt_validate_cap);
| 82 | +
| 83 | +u32 intel_pt_validate_hw_cap(enum pt_capabilities cap)
| 84 | +{
| 85 | + return intel_pt_validate_cap(pt_pmu.caps, cap);
| 86 | +}
| 87 | +EXPORT_SYMBOL_GPL(intel_pt_validate_hw_cap);
86 | 88 |
87 | 89 | static ssize_t pt_cap_show(struct device *cdev,
88 | 90 | struct device_attribute *attr,
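
The hunk above splits the old, file-local pt_cap_get() in two: intel_pt_validate_cap() evaluates a capability against any caller-supplied CPUID dump, and intel_pt_validate_hw_cap() binds it to the host's own pt_pmu.caps. The EXPORT_SYMBOL_GPLs suggest the point of the split: code outside this file (KVM exposing PT to guests, for instance) can validate against a guest capability array rather than the host's. A minimal userspace model of the extraction logic; the struct layout and the sample CPUID word below are illustrative assumptions, not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define PT_CPUID_REGS_NUM 4 /* eax, ebx, ecx, edx */

    struct pt_cap_desc {
            unsigned int leaf;  /* CPUID 0x14 subleaf */
            unsigned int reg;   /* register index within the subleaf */
            uint32_t mask;      /* bit(s) holding the capability */
    };

    /* Model of intel_pt_validate_cap(): mask out the field, shift it down. */
    static uint32_t validate_cap(const uint32_t *caps, const struct pt_cap_desc *cd)
    {
            uint32_t c = caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];

            return (c & cd->mask) >> __builtin_ctz(cd->mask);
    }

    int main(void)
    {
            /* Hypothetical CPUID 0x14 dump: subleaf 1, EAX = 0x249f0007 */
            uint32_t caps[2 * PT_CPUID_REGS_NUM] = {0};
            struct pt_cap_desc mtc_periods = {1, 0, 0xffff0000};

            caps[1 * PT_CPUID_REGS_NUM + 0] = 0x249f0007;
            /* prints 249f: the mtc_periods bitmap from EAX[31:16] */
            printf("%x\n", validate_cap(caps, &mtc_periods));
            return 0;
    }
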
.. | .. |
92 | 94 | container_of(attr, struct dev_ext_attribute, attr);
93 | 95 | enum pt_capabilities cap = (long)ea->var;
94 | 96 |
95 | | - return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
| 97 | + return snprintf(buf, PAGE_SIZE, "%x\n", intel_pt_validate_hw_cap(cap));
96 | 98 | }
97 | 99 |
98 | | -static struct attribute_group pt_cap_group = {
| 100 | +static struct attribute_group pt_cap_group __ro_after_init = {
99 | 101 | .name = "caps",
100 | 102 | };
101 | 103 |
.. | .. |
204 | 206 |
205 | 207 | /* model-specific quirks */
206 | 208 | switch (boot_cpu_data.x86_model) {
207 | | - case INTEL_FAM6_BROADWELL_CORE:
208 | | - case INTEL_FAM6_BROADWELL_XEON_D:
209 | | - case INTEL_FAM6_BROADWELL_GT3E:
| 209 | + case INTEL_FAM6_BROADWELL:
| 210 | + case INTEL_FAM6_BROADWELL_D:
| 211 | + case INTEL_FAM6_BROADWELL_G:
210 | 212 | case INTEL_FAM6_BROADWELL_X:
211 | 213 | /* not setting BRANCH_EN will #GP, erratum BDM106 */
212 | 214 | pt_pmu.branch_en_always_on = true;
.. | .. |
225 | 227 | if (reg & BIT(14))
226 | 228 | pt_pmu.vmx = true;
227 | 229 | }
228 | | -
229 | | - attrs = NULL;
230 | 230 |
231 | 231 | for (i = 0; i < PT_CPUID_LEAVES; i++) {
232 | 232 | cpuid_count(20, i,
.. | .. |
310 | 310 | return false;
311 | 311 |
312 | 312 | if (config & RTIT_CTL_CYC_PSB) {
313 | | - if (!pt_cap_get(PT_CAP_psb_cyc))
| 313 | + if (!intel_pt_validate_hw_cap(PT_CAP_psb_cyc))
314 | 314 | return false;
315 | 315 |
316 | | - allowed = pt_cap_get(PT_CAP_psb_periods);
| 316 | + allowed = intel_pt_validate_hw_cap(PT_CAP_psb_periods);
317 | 317 | requested = (config & RTIT_CTL_PSB_FREQ) >>
318 | 318 | RTIT_CTL_PSB_FREQ_OFFSET;
319 | 319 | if (requested && (!(allowed & BIT(requested))))
320 | 320 | return false;
321 | 321 |
322 | | - allowed = pt_cap_get(PT_CAP_cycle_thresholds);
| 322 | + allowed = intel_pt_validate_hw_cap(PT_CAP_cycle_thresholds);
323 | 323 | requested = (config & RTIT_CTL_CYC_THRESH) >>
324 | 324 | RTIT_CTL_CYC_THRESH_OFFSET;
325 | 325 | if (requested && (!(allowed & BIT(requested))))
.. | .. |
334 | 334 | * Spec says that setting mtc period bits while mtc bit in
335 | 335 | * CPUID is 0 will #GP, so better safe than sorry.
336 | 336 | */
337 | | - if (!pt_cap_get(PT_CAP_mtc))
| 337 | + if (!intel_pt_validate_hw_cap(PT_CAP_mtc))
338 | 338 | return false;
339 | 339 |
340 | | - allowed = pt_cap_get(PT_CAP_mtc_periods);
| 340 | + allowed = intel_pt_validate_hw_cap(PT_CAP_mtc_periods);
341 | 341 | if (!allowed)
342 | 342 | return false;
343 | 343 |
.. | .. |
349 | 349 | }
350 | 350 |
351 | 351 | if (config & RTIT_CTL_PWR_EVT_EN &&
352 | | - !pt_cap_get(PT_CAP_power_event_trace))
| 352 | + !intel_pt_validate_hw_cap(PT_CAP_power_event_trace))
353 | 353 | return false;
354 | 354 |
355 | 355 | if (config & RTIT_CTL_PTW) {
356 | | - if (!pt_cap_get(PT_CAP_ptwrite))
| 356 | + if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite))
357 | 357 | return false;
358 | 358 |
359 | 359 | /* FUPonPTW without PTW doesn't make sense */
.. | .. |
396 | 396 | * PT configuration helpers
397 | 397 | * These all are cpu affine and operate on a local PT
398 | 398 | */
| 399 | +
| 400 | +static void pt_config_start(struct perf_event *event)
| 401 | +{
| 402 | + struct pt *pt = this_cpu_ptr(&pt_ctx);
| 403 | + u64 ctl = event->hw.config;
| 404 | +
| 405 | + ctl |= RTIT_CTL_TRACEEN;
| 406 | + if (READ_ONCE(pt->vmx_on))
| 407 | + perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
| 408 | + else
| 409 | + wrmsrl(MSR_IA32_RTIT_CTL, ctl);
| 410 | +
| 411 | + WRITE_ONCE(event->hw.config, ctl);
| 412 | +}
399 | 413 |
400 | 414 | /* Address ranges and their corresponding msr configuration registers */
401 | 415 | static const struct pt_address_range {
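
pt_config_start() factors the final "turn tracing on" step out of pt_config() so that later hunks (pt_event_start() and pt_event_snapshot_aux()) can re-arm tracing without redoing the full configuration: it ORs RTIT_CTL_TRACEEN into the saved config, skips the WRMSR while pt->vmx_on is set (marking the handle PERF_AUX_FLAG_PARTIAL instead, since the MSR cannot be written under VMX), and republishes the config with WRITE_ONCE() for concurrent readers such as the PMI handler. A tiny model of the bit it adds; bit positions follow the SDM's IA32_RTIT_CTL layout, and the sample config is made up:

    #include <stdint.h>
    #include <stdio.h>

    #define RTIT_CTL_TRACEEN (1ULL << 0) /* TraceEn is bit 0 of IA32_RTIT_CTL */

    int main(void)
    {
            /* Hypothetical saved event config: BranchEn | TSCEn | ToPA */
            uint64_t ctl = (1ULL << 13) | (1ULL << 10) | (1ULL << 8);

            ctl |= RTIT_CTL_TRACEEN; /* the one bit pt_config_start() adds */
            printf("IA32_RTIT_CTL = %#llx\n", (unsigned long long)ctl);
            return 0;
    }
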
.. | .. |
460 | 474 | pt->filters.filter[range].msr_b = filter->msr_b;
461 | 475 | }
462 | 476 |
463 | | - rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
| 477 | + rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off;
464 | 478 | }
465 | 479 |
466 | 480 | return rtit_ctl;
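
The added (u64) cast above is a real fix, not churn: filter->config is an unsigned long, and the ADDRn_CFG fields of RTIT_CTL start at bit 32 (reg_off is 32, 36, 40 or 44), so on a 32-bit kernel, where unsigned long is 32 bits, the unwidened shift is undefined and the filter enable bits never reach the register image. A userspace sketch of the failure mode and the fix, with types chosen to mimic a 32-bit unsigned long:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t config = 1;        /* ADDRn_CFG = 1: filter enabled */
            unsigned int reg_off = 32;  /* ADDR0_CFG lives at RTIT_CTL[35:32] */

            /* Broken on a 32-bit kernel: shifting a 32-bit type by >= 32
             * is undefined; the enable bits never reach the register image. */
            /* uint32_t bad = config << reg_off; */

            /* Fixed: widen to 64 bits first, then shift. */
            uint64_t rtit_ctl = (uint64_t)config << reg_off;

            printf("rtit_ctl = %#llx\n", (unsigned long long)rtit_ctl);
            return 0;
    }
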
.. | .. |
469 | 483 | static void pt_config(struct perf_event *event)
470 | 484 | {
471 | 485 | struct pt *pt = this_cpu_ptr(&pt_ctx);
| 486 | + struct pt_buffer *buf = perf_get_aux(&pt->handle);
472 | 487 | u64 reg;
473 | 488 |
474 | 489 | /* First round: clear STATUS, in particular the PSB byte counter. */
.. | .. |
478 | 493 | }
479 | 494 |
480 | 495 | reg = pt_config_filters(event);
481 | | - reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
| 496 | + reg |= RTIT_CTL_TRACEEN;
| 497 | + if (!buf->single)
| 498 | + reg |= RTIT_CTL_TOPA;
482 | 499 |
483 | 500 | /*
484 | 501 | * Previously, we had BRANCH_EN on by default, but now that PT has
.. | .. |
501 | 518 | reg |= (event->attr.config & PT_CONFIG_MASK);
502 | 519 |
503 | 520 | event->hw.config = reg;
504 | | - if (READ_ONCE(pt->vmx_on))
505 | | - perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
506 | | - else
507 | | - wrmsrl(MSR_IA32_RTIT_CTL, reg);
| 521 | + pt_config_start(event);
508 | 522 | }
509 | 523 |
510 | 524 | static void pt_config_stop(struct perf_event *event)
.. | .. |
533 | 547 | wmb();
534 | 548 | }
535 | 549 |
536 | | -static void pt_config_buffer(void *buf, unsigned int topa_idx,
537 | | - unsigned int output_off)
538 | | -{
539 | | - u64 reg;
540 | | -
541 | | - wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
542 | | -
543 | | - reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
544 | | -
545 | | - wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
546 | | -}
| 550 | +/**
| 551 | + * struct topa - ToPA metadata
| 552 | + * @list: linkage to struct pt_buffer's list of tables
| 553 | + * @offset: offset of the first entry in this table in the buffer
| 554 | + * @size: total size of all entries in this table
| 555 | + * @last: index of the last initialized entry in this table
| 556 | + * @z_count: how many times the first entry repeats
| 557 | + */
| 558 | +struct topa {
| 559 | + struct list_head list;
| 560 | + u64 offset;
| 561 | + size_t size;
| 562 | + int last;
| 563 | + unsigned int z_count;
| 564 | +};
547 | 565 |
548 | 566 | /*
549 | 567 | * Keep ToPA table-related metadata on the same page as the actual table,
550 | 568 | * taking up a few words from the top
551 | 569 | */
552 | 570 |
553 | | -#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
| 571 | +#define TENTS_PER_PAGE \
| 572 | + ((PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry))
554 | 573 |
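
With the metadata now carried by struct topa, TENTS_PER_PAGE can be derived from sizeof(struct topa) instead of the old hard-coded 40 bytes minus a spare entry. On LP64, the struct above is 16 (list_head) + 8 + 8 + 4 + 4 = 40 bytes, so the new macro yields (4096 - 40) / 8 = 507 entries per page where the old one gave 506. A standalone check; the 4 KiB page and the 8-byte topa_entry are assumptions matching x86:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096

    struct list_head { struct list_head *next, *prev; }; /* 16 bytes on LP64 */

    struct topa_entry { uint64_t val; }; /* PT ToPA entries are 8 bytes */

    struct topa {
            struct list_head list;
            uint64_t offset;
            size_t size;
            int last;
            unsigned int z_count;
    };

    int main(void)
    {
            /* 40 bytes of metadata -> (4096 - 40) / 8 = 507 entries per page,
             * one more than the old "(PAGE_SIZE - 40) / 8 - 1" macro allowed. */
            printf("sizeof(struct topa) = %zu\n", sizeof(struct topa));
            printf("TENTS_PER_PAGE = %zu\n",
                   (PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry));
            return 0;
    }
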
555 | 574 | /**
556 | | - * struct topa - page-sized ToPA table with metadata at the top
| 575 | + * struct topa_page - page-sized ToPA table with metadata at the top
557 | 576 | * @table: actual ToPA table entries, as understood by PT hardware
558 | | - * @list: linkage to struct pt_buffer's list of tables
559 | | - * @phys: physical address of this page
560 | | - * @offset: offset of the first entry in this table in the buffer
561 | | - * @size: total size of all entries in this table
562 | | - * @last: index of the last initialized entry in this table
| 577 | + * @topa: metadata
563 | 578 | */
564 | | -struct topa {
| 579 | +struct topa_page {
565 | 580 | struct topa_entry table[TENTS_PER_PAGE];
566 | | - struct list_head list;
567 | | - u64 phys;
568 | | - u64 offset;
569 | | - size_t size;
570 | | - int last;
| 581 | + struct topa topa;
571 | 582 | };
572 | 583 |
| 584 | +static inline struct topa_page *topa_to_page(struct topa *topa)
| 585 | +{
| 586 | + return container_of(topa, struct topa_page, topa);
| 587 | +}
| 588 | +
| 589 | +static inline struct topa_page *topa_entry_to_page(struct topa_entry *te)
| 590 | +{
| 591 | + return (struct topa_page *)((unsigned long)te & PAGE_MASK);
| 592 | +}
| 593 | +
| 594 | +static inline phys_addr_t topa_pfn(struct topa *topa)
| 595 | +{
| 596 | + return PFN_DOWN(virt_to_phys(topa_to_page(topa)));
| 597 | +}
| 598 | +
573 | 599 | /* make -1 stand for the last table entry */
574 | | -#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
| 600 | +#define TOPA_ENTRY(t, i) \
| 601 | + ((i) == -1 \
| 602 | + ? &topa_to_page(t)->table[(t)->last] \
| 603 | + : &topa_to_page(t)->table[(i)])
| 604 | +#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
| 605 | +#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
| 606 | +
| 607 | +static void pt_config_buffer(struct pt_buffer *buf)
| 608 | +{
| 609 | + struct pt *pt = this_cpu_ptr(&pt_ctx);
| 610 | + u64 reg, mask;
| 611 | + void *base;
| 612 | +
| 613 | + if (buf->single) {
| 614 | + base = buf->data_pages[0];
| 615 | + mask = (buf->nr_pages * PAGE_SIZE - 1) >> 7;
| 616 | + } else {
| 617 | + base = topa_to_page(buf->cur)->table;
| 618 | + mask = (u64)buf->cur_idx;
| 619 | + }
| 620 | +
| 621 | + reg = virt_to_phys(base);
| 622 | + if (pt->output_base != reg) {
| 623 | + pt->output_base = reg;
| 624 | + wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
| 625 | + }
| 626 | +
| 627 | + reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
| 628 | + if (pt->output_mask != reg) {
| 629 | + pt->output_mask = reg;
| 630 | + wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
| 631 | + }
| 632 | +}
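
The rewritten pt_config_buffer() takes the pt_buffer itself, covers both single-range and ToPA output modes, and caches the last values written to the two output MSRs in the per-CPU struct pt, so an unchanged value skips the WRMSR; presumably this is what keeps it cheap enough for the paths that now call it on every re-arm. In both modes the OUTPUT_MASK packing is the same: bits 6:0 all ones, bits 31:7 the table index (ToPA) or size mask (single range), bits 63:32 the byte offset. A userspace model of that packing; the register layout follows the SDM, and the sample numbers are made up:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    /* Model of the RTIT_OUTPUT_MASK value pt_config_buffer() builds:
     * bits 6:0 = 0x7f, bits 31:7 = index/size mask, bits 63:32 = offset. */
    static uint64_t output_mask(uint64_t mask, uint64_t output_off)
    {
            return 0x7f | (mask << 7) | (output_off << 32);
    }

    int main(void)
    {
            /* ToPA mode: currently writing entry 3 of the table, 512 bytes in. */
            printf("topa:   %#llx\n",
                   (unsigned long long)output_mask(3, 512));

            /* Single-range mode with a 16-page buffer: the "mask" is the
             * buffer-size mask shifted down by 7, as in the hunk above;
             * this comes out to 0xffff, i.e. buffer size minus one. */
            uint64_t nr_pages = 16;
            printf("single: %#llx\n",
                   (unsigned long long)output_mask((nr_pages * PAGE_SIZE - 1) >> 7, 0));
            return 0;
    }
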
575 | 633 |
576 | 634 | /**
577 | 635 | * topa_alloc() - allocate page-sized ToPA table
.. | .. |
583 | 641 | static struct topa *topa_alloc(int cpu, gfp_t gfp)
584 | 642 | {
585 | 643 | int node = cpu_to_node(cpu);
586 | | - struct topa *topa;
| 644 | + struct topa_page *tp;
587 | 645 | struct page *p;
588 | 646 |
589 | 647 | p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
590 | 648 | if (!p)
591 | 649 | return NULL;
592 | 650 |
593 | | - topa = page_address(p);
594 | | - topa->last = 0;
595 | | - topa->phys = page_to_phys(p);
| 651 | + tp = page_address(p);
| 652 | + tp->topa.last = 0;
596 | 653 |
597 | 654 | /*
598 | 655 | * In case of single-entry ToPA, always put the self-referencing END
599 | 656 | * link as the 2nd entry in the table
600 | 657 | */
601 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
602 | | - TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
603 | | - TOPA_ENTRY(topa, 1)->end = 1;
| 658 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
| 659 | + TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT;
| 660 | + TOPA_ENTRY(&tp->topa, 1)->end = 1;
604 | 661 | }
605 | 662 |
606 | | - return topa;
| 663 | + return &tp->topa;
607 | 664 | }
608 | 665 |
609 | 666 | /**
.. | .. |
638 | 695 | topa->offset = last->offset + last->size;
639 | 696 | buf->last = topa;
640 | 697 |
641 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries))
| 698 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
642 | 699 | return;
643 | 700 |
644 | 701 | BUG_ON(last->last != TENTS_PER_PAGE - 1);
645 | 702 |
646 | | - TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
| 703 | + TOPA_ENTRY(last, -1)->base = topa_pfn(topa);
647 | 704 | TOPA_ENTRY(last, -1)->end = 1;
648 | 705 | }
649 | 706 |
.. | .. |
654 | 711 | static bool topa_table_full(struct topa *topa)
655 | 712 | {
656 | 713 | /* single-entry ToPA is a special case */
657 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries))
| 714 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
658 | 715 | return !!topa->last;
659 | 716 |
660 | 717 | return topa->last == TENTS_PER_PAGE - 1;
.. | .. |
670 | 727 | *
671 | 728 | * Return: 0 on success or error code.
672 | 729 | */
673 | | -static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
| 730 | +static int topa_insert_pages(struct pt_buffer *buf, int cpu, gfp_t gfp)
674 | 731 | {
675 | 732 | struct topa *topa = buf->last;
676 | 733 | int order = 0;
.. | .. |
681 | 738 | order = page_private(p);
682 | 739 |
683 | 740 | if (topa_table_full(topa)) {
684 | | - topa = topa_alloc(buf->cpu, gfp);
| 741 | + topa = topa_alloc(cpu, gfp);
685 | 742 | if (!topa)
686 | 743 | return -ENOMEM;
687 | 744 |
688 | 745 | topa_insert_table(buf, topa);
689 | 746 | }
690 | 747 |
| 748 | + if (topa->z_count == topa->last - 1) {
| 749 | + if (order == TOPA_ENTRY(topa, topa->last - 1)->size)
| 750 | + topa->z_count++;
| 751 | + }
| 752 | +
691 | 753 | TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
692 | 754 | TOPA_ENTRY(topa, -1)->size = order;
693 | | - if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
| 755 | + if (!buf->snapshot &&
| 756 | + !intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
694 | 757 | TOPA_ENTRY(topa, -1)->intr = 1;
695 | 758 | TOPA_ENTRY(topa, -1)->stop = 1;
696 | 759 | }
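
The new z_count bookkeeping counts how many entries after the first repeat the first entry's size; topa_insert_pages() grows it while same-order pages keep being appended. Its consumer is pt_topa_entry_for_page() further down, where a page offset that lands in this homogeneous prefix resolves with one division instead of a walk over per-page state. A small model of that fast path, with invented entry sizes and counts:

    #include <stdio.h>

    /* Model: a ToPA table whose first (z_count + 1) entries are all
     * order-2 (4-page) regions. Page -> entry index is then a division. */
    int main(void)
    {
            unsigned int entry_pages = 1u << 2; /* TOPA_ENTRY_PAGES(topa, 0) */
            unsigned int z_count = 5;           /* entries 1..5 repeat entry 0 */
            unsigned int z_pg = entry_pages * (z_count + 1); /* 24 pages */

            for (unsigned int pg = 0; pg < z_pg; pg += 7)
                    printf("page %2u -> entry %u\n", pg, pg / entry_pages);
            return 0;
    }
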
.. | .. |
712 | 775 | struct topa *topa;
713 | 776 |
714 | 777 | list_for_each_entry(topa, &buf->tables, list) {
| 778 | + struct topa_page *tp = topa_to_page(topa);
715 | 779 | int i;
716 | 780 |
717 | | - pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
718 | | - topa->phys, topa->offset, topa->size);
| 781 | + pr_debug("# table @%p, off %llx size %zx\n", tp->table,
| 782 | + topa->offset, topa->size);
719 | 783 | for (i = 0; i < TENTS_PER_PAGE; i++) {
720 | 784 | pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
721 | | - &topa->table[i],
722 | | - (unsigned long)topa->table[i].base << TOPA_SHIFT,
723 | | - sizes(topa->table[i].size),
724 | | - topa->table[i].end ? 'E' : ' ',
725 | | - topa->table[i].intr ? 'I' : ' ',
726 | | - topa->table[i].stop ? 'S' : ' ',
727 | | - *(u64 *)&topa->table[i]);
728 | | - if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
729 | | - topa->table[i].stop) ||
730 | | - topa->table[i].end)
| 785 | + &tp->table[i],
| 786 | + (unsigned long)tp->table[i].base << TOPA_SHIFT,
| 787 | + sizes(tp->table[i].size),
| 788 | + tp->table[i].end ? 'E' : ' ',
| 789 | + tp->table[i].intr ? 'I' : ' ',
| 790 | + tp->table[i].stop ? 'S' : ' ',
| 791 | + *(u64 *)&tp->table[i]);
| 792 | + if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
| 793 | + tp->table[i].stop) ||
| 794 | + tp->table[i].end)
731 | 795 | break;
| 796 | + if (!i && topa->z_count)
| 797 | + i += topa->z_count;
732 | 798 | }
733 | 799 | }
734 | 800 | }
.. | .. |
765 | 831 | struct pt_buffer *buf = perf_get_aux(&pt->handle);
766 | 832 | u64 topa_idx, base, old;
767 | 833 |
| 834 | + if (buf->single) {
| 835 | + local_set(&buf->data_size, buf->output_off);
| 836 | + return;
| 837 | + }
| 838 | +
768 | 839 | /* offset of the first region in this table from the beginning of buf */
769 | 840 | base = buf->cur->offset + buf->output_off;
770 | 841 |
771 | 842 | /* offset of the current output region within this table */
772 | 843 | for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
773 | | - base += sizes(buf->cur->table[topa_idx].size);
| 844 | + base += TOPA_ENTRY_SIZE(buf->cur, topa_idx);
774 | 845 |
775 | 846 | if (buf->snapshot) {
776 | 847 | local_set(&buf->data_size, base);
.. | .. |
790 | 861 | */
791 | 862 | static void *pt_buffer_region(struct pt_buffer *buf)
792 | 863 | {
793 | | - return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
| 864 | + return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
794 | 865 | }
795 | 866 |
796 | 867 | /**
.. | .. |
799 | 870 | */
800 | 871 | static size_t pt_buffer_region_size(struct pt_buffer *buf)
801 | 872 | {
802 | | - return sizes(buf->cur->table[buf->cur_idx].size);
| 873 | + return TOPA_ENTRY_SIZE(buf->cur, buf->cur_idx);
803 | 874 | }
804 | 875 |
805 | 876 | /**
.. | .. |
828 | 899 | * means we are already losing data; need to let the decoder
829 | 900 | * know.
830 | 901 | */
831 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
832 | | - buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
| 902 | + if (!buf->single &&
| 903 | + (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
| 904 | + buf->output_off == pt_buffer_region_size(buf))) {
833 | 905 | perf_aux_output_flag(&pt->handle,
834 | 906 | PERF_AUX_FLAG_TRUNCATED);
835 | 907 | advance++;
.. | .. |
840 | 912 | * Also on single-entry ToPA implementations, interrupt will come
841 | 913 | * before the output reaches its output region's boundary.
842 | 914 | */
843 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
| 915 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
| 916 | + !buf->snapshot &&
844 | 917 | pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
845 | 918 | void *head = pt_buffer_region(buf);
846 | 919 |
.. | .. |
865 | 938 | */
866 | 939 | static void pt_read_offset(struct pt_buffer *buf)
867 | 940 | {
868 | | - u64 offset, base_topa;
| 941 | + struct pt *pt = this_cpu_ptr(&pt_ctx);
| 942 | + struct topa_page *tp;
869 | 943 |
870 | | - rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
871 | | - buf->cur = phys_to_virt(base_topa);
| 944 | + if (!buf->single) {
| 945 | + rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
| 946 | + tp = phys_to_virt(pt->output_base);
| 947 | + buf->cur = &tp->topa;
| 948 | + }
872 | 949 |
873 | | - rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
| 950 | + rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
874 | 951 | /* offset within current output region */
875 | | - buf->output_off = offset >> 32;
| 952 | + buf->output_off = pt->output_mask >> 32;
876 | 953 | /* index of current output region within this table */
877 | | - buf->cur_idx = (offset & 0xffffff80) >> 7;
| 954 | + if (!buf->single)
| 955 | + buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
878 | 956 | }
879 | 957 |
880 | | -/**
881 | | - * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
882 | | - * @buf: PT buffer.
883 | | - * @pg: Page offset in the buffer.
884 | | - *
885 | | - * When advancing to the next output region (ToPA entry), given a page offset
886 | | - * into the buffer, we need to find the offset of the first page in the next
887 | | - * region.
888 | | - */
889 | | -static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
| 958 | +static struct topa_entry *
| 959 | +pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
890 | 960 | {
891 | | - struct topa_entry *te = buf->topa_index[pg];
| 961 | + struct topa_page *tp;
| 962 | + struct topa *topa;
| 963 | + unsigned int idx, cur_pg = 0, z_pg = 0, start_idx = 0;
892 | 964 |
893 | | - /* one region */
894 | | - if (buf->first == buf->last && buf->first->last == 1)
895 | | - return pg;
| 965 | + /*
| 966 | + * Indicates a bug in the caller.
| 967 | + */
| 968 | + if (WARN_ON_ONCE(pg >= buf->nr_pages))
| 969 | + return NULL;
896 | 970 |
897 | | - do {
898 | | - pg++;
899 | | - pg &= buf->nr_pages - 1;
900 | | - } while (buf->topa_index[pg] == te);
| 971 | + /*
| 972 | + * First, find the ToPA table where @pg fits. With high
| 973 | + * order allocations, there shouldn't be many of these.
| 974 | + */
| 975 | + list_for_each_entry(topa, &buf->tables, list) {
| 976 | + if (topa->offset + topa->size > pg << PAGE_SHIFT)
| 977 | + goto found;
| 978 | + }
901 | 979 |
902 | | - return pg;
| 980 | + /*
| 981 | + * Hitting this means we have a problem in the ToPA
| 982 | + * allocation code.
| 983 | + */
| 984 | + WARN_ON_ONCE(1);
| 985 | +
| 986 | + return NULL;
| 987 | +
| 988 | +found:
| 989 | + /*
| 990 | + * Indicates a problem in the ToPA allocation code.
| 991 | + */
| 992 | + if (WARN_ON_ONCE(topa->last == -1))
| 993 | + return NULL;
| 994 | +
| 995 | + tp = topa_to_page(topa);
| 996 | + cur_pg = PFN_DOWN(topa->offset);
| 997 | + if (topa->z_count) {
| 998 | + z_pg = TOPA_ENTRY_PAGES(topa, 0) * (topa->z_count + 1);
| 999 | + start_idx = topa->z_count + 1;
| 1000 | + }
| 1001 | +
| 1002 | + /*
| 1003 | + * Multiple entries at the beginning of the table have the same size,
| 1004 | + * ideally all of them; if @pg falls there, the search is done.
| 1005 | + */
| 1006 | + if (pg >= cur_pg && pg < cur_pg + z_pg) {
| 1007 | + idx = (pg - cur_pg) / TOPA_ENTRY_PAGES(topa, 0);
| 1008 | + return &tp->table[idx];
| 1009 | + }
| 1010 | +
| 1011 | + /*
| 1012 | + * Otherwise, slow path: iterate through the remaining entries.
| 1013 | + */
| 1014 | + for (idx = start_idx, cur_pg += z_pg; idx < topa->last; idx++) {
| 1015 | + if (cur_pg + TOPA_ENTRY_PAGES(topa, idx) > pg)
| 1016 | + return &tp->table[idx];
| 1017 | +
| 1018 | + cur_pg += TOPA_ENTRY_PAGES(topa, idx);
| 1019 | + }
| 1020 | +
| 1021 | + /*
| 1022 | + * Means we couldn't find a ToPA entry in the table that does match.
| 1023 | + */
| 1024 | + WARN_ON_ONCE(1);
| 1025 | +
| 1026 | + return NULL;
| 1027 | +}
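
This function replaces the old per-page topa_index[] reverse map (deleted above) with an on-demand walk: each struct topa knows the buffer offset of its first region and the total size of its regions, so the right table is found by a short list scan, after which the z_count prefix or a linear tail scan pins down the entry. A toy model of the table-selection step; the table geometry is invented and PAGE_SHIFT is assumed to be 12:

    #include <stdio.h>

    /* Model of pt_topa_entry_for_page()'s table walk: each table records the
     * buffer offset of its first region and the total size of its regions,
     * so locating a page is a short list scan, not a per-page array lookup. */
    struct table { unsigned long offset, size; };

    int main(void)
    {
            /* Hypothetical 3-table buffer: 64, 32 and 32 pages' worth. */
            struct table tables[] = {
                    { 0,         64 * 4096 },
                    { 64 * 4096, 32 * 4096 },
                    { 96 * 4096, 32 * 4096 },
            };
            unsigned long pg = 77; /* page offset into the AUX buffer */

            for (unsigned int i = 0; i < 3; i++) {
                    if (tables[i].offset + tables[i].size > pg << 12) {
                            printf("page %lu is in table %u\n", pg, i);
                            break;
                    }
            }
            return 0;
    }
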
| 1028 | +
| 1029 | +static struct topa_entry *
| 1030 | +pt_topa_prev_entry(struct pt_buffer *buf, struct topa_entry *te)
| 1031 | +{
| 1032 | + unsigned long table = (unsigned long)te & ~(PAGE_SIZE - 1);
| 1033 | + struct topa_page *tp;
| 1034 | + struct topa *topa;
| 1035 | +
| 1036 | + tp = (struct topa_page *)table;
| 1037 | + if (tp->table != te)
| 1038 | + return --te;
| 1039 | +
| 1040 | + topa = &tp->topa;
| 1041 | + if (topa == buf->first)
| 1042 | + topa = buf->last;
| 1043 | + else
| 1044 | + topa = list_prev_entry(topa, list);
| 1045 | +
| 1046 | + tp = topa_to_page(topa);
| 1047 | +
| 1048 | + return &tp->table[topa->last - 1];
903 | 1049 | }
904 | 1050 |
905 | 1051 | /**
.. | .. |
922 | 1068 | unsigned long head = local64_read(&buf->head);
923 | 1069 | unsigned long idx, npages, wakeup;
924 | 1070 |
| 1071 | + if (buf->single)
| 1072 | + return 0;
| 1073 | +
925 | 1074 | /* can't stop in the middle of an output region */
926 | | - if (buf->output_off + handle->size + 1 <
927 | | - sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
| 1075 | + if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
928 | 1076 | perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
929 | 1077 | return -EINVAL;
930 | 1078 | }
931 | 1079 |
932 | 1080 |
933 | 1081 | /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
934 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries))
| 1082 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
935 | 1083 | return 0;
936 | 1084 |
937 | 1085 | /* clear STOP and INT from current entry */
938 | | - buf->topa_index[buf->stop_pos]->stop = 0;
939 | | - buf->topa_index[buf->stop_pos]->intr = 0;
940 | | - buf->topa_index[buf->intr_pos]->intr = 0;
| 1086 | + if (buf->stop_te) {
| 1087 | + buf->stop_te->stop = 0;
| 1088 | + buf->stop_te->intr = 0;
| 1089 | + }
| 1090 | +
| 1091 | + if (buf->intr_te)
| 1092 | + buf->intr_te->intr = 0;
941 | 1093 |
942 | 1094 | /* how many pages till the STOP marker */
943 | 1095 | npages = handle->size >> PAGE_SHIFT;
.. | .. |
948 | 1100 |
949 | 1101 | idx = (head >> PAGE_SHIFT) + npages;
950 | 1102 | idx &= buf->nr_pages - 1;
951 | | - buf->stop_pos = idx;
| 1103 | +
| 1104 | + if (idx != buf->stop_pos) {
| 1105 | + buf->stop_pos = idx;
| 1106 | + buf->stop_te = pt_topa_entry_for_page(buf, idx);
| 1107 | + buf->stop_te = pt_topa_prev_entry(buf, buf->stop_te);
| 1108 | + }
952 | 1109 |
953 | 1110 | wakeup = handle->wakeup >> PAGE_SHIFT;
954 | 1111 |
.. | .. |
958 | 1115 | idx = wakeup;
959 | 1116 |
960 | 1117 | idx &= buf->nr_pages - 1;
961 | | - buf->intr_pos = idx;
962 | | -
963 | | - buf->topa_index[buf->stop_pos]->stop = 1;
964 | | - buf->topa_index[buf->stop_pos]->intr = 1;
965 | | - buf->topa_index[buf->intr_pos]->intr = 1;
966 | | -
967 | | - return 0;
968 | | -}
969 | | -
970 | | -/**
971 | | - * pt_buffer_setup_topa_index() - build topa_index[] table of regions
972 | | - * @buf: PT buffer.
973 | | - *
974 | | - * topa_index[] references output regions indexed by offset into the
975 | | - * buffer for purposes of quick reverse lookup.
976 | | - */
977 | | -static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
978 | | -{
979 | | - struct topa *cur = buf->first, *prev = buf->last;
980 | | - struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
981 | | - *te_prev = TOPA_ENTRY(prev, prev->last - 1);
982 | | - int pg = 0, idx = 0;
983 | | -
984 | | - while (pg < buf->nr_pages) {
985 | | - int tidx;
986 | | -
987 | | - /* pages within one topa entry */
988 | | - for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
989 | | - buf->topa_index[pg] = te_prev;
990 | | -
991 | | - te_prev = te_cur;
992 | | -
993 | | - if (idx == cur->last - 1) {
994 | | - /* advance to next topa table */
995 | | - idx = 0;
996 | | - cur = list_entry(cur->list.next, struct topa, list);
997 | | - } else {
998 | | - idx++;
999 | | - }
1000 | | - te_cur = TOPA_ENTRY(cur, idx);
| 1118 | + if (idx != buf->intr_pos) {
| 1119 | + buf->intr_pos = idx;
| 1120 | + buf->intr_te = pt_topa_entry_for_page(buf, idx);
| 1121 | + buf->intr_te = pt_topa_prev_entry(buf, buf->intr_te);
1001 | 1122 | }
1002 | 1123 |
| 1124 | + buf->stop_te->stop = 1;
| 1125 | + buf->stop_te->intr = 1;
| 1126 | + buf->intr_te->intr = 1;
| 1127 | +
| 1128 | + return 0;
1003 | 1129 | }
1004 | 1130 |
1005 | 1131 | /**
.. | .. |
1019 | 1145 | */
1020 | 1146 | static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
1021 | 1147 | {
| 1148 | + struct topa_page *cur_tp;
| 1149 | + struct topa_entry *te;
1022 | 1150 | int pg;
1023 | 1151 |
1024 | 1152 | if (buf->snapshot)
1025 | 1153 | head &= (buf->nr_pages << PAGE_SHIFT) - 1;
1026 | 1154 |
1027 | | - pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
1028 | | - pg = pt_topa_next_entry(buf, pg);
| 1155 | + if (!buf->single) {
| 1156 | + pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
| 1157 | + te = pt_topa_entry_for_page(buf, pg);
1029 | 1158 |
1030 | | - buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
1031 | | - buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
1032 | | - (unsigned long)buf->cur) / sizeof(struct topa_entry);
1033 | | - buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
| 1159 | + cur_tp = topa_entry_to_page(te);
| 1160 | + buf->cur = &cur_tp->topa;
| 1161 | + buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
| 1162 | + buf->output_off = head & (pt_buffer_region_size(buf) - 1);
| 1163 | + } else {
| 1164 | + buf->output_off = head;
| 1165 | + }
1034 | 1166 |
1035 | 1167 | local64_set(&buf->head, head);
1036 | 1168 | local_set(&buf->data_size, 0);
.. | .. |
1043 | 1175 | static void pt_buffer_fini_topa(struct pt_buffer *buf)
1044 | 1176 | {
1045 | 1177 | struct topa *topa, *iter;
| 1178 | +
| 1179 | + if (buf->single)
| 1180 | + return;
1046 | 1181 |
1047 | 1182 | list_for_each_entry_safe(topa, iter, &buf->tables, list) {
1048 | 1183 | /*
.. | .. |
1059 | 1194 | * @size: Total size of all regions within this ToPA.
1060 | 1195 | * @gfp: Allocation flags.
1061 | 1196 | */
1062 | | -static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
1063 | | - gfp_t gfp)
| 1197 | +static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
| 1198 | + unsigned long nr_pages, gfp_t gfp)
1064 | 1199 | {
1065 | 1200 | struct topa *topa;
1066 | 1201 | int err;
1067 | 1202 |
1068 | | - topa = topa_alloc(buf->cpu, gfp);
| 1203 | + topa = topa_alloc(cpu, gfp);
1069 | 1204 | if (!topa)
1070 | 1205 | return -ENOMEM;
1071 | 1206 |
1072 | 1207 | topa_insert_table(buf, topa);
1073 | 1208 |
1074 | 1209 | while (buf->nr_pages < nr_pages) {
1075 | | - err = topa_insert_pages(buf, gfp);
| 1210 | + err = topa_insert_pages(buf, cpu, gfp);
1076 | 1211 | if (err) {
1077 | 1212 | pt_buffer_fini_topa(buf);
1078 | 1213 | return -ENOMEM;
1079 | 1214 | }
1080 | 1215 | }
1081 | 1216 |
1082 | | - pt_buffer_setup_topa_index(buf);
1083 | | -
1084 | 1217 | /* link last table to the first one, unless we're double buffering */
1085 | | - if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
1086 | | - TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
| 1218 | + if (intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
| 1219 | + TOPA_ENTRY(buf->last, -1)->base = topa_pfn(buf->first);
1087 | 1220 | TOPA_ENTRY(buf->last, -1)->end = 1;
1088 | 1221 | }
1089 | 1222 |
1090 | 1223 | pt_topa_dump(buf);
1091 | 1224 | return 0;
| 1225 | +}
| 1226 | +
| 1227 | +static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
| 1228 | +{
| 1229 | + struct page *p = virt_to_page(buf->data_pages[0]);
| 1230 | + int ret = -ENOTSUPP, order = 0;
| 1231 | +
| 1232 | + /*
| 1233 | + * We can use single range output mode
| 1234 | + * + in snapshot mode, where we don't need interrupts;
| 1235 | + * + if the hardware supports it;
| 1236 | + * + if the entire buffer is one contiguous allocation.
| 1237 | + */
| 1238 | + if (!buf->snapshot)
| 1239 | + goto out;
| 1240 | +
| 1241 | + if (!intel_pt_validate_hw_cap(PT_CAP_single_range_output))
| 1242 | + goto out;
| 1243 | +
| 1244 | + if (PagePrivate(p))
| 1245 | + order = page_private(p);
| 1246 | +
| 1247 | + if (1 << order != nr_pages)
| 1248 | + goto out;
| 1249 | +
| 1250 | + /*
| 1251 | + * Some processors cannot always support single range for more than
| 1252 | + * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might
| 1253 | + * also be affected, so for now rather than trying to keep track of
| 1254 | + * which ones, just disable it for all.
| 1255 | + */
| 1256 | + if (nr_pages > 1)
| 1257 | + goto out;
| 1258 | +
| 1259 | + buf->single = true;
| 1260 | + buf->nr_pages = nr_pages;
| 1261 | + ret = 0;
| 1262 | +out:
| 1263 | + return ret;
1092 | 1264 | }
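
pt_buffer_try_single() opts into single-range output only when every condition holds: a snapshot buffer (so no PMIs are wanted), hardware support for PT_CAP_single_range_output, and a buffer that is one contiguous allocation (for high-order AUX allocations the perf core stashes the order in page_private() of the head page). The nr_pages > 1 bail-out is a later hardening folded into this view, per the TGL052/ADL037/RPL017 errata cited in the comment. A condition-level sketch of the gating; the function and parameter names here are mine, not the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of pt_buffer_try_single()'s gating logic; "order" stands in
     * for page_private(p) on the head page of the allocation. */
    static bool can_use_single_range(bool snapshot, bool hw_single_range,
                                     int order, int nr_pages)
    {
            if (!snapshot)                 /* non-snapshot needs PMIs -> ToPA */
                    return false;
            if (!hw_single_range)          /* PT_CAP_single_range_output == 0 */
                    return false;
            if ((1 << order) != nr_pages)  /* not one contiguous allocation */
                    return false;
            if (nr_pages > 1)              /* errata TGL052/ADL037/RPL017 */
                    return false;
            return true;
    }

    int main(void)
    {
            printf("%d\n", can_use_single_range(true, true, 0, 1));  /* 1 */
            printf("%d\n", can_use_single_range(true, true, 3, 8));  /* 0 */
            return 0;
    }
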
1093 | 1265 |
1094 | 1266 | /**
.. | .. |
1113 | 1285 | if (!nr_pages)
1114 | 1286 | return NULL;
1115 | 1287 |
| 1288 | + /*
| 1289 | + * Only support AUX sampling in snapshot mode, where we don't
| 1290 | + * generate NMIs.
| 1291 | + */
| 1292 | + if (event->attr.aux_sample_size && !snapshot)
| 1293 | + return NULL;
| 1294 | +
1116 | 1295 | if (cpu == -1)
1117 | 1296 | cpu = raw_smp_processor_id();
1118 | 1297 | node = cpu_to_node(cpu);
1119 | 1298 |
1120 | | - buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
1121 | | - GFP_KERNEL, node);
| 1299 | + buf = kzalloc_node(sizeof(struct pt_buffer), GFP_KERNEL, node);
1122 | 1300 | if (!buf)
1123 | 1301 | return NULL;
1124 | 1302 |
1125 | | - buf->cpu = cpu;
1126 | 1303 | buf->snapshot = snapshot;
1127 | 1304 | buf->data_pages = pages;
| 1305 | + buf->stop_pos = -1;
| 1306 | + buf->intr_pos = -1;
1128 | 1307 |
1129 | 1308 | INIT_LIST_HEAD(&buf->tables);
1130 | 1309 |
1131 | | - ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
| 1310 | + ret = pt_buffer_try_single(buf, nr_pages);
| 1311 | + if (!ret)
| 1312 | + return buf;
| 1313 | +
| 1314 | + ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL);
1132 | 1315 | if (ret) {
1133 | 1316 | kfree(buf);
1134 | 1317 | return NULL;
.. | .. |
1154 | 1337 | struct pt_filters *filters;
1155 | 1338 | int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
1156 | 1339 |
1157 | | - if (!pt_cap_get(PT_CAP_num_address_ranges))
| 1340 | + if (!intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
1158 | 1341 | return 0;
1159 | 1342 |
1160 | 1343 | filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
.. | .. |
1176 | 1359 | event->hw.addr_filters = NULL;
1177 | 1360 | }
1178 | 1361 |
1179 | | -static inline bool valid_kernel_ip(unsigned long ip)
| 1362 | +#ifdef CONFIG_X86_64
| 1363 | +static u64 canonical_address(u64 vaddr, u8 vaddr_bits)
1180 | 1364 | {
1181 | | - return virt_addr_valid(ip) && kernel_ip(ip);
| 1365 | + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
1182 | 1366 | }
| 1367 | +
| 1368 | +static u64 is_canonical_address(u64 vaddr, u8 vaddr_bits)
| 1369 | +{
| 1370 | + return canonical_address(vaddr, vaddr_bits) == vaddr;
| 1371 | +}
| 1372 | +
| 1373 | +/* Clamp to a canonical address greater-than-or-equal-to the address given */
| 1374 | +static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits)
| 1375 | +{
| 1376 | + return is_canonical_address(vaddr, vaddr_bits) ?
| 1377 | + vaddr :
| 1378 | + -BIT_ULL(vaddr_bits - 1);
| 1379 | +}
| 1380 | +
| 1381 | +/* Clamp to a canonical address less-than-or-equal-to the address given */
| 1382 | +static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits)
| 1383 | +{
| 1384 | + return is_canonical_address(vaddr, vaddr_bits) ?
| 1385 | + vaddr :
| 1386 | + BIT_ULL(vaddr_bits - 1) - 1;
| 1387 | +}
| 1388 | +#else
| 1389 | +#define clamp_to_ge_canonical_addr(x, y) (x)
| 1390 | +#define clamp_to_le_canonical_addr(x, y) (x)
| 1391 | +#endif
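
canonical_address() is the standard sign-extension trick: shift the address left so bit (vaddr_bits - 1) lands in bit 63, then arithmetic-shift back, replicating that bit through the upper half. The clamps then snap a non-canonical value to the nearest canonical address from the required side; for 48 valid bits, -BIT_ULL(47) is 0xffff800000000000 and BIT_ULL(47) - 1 is 0x00007fffffffffff. A standalone check of the arithmetic, assuming 48 valid bits:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t canonical_address(uint64_t vaddr, uint8_t bits)
    {
            /* Sign-extend bit (bits - 1) through bit 63. */
            return (uint64_t)(((int64_t)vaddr << (64 - bits)) >> (64 - bits));
    }

    int main(void)
    {
            uint8_t bits = 48; /* boot_cpu_data.x86_virt_bits on many CPUs */
            uint64_t lo = 0x00007fffffffffffULL;  /* canonical */
            uint64_t bad = 0x0000800000000000ULL; /* non-canonical hole */

            printf("%#llx -> %#llx\n", (unsigned long long)lo,
                   (unsigned long long)canonical_address(lo, bits));
            /* The hole address sign-extends to 0xffff800000000000, which is
             * also what clamp_to_ge_canonical_addr() returns for it. */
            printf("%#llx -> %#llx\n", (unsigned long long)bad,
                   (unsigned long long)canonical_address(bad, bits));
            return 0;
    }
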
1183 | 1392 |
1184 | 1393 | static int pt_event_addr_filters_validate(struct list_head *filters)
1185 | 1394 | {
.. | .. |
1195 | 1404 | filter->action == PERF_ADDR_FILTER_ACTION_START)
1196 | 1405 | return -EOPNOTSUPP;
1197 | 1406 |
1198 | | - if (!filter->path.dentry) {
1199 | | - if (!valid_kernel_ip(filter->offset))
1200 | | - return -EINVAL;
1201 | | -
1202 | | - if (!valid_kernel_ip(filter->offset + filter->size))
1203 | | - return -EINVAL;
1204 | | - }
1205 | | -
1206 | | - if (++range > pt_cap_get(PT_CAP_num_address_ranges))
| 1407 | + if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
1207 | 1408 | return -EOPNOTSUPP;
1208 | 1409 | }
1209 | 1410 |
.. | .. |
1226 | 1427 | if (filter->path.dentry && !fr[range].start) {
1227 | 1428 | msr_a = msr_b = 0;
1228 | 1429 | } else {
1229 | | - /* apply the offset */
1230 | | - msr_a = fr[range].start;
1231 | | - msr_b = msr_a + fr[range].size - 1;
| 1430 | + unsigned long n = fr[range].size - 1;
| 1431 | + unsigned long a = fr[range].start;
| 1432 | + unsigned long b;
| 1433 | +
| 1434 | + if (a > ULONG_MAX - n)
| 1435 | + b = ULONG_MAX;
| 1436 | + else
| 1437 | + b = a + n;
| 1438 | + /*
| 1439 | + * Apply the offset. 64-bit addresses written to the
| 1440 | + * MSRs must be canonical, but the range can encompass
| 1441 | + * non-canonical addresses. Since software cannot
| 1442 | + * execute at non-canonical addresses, adjusting to
| 1443 | + * canonical addresses does not affect the result of the
| 1444 | + * address filter.
| 1445 | + */
| 1446 | + msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits);
| 1447 | + msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits);
| 1448 | + if (msr_b < msr_a)
| 1449 | + msr_a = msr_b = 0;
1232 | 1450 | }
1233 | 1451 |
1234 | 1452 | filters->filter[range].msr_a = msr_a;
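
Two fixes travel together in this hunk: the range end is computed as a + (size - 1) with explicit saturation to ULONG_MAX, so a filter reaching the top of the address space cannot wrap around to a tiny msr_b, and both ends are then clamped canonical, collapsing to the empty 0/0 filter if the clamped range inverts. Worked numbers for the saturation path, with invented addresses and 48 valid bits assumed:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t a = 0xffffffffffff0000ULL; /* filter start, near the top */
            uint64_t n = 0x20000 - 1;           /* size - 1: a + n would wrap */
            uint64_t b;

            /* Saturating end-of-range, as in the hunk above. */
            if (a > UINT64_MAX - n)
                    b = UINT64_MAX;
            else
                    b = a + n;

            /* Both ends are already canonical (high half) for 48 bits, so the
             * clamps would keep them; an inverted range would be zeroed. */
            printf("msr_a = %#llx, msr_b = %#llx\n",
                   (unsigned long long)a, (unsigned long long)b);
            return 0;
    }
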
.. | .. |
1294 | 1512 | return;
1295 | 1513 | }
1296 | 1514 |
1297 | | - pt_config_buffer(buf->cur->table, buf->cur_idx,
1298 | | - buf->output_off);
1299 | | - pt_config(event);
| 1515 | + pt_config_buffer(buf);
| 1516 | + pt_config_start(event);
1300 | 1517 | }
1301 | 1518 | }
1302 | 1519 |
.. | .. |
1359 | 1576 | WRITE_ONCE(pt->handle_nmi, 1);
1360 | 1577 | hwc->state = 0;
1361 | 1578 |
1362 | | - pt_config_buffer(buf->cur->table, buf->cur_idx,
1363 | | - buf->output_off);
| 1579 | + pt_config_buffer(buf);
1364 | 1580 | pt_config(event);
1365 | 1581 |
1366 | 1582 | return;
.. | .. |
1409 | 1625 | buf->nr_pages << PAGE_SHIFT);
1410 | 1626 | perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
1411 | 1627 | }
| 1628 | +}
| 1629 | +
| 1630 | +static long pt_event_snapshot_aux(struct perf_event *event,
| 1631 | + struct perf_output_handle *handle,
| 1632 | + unsigned long size)
| 1633 | +{
| 1634 | + struct pt *pt = this_cpu_ptr(&pt_ctx);
| 1635 | + struct pt_buffer *buf = perf_get_aux(&pt->handle);
| 1636 | + unsigned long from = 0, to;
| 1637 | + long ret;
| 1638 | +
| 1639 | + if (WARN_ON_ONCE(!buf))
| 1640 | + return 0;
| 1641 | +
| 1642 | + /*
| 1643 | + * Sampling is only allowed on snapshot events;
| 1644 | + * see pt_buffer_setup_aux().
| 1645 | + */
| 1646 | + if (WARN_ON_ONCE(!buf->snapshot))
| 1647 | + return 0;
| 1648 | +
| 1649 | + /*
| 1650 | + * Here, handle_nmi tells us if the tracing is on
| 1651 | + */
| 1652 | + if (READ_ONCE(pt->handle_nmi))
| 1653 | + pt_config_stop(event);
| 1654 | +
| 1655 | + pt_read_offset(buf);
| 1656 | + pt_update_head(pt);
| 1657 | +
| 1658 | + to = local_read(&buf->data_size);
| 1659 | + if (to < size)
| 1660 | + from = buf->nr_pages << PAGE_SHIFT;
| 1661 | + from += to - size;
| 1662 | +
| 1663 | + ret = perf_output_copy_aux(&pt->handle, handle, from, to);
| 1664 | +
| 1665 | + /*
| 1666 | + * If the tracing was on when we turned up, restart it.
| 1667 | + * Compiler barrier not needed as we couldn't have been
| 1668 | + * preempted by anything that touches pt->handle_nmi.
| 1669 | + */
| 1670 | + if (pt->handle_nmi)
| 1671 | + pt_config_start(event);
| 1672 | +
| 1673 | + return ret;
1412 | 1674 | }
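
The copy math in pt_event_snapshot_aux() reads: "to" is the current write position in the ring, and when fewer than "size" bytes precede it, "from" borrows one full buffer length before subtracting, so the window wraps through the end of the buffer and perf_output_copy_aux() is handed the wrapped [from, to) range. A quick arithmetic model, with invented buffer geometry:

    #include <stdio.h>

    int main(void)
    {
            unsigned long buf_size = 16 * 4096; /* nr_pages << PAGE_SHIFT */
            unsigned long to = 0x1000;          /* write head: 4 KiB new data */
            unsigned long size = 0x3000;        /* caller wants the last 12 KiB */
            unsigned long from = 0;

            /* Same computation as pt_event_snapshot_aux(): wrap backwards. */
            if (to < size)
                    from = buf_size;
            from += to - size;

            /* from = 0x10000 + 0x1000 - 0x3000 = 0xe000: copy 0xe000..0x10000,
             * then 0x0..0x1000 (the copy helper handles the wrap). */
            printf("from = %#lx, to = %#lx\n", from, to);
            return 0;
    }
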
1413 | 1675 |
1414 | 1676 | static void pt_event_del(struct perf_event *event, int mode)
.. | .. |
1479 | 1741 | pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
1480 | 1742 | }
1481 | 1743 |
| 1744 | +int is_intel_pt_event(struct perf_event *event)
| 1745 | +{
| 1746 | + return event->pmu == &pt_pmu.pmu;
| 1747 | +}
| 1748 | +
1482 | 1749 | static __init int pt_init(void)
1483 | 1750 | {
1484 | 1751 | int ret, cpu, prior_warn = 0;
.. | .. |
1509 | 1776 | if (ret)
1510 | 1777 | return ret;
1511 | 1778 |
1512 | | - if (!pt_cap_get(PT_CAP_topa_output)) {
| 1779 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_output)) {
1513 | 1780 | pr_warn("ToPA output is not supported on this CPU\n");
1514 | 1781 | return -ENODEV;
1515 | 1782 | }
1516 | 1783 |
1517 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries))
1518 | | - pt_pmu.pmu.capabilities =
1519 | | - PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
| 1784 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
| 1785 | + pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
1520 | 1786 |
1521 | 1787 | pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
1522 | 1788 | pt_pmu.pmu.attr_groups = pt_attr_groups;
.. | .. |
1526 | 1792 | pt_pmu.pmu.del = pt_event_del;
1527 | 1793 | pt_pmu.pmu.start = pt_event_start;
1528 | 1794 | pt_pmu.pmu.stop = pt_event_stop;
| 1795 | + pt_pmu.pmu.snapshot_aux = pt_event_snapshot_aux;
1529 | 1796 | pt_pmu.pmu.read = pt_event_read;
1530 | 1797 | pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
1531 | 1798 | pt_pmu.pmu.free_aux = pt_buffer_free_aux;
1532 | 1799 | pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync;
1533 | 1800 | pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
1534 | 1801 | pt_pmu.pmu.nr_addr_filters =
1535 | | - pt_cap_get(PT_CAP_num_address_ranges);
| 1802 | + intel_pt_validate_hw_cap(PT_CAP_num_address_ranges);
1536 | 1803 |
1537 | 1804 | ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
1538 | 1805 |