| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Intel(R) Processor Trace PMU driver for perf |
|---|
| 3 | 4 | * Copyright (c) 2013-2014, Intel Corporation. |
|---|
| 4 | | - * |
|---|
| 5 | | - * This program is free software; you can redistribute it and/or modify it |
|---|
| 6 | | - * under the terms and conditions of the GNU General Public License, |
|---|
| 7 | | - * version 2, as published by the Free Software Foundation. |
|---|
| 8 | | - * |
|---|
| 9 | | - * This program is distributed in the hope it will be useful, but WITHOUT |
|---|
| 10 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|---|
| 11 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|---|
| 12 | | - * more details. |
|---|
| 13 | 5 | * |
|---|
| 14 | 6 | * Intel PT is specified in the Intel Architecture Instruction Set Extensions |
|---|
| 15 | 7 | * Programming Reference: |
|---|
| .. | .. |
|---|
| 21 | 13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
|---|
| 22 | 14 | |
|---|
| 23 | 15 | #include <linux/types.h> |
|---|
| 16 | +#include <linux/bits.h> |
|---|
| 17 | +#include <linux/limits.h> |
|---|
| 24 | 18 | #include <linux/slab.h> |
|---|
| 25 | 19 | #include <linux/device.h> |
|---|
| 26 | 20 | |
|---|
| .. | .. |
|---|
| 68 | 62 | PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)), |
|---|
| 69 | 63 | PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)), |
|---|
| 70 | 64 | PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)), |
|---|
| 65 | + PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)), |
|---|
| 71 | 66 | PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)), |
|---|
| 72 | 67 | PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7), |
|---|
| 73 | 68 | PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000), |
|---|
| .. | .. |
|---|
| 75 | 70 | PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000), |
|---|
| 76 | 71 | }; |
|---|
| 77 | 72 | |
|---|
| 78 | | -static u32 pt_cap_get(enum pt_capabilities cap) |
|---|
| 73 | +u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) |
|---|
| 79 | 74 | { |
|---|
| 80 | | - struct pt_cap_desc *cd = &pt_caps[cap]; |
|---|
| 81 | | - u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg]; |
|---|
| 75 | + struct pt_cap_desc *cd = &pt_caps[capability]; |
|---|
| 76 | + u32 c = caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg]; |
|---|
| 82 | 77 | unsigned int shift = __ffs(cd->mask); |
|---|
| 83 | 78 | |
|---|
| 84 | 79 | return (c & cd->mask) >> shift; |
|---|
| 85 | 80 | } |
|---|
| 81 | +EXPORT_SYMBOL_GPL(intel_pt_validate_cap); |
|---|
| 82 | + |
|---|
| 83 | +u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) |
|---|
| 84 | +{ |
|---|
| 85 | + return intel_pt_validate_cap(pt_pmu.caps, cap); |
|---|
| 86 | +} |
|---|
| 87 | +EXPORT_SYMBOL_GPL(intel_pt_validate_hw_cap); |
|---|
| 86 | 88 | |
|---|
| 87 | 89 | static ssize_t pt_cap_show(struct device *cdev, |
|---|
| 88 | 90 | struct device_attribute *attr, |
|---|
| .. | .. |
|---|
| 92 | 94 | container_of(attr, struct dev_ext_attribute, attr); |
|---|
| 93 | 95 | enum pt_capabilities cap = (long)ea->var; |
|---|
| 94 | 96 | |
|---|
| 95 | | - return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap)); |
|---|
| 97 | + return snprintf(buf, PAGE_SIZE, "%x\n", intel_pt_validate_hw_cap(cap)); |
|---|
| 96 | 98 | } |
|---|
| 97 | 99 | |
|---|
| 98 | | -static struct attribute_group pt_cap_group = { |
|---|
| 100 | +static struct attribute_group pt_cap_group __ro_after_init = { |
|---|
| 99 | 101 | .name = "caps", |
|---|
| 100 | 102 | }; |
|---|
| 101 | 103 | |
|---|
| .. | .. |
|---|
| 204 | 206 | |
|---|
| 205 | 207 | /* model-specific quirks */ |
|---|
| 206 | 208 | switch (boot_cpu_data.x86_model) { |
|---|
| 207 | | - case INTEL_FAM6_BROADWELL_CORE: |
|---|
| 208 | | - case INTEL_FAM6_BROADWELL_XEON_D: |
|---|
| 209 | | - case INTEL_FAM6_BROADWELL_GT3E: |
|---|
| 209 | + case INTEL_FAM6_BROADWELL: |
|---|
| 210 | + case INTEL_FAM6_BROADWELL_D: |
|---|
| 211 | + case INTEL_FAM6_BROADWELL_G: |
|---|
| 210 | 212 | case INTEL_FAM6_BROADWELL_X: |
|---|
| 211 | 213 | /* not setting BRANCH_EN will #GP, erratum BDM106 */ |
|---|
| 212 | 214 | pt_pmu.branch_en_always_on = true; |
|---|
| .. | .. |
|---|
| 225 | 227 | if (reg & BIT(14)) |
|---|
| 226 | 228 | pt_pmu.vmx = true; |
|---|
| 227 | 229 | } |
|---|
| 228 | | - |
|---|
| 229 | | - attrs = NULL; |
|---|
| 230 | 230 | |
|---|
| 231 | 231 | for (i = 0; i < PT_CPUID_LEAVES; i++) { |
|---|
| 232 | 232 | cpuid_count(20, i, |
|---|
| .. | .. |
|---|
| 310 | 310 | return false; |
|---|
| 311 | 311 | |
|---|
| 312 | 312 | if (config & RTIT_CTL_CYC_PSB) { |
|---|
| 313 | | - if (!pt_cap_get(PT_CAP_psb_cyc)) |
|---|
| 313 | + if (!intel_pt_validate_hw_cap(PT_CAP_psb_cyc)) |
|---|
| 314 | 314 | return false; |
|---|
| 315 | 315 | |
|---|
| 316 | | - allowed = pt_cap_get(PT_CAP_psb_periods); |
|---|
| 316 | + allowed = intel_pt_validate_hw_cap(PT_CAP_psb_periods); |
|---|
| 317 | 317 | requested = (config & RTIT_CTL_PSB_FREQ) >> |
|---|
| 318 | 318 | RTIT_CTL_PSB_FREQ_OFFSET; |
|---|
| 319 | 319 | if (requested && (!(allowed & BIT(requested)))) |
|---|
| 320 | 320 | return false; |
|---|
| 321 | 321 | |
|---|
| 322 | | - allowed = pt_cap_get(PT_CAP_cycle_thresholds); |
|---|
| 322 | + allowed = intel_pt_validate_hw_cap(PT_CAP_cycle_thresholds); |
|---|
| 323 | 323 | requested = (config & RTIT_CTL_CYC_THRESH) >> |
|---|
| 324 | 324 | RTIT_CTL_CYC_THRESH_OFFSET; |
|---|
| 325 | 325 | if (requested && (!(allowed & BIT(requested)))) |
|---|
| .. | .. |
|---|
| 334 | 334 | * Spec says that setting mtc period bits while mtc bit in |
|---|
| 335 | 335 | * CPUID is 0 will #GP, so better safe than sorry. |
|---|
| 336 | 336 | */ |
|---|
| 337 | | - if (!pt_cap_get(PT_CAP_mtc)) |
|---|
| 337 | + if (!intel_pt_validate_hw_cap(PT_CAP_mtc)) |
|---|
| 338 | 338 | return false; |
|---|
| 339 | 339 | |
|---|
| 340 | | - allowed = pt_cap_get(PT_CAP_mtc_periods); |
|---|
| 340 | + allowed = intel_pt_validate_hw_cap(PT_CAP_mtc_periods); |
|---|
| 341 | 341 | if (!allowed) |
|---|
| 342 | 342 | return false; |
|---|
| 343 | 343 | |
|---|
| .. | .. |
|---|
| 349 | 349 | } |
|---|
| 350 | 350 | |
|---|
| 351 | 351 | if (config & RTIT_CTL_PWR_EVT_EN && |
|---|
| 352 | | - !pt_cap_get(PT_CAP_power_event_trace)) |
|---|
| 352 | + !intel_pt_validate_hw_cap(PT_CAP_power_event_trace)) |
|---|
| 353 | 353 | return false; |
|---|
| 354 | 354 | |
|---|
| 355 | 355 | if (config & RTIT_CTL_PTW) { |
|---|
| 356 | | - if (!pt_cap_get(PT_CAP_ptwrite)) |
|---|
| 356 | + if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite)) |
|---|
| 357 | 357 | return false; |
|---|
| 358 | 358 | |
|---|
| 359 | 359 | /* FUPonPTW without PTW doesn't make sense */ |
|---|
| .. | .. |
|---|
| 396 | 396 | * PT configuration helpers |
|---|
| 397 | 397 | * These are all CPU-affine and operate on a local PT |
|---|
| 398 | 398 | */ |
|---|
| 399 | + |
|---|
| 400 | +static void pt_config_start(struct perf_event *event) |
|---|
| 401 | +{ |
|---|
| 402 | + struct pt *pt = this_cpu_ptr(&pt_ctx); |
|---|
| 403 | + u64 ctl = event->hw.config; |
|---|
| 404 | + |
|---|
| 405 | + ctl |= RTIT_CTL_TRACEEN; |
|---|
| 406 | + if (READ_ONCE(pt->vmx_on)) |
|---|
| 407 | + perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL); |
|---|
| 408 | + else |
|---|
| 409 | + wrmsrl(MSR_IA32_RTIT_CTL, ctl); |
|---|
| 410 | + |
|---|
| 411 | + WRITE_ONCE(event->hw.config, ctl); |
|---|
| 412 | +} |
|---|
| 399 | 413 | |
|---|
| 400 | 414 | /* Address ranges and their corresponding msr configuration registers */ |
|---|
| 401 | 415 | static const struct pt_address_range { |
|---|
| .. | .. |
|---|
| 460 | 474 | pt->filters.filter[range].msr_b = filter->msr_b; |
|---|
| 461 | 475 | } |
|---|
| 462 | 476 | |
|---|
| 463 | | - rtit_ctl |= filter->config << pt_address_ranges[range].reg_off; |
|---|
| 477 | + rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off; |
|---|
| 464 | 478 | } |
|---|
| 465 | 479 | |
|---|
| 466 | 480 | return rtit_ctl; |
|---|
| .. | .. |
|---|
| 469 | 483 | static void pt_config(struct perf_event *event) |
|---|
| 470 | 484 | { |
|---|
| 471 | 485 | struct pt *pt = this_cpu_ptr(&pt_ctx); |
|---|
| 486 | + struct pt_buffer *buf = perf_get_aux(&pt->handle); |
|---|
| 472 | 487 | u64 reg; |
|---|
| 473 | 488 | |
|---|
| 474 | 489 | /* First round: clear STATUS, in particular the PSB byte counter. */ |
|---|
| .. | .. |
|---|
| 478 | 493 | } |
|---|
| 479 | 494 | |
|---|
| 480 | 495 | reg = pt_config_filters(event); |
|---|
| 481 | | - reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN; |
|---|
| 496 | + reg |= RTIT_CTL_TRACEEN; |
|---|
| 497 | + if (!buf->single) |
|---|
| 498 | + reg |= RTIT_CTL_TOPA; |
|---|
| 482 | 499 | |
|---|
| 483 | 500 | /* |
|---|
| 484 | 501 | * Previously, we had BRANCH_EN on by default, but now that PT has |
|---|
| .. | .. |
|---|
| 501 | 518 | reg |= (event->attr.config & PT_CONFIG_MASK); |
|---|
| 502 | 519 | |
|---|
| 503 | 520 | event->hw.config = reg; |
|---|
| 504 | | - if (READ_ONCE(pt->vmx_on)) |
|---|
| 505 | | - perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL); |
|---|
| 506 | | - else |
|---|
| 507 | | - wrmsrl(MSR_IA32_RTIT_CTL, reg); |
|---|
| 521 | + pt_config_start(event); |
|---|
| 508 | 522 | } |
|---|
| 509 | 523 | |
|---|
| 510 | 524 | static void pt_config_stop(struct perf_event *event) |
|---|
| .. | .. |
|---|
| 533 | 547 | wmb(); |
|---|
| 534 | 548 | } |
|---|
| 535 | 549 | |
|---|
| 536 | | -static void pt_config_buffer(void *buf, unsigned int topa_idx, |
|---|
| 537 | | - unsigned int output_off) |
|---|
| 538 | | -{ |
|---|
| 539 | | - u64 reg; |
|---|
| 540 | | - |
|---|
| 541 | | - wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf)); |
|---|
| 542 | | - |
|---|
| 543 | | - reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32); |
|---|
| 544 | | - |
|---|
| 545 | | - wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg); |
|---|
| 546 | | -} |
|---|
| 550 | +/** |
|---|
| 551 | + * struct topa - ToPA metadata |
|---|
| 552 | + * @list: linkage to struct pt_buffer's list of tables |
|---|
| 553 | + * @offset: offset of the first entry in this table in the buffer |
|---|
| 554 | + * @size: total size of all entries in this table |
|---|
| 555 | + * @last: index of the last initialized entry in this table |
|---|
| 556 | + * @z_count: how many times the first entry repeats |
|---|
| 557 | + */ |
|---|
| 558 | +struct topa { |
|---|
| 559 | + struct list_head list; |
|---|
| 560 | + u64 offset; |
|---|
| 561 | + size_t size; |
|---|
| 562 | + int last; |
|---|
| 563 | + unsigned int z_count; |
|---|
| 564 | +}; |
|---|
| 547 | 565 | |
|---|
| 548 | 566 | /* |
|---|
| 549 | 567 | * Keep ToPA table-related metadata on the same page as the actual table, |
|---|
| 550 | 568 | * taking up a few words from the top |
|---|
| 551 | 569 | */ |
|---|
| 552 | 570 | |
|---|
| 553 | | -#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1) |
|---|
| 571 | +#define TENTS_PER_PAGE \ |
|---|
| 572 | + ((PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry)) |
|---|
| 554 | 573 | |
|---|
| 555 | 574 | /** |
|---|
| 556 | | - * struct topa - page-sized ToPA table with metadata at the top |
|---|
| 575 | + * struct topa_page - page-sized ToPA table with metadata at the top |
|---|
| 557 | 576 | * @table: actual ToPA table entries, as understood by PT hardware |
|---|
| 558 | | - * @list: linkage to struct pt_buffer's list of tables |
|---|
| 559 | | - * @phys: physical address of this page |
|---|
| 560 | | - * @offset: offset of the first entry in this table in the buffer |
|---|
| 561 | | - * @size: total size of all entries in this table |
|---|
| 562 | | - * @last: index of the last initialized entry in this table |
|---|
| 577 | + * @topa: metadata |
|---|
| 563 | 578 | */ |
|---|
| 564 | | -struct topa { |
|---|
| 579 | +struct topa_page { |
|---|
| 565 | 580 | struct topa_entry table[TENTS_PER_PAGE]; |
|---|
| 566 | | - struct list_head list; |
|---|
| 567 | | - u64 phys; |
|---|
| 568 | | - u64 offset; |
|---|
| 569 | | - size_t size; |
|---|
| 570 | | - int last; |
|---|
| 581 | + struct topa topa; |
|---|
| 571 | 582 | }; |
|---|
| 572 | 583 | |
|---|
| 584 | +static inline struct topa_page *topa_to_page(struct topa *topa) |
|---|
| 585 | +{ |
|---|
| 586 | + return container_of(topa, struct topa_page, topa); |
|---|
| 587 | +} |
|---|
| 588 | + |
|---|
| 589 | +static inline struct topa_page *topa_entry_to_page(struct topa_entry *te) |
|---|
| 590 | +{ |
|---|
| 591 | + return (struct topa_page *)((unsigned long)te & PAGE_MASK); |
|---|
| 592 | +} |
|---|
| 593 | + |
|---|
| 594 | +static inline phys_addr_t topa_pfn(struct topa *topa) |
|---|
| 595 | +{ |
|---|
| 596 | + return PFN_DOWN(virt_to_phys(topa_to_page(topa))); |
|---|
| 597 | +} |
|---|
| 598 | + |
|---|
| 573 | 599 | /* make -1 stand for the last table entry */ |
|---|
| 574 | | -#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)]) |
|---|
| 600 | +#define TOPA_ENTRY(t, i) \ |
|---|
| 601 | + ((i) == -1 \ |
|---|
| 602 | + ? &topa_to_page(t)->table[(t)->last] \ |
|---|
| 603 | + : &topa_to_page(t)->table[(i)]) |
|---|
| 604 | +#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size)) |
|---|
| 605 | +#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size) |
|---|
| 606 | + |
|---|
| 607 | +static void pt_config_buffer(struct pt_buffer *buf) |
|---|
| 608 | +{ |
|---|
| 609 | + struct pt *pt = this_cpu_ptr(&pt_ctx); |
|---|
| 610 | + u64 reg, mask; |
|---|
| 611 | + void *base; |
|---|
| 612 | + |
|---|
| 613 | + if (buf->single) { |
|---|
| 614 | + base = buf->data_pages[0]; |
|---|
| 615 | + mask = (buf->nr_pages * PAGE_SIZE - 1) >> 7; |
|---|
| 616 | + } else { |
|---|
| 617 | + base = topa_to_page(buf->cur)->table; |
|---|
| 618 | + mask = (u64)buf->cur_idx; |
|---|
| 619 | + } |
|---|
| 620 | + |
|---|
| 621 | + reg = virt_to_phys(base); |
|---|
| 622 | + if (pt->output_base != reg) { |
|---|
| 623 | + pt->output_base = reg; |
|---|
| 624 | + wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg); |
|---|
| 625 | + } |
|---|
| 626 | + |
|---|
| 627 | + reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32); |
|---|
| 628 | + if (pt->output_mask != reg) { |
|---|
| 629 | + pt->output_mask = reg; |
|---|
| 630 | + wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg); |
|---|
| 631 | + } |
|---|
| 632 | +} |
|---|
| 575 | 633 | |
|---|
| 576 | 634 | /** |
|---|
| 577 | 635 | * topa_alloc() - allocate page-sized ToPA table |
|---|
| .. | .. |
|---|
| 583 | 641 | static struct topa *topa_alloc(int cpu, gfp_t gfp) |
|---|
| 584 | 642 | { |
|---|
| 585 | 643 | int node = cpu_to_node(cpu); |
|---|
| 586 | | - struct topa *topa; |
|---|
| 644 | + struct topa_page *tp; |
|---|
| 587 | 645 | struct page *p; |
|---|
| 588 | 646 | |
|---|
| 589 | 647 | p = alloc_pages_node(node, gfp | __GFP_ZERO, 0); |
|---|
| 590 | 648 | if (!p) |
|---|
| 591 | 649 | return NULL; |
|---|
| 592 | 650 | |
|---|
| 593 | | - topa = page_address(p); |
|---|
| 594 | | - topa->last = 0; |
|---|
| 595 | | - topa->phys = page_to_phys(p); |
|---|
| 651 | + tp = page_address(p); |
|---|
| 652 | + tp->topa.last = 0; |
|---|
| 596 | 653 | |
|---|
| 597 | 654 | /* |
|---|
| 598 | 655 | * In case of single-entry ToPA, always put the self-referencing END |
|---|
| 599 | 656 | * link as the 2nd entry in the table |
|---|
| 600 | 657 | */ |
|---|
| 601 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries)) { |
|---|
| 602 | | - TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT; |
|---|
| 603 | | - TOPA_ENTRY(topa, 1)->end = 1; |
|---|
| 658 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { |
|---|
| 659 | + TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT; |
|---|
| 660 | + TOPA_ENTRY(&tp->topa, 1)->end = 1; |
|---|
| 604 | 661 | } |
|---|
| 605 | 662 | |
|---|
| 606 | | - return topa; |
|---|
| 663 | + return &tp->topa; |
|---|
| 607 | 664 | } |
|---|
| 608 | 665 | |
|---|
| 609 | 666 | /** |
|---|
| .. | .. |
|---|
| 638 | 695 | topa->offset = last->offset + last->size; |
|---|
| 639 | 696 | buf->last = topa; |
|---|
| 640 | 697 | |
|---|
| 641 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries)) |
|---|
| 698 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) |
|---|
| 642 | 699 | return; |
|---|
| 643 | 700 | |
|---|
| 644 | 701 | BUG_ON(last->last != TENTS_PER_PAGE - 1); |
|---|
| 645 | 702 | |
|---|
| 646 | | - TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT; |
|---|
| 703 | + TOPA_ENTRY(last, -1)->base = topa_pfn(topa); |
|---|
| 647 | 704 | TOPA_ENTRY(last, -1)->end = 1; |
|---|
| 648 | 705 | } |
|---|
| 649 | 706 | |
|---|
| .. | .. |
|---|
| 654 | 711 | static bool topa_table_full(struct topa *topa) |
|---|
| 655 | 712 | { |
|---|
| 656 | 713 | /* single-entry ToPA is a special case */ |
|---|
| 657 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries)) |
|---|
| 714 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) |
|---|
| 658 | 715 | return !!topa->last; |
|---|
| 659 | 716 | |
|---|
| 660 | 717 | return topa->last == TENTS_PER_PAGE - 1; |
|---|
| .. | .. |
|---|
| 670 | 727 | * |
|---|
| 671 | 728 | * Return: 0 on success or error code. |
|---|
| 672 | 729 | */ |
|---|
| 673 | | -static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp) |
|---|
| 730 | +static int topa_insert_pages(struct pt_buffer *buf, int cpu, gfp_t gfp) |
|---|
| 674 | 731 | { |
|---|
| 675 | 732 | struct topa *topa = buf->last; |
|---|
| 676 | 733 | int order = 0; |
|---|
| .. | .. |
|---|
| 681 | 738 | order = page_private(p); |
|---|
| 682 | 739 | |
|---|
| 683 | 740 | if (topa_table_full(topa)) { |
|---|
| 684 | | - topa = topa_alloc(buf->cpu, gfp); |
|---|
| 741 | + topa = topa_alloc(cpu, gfp); |
|---|
| 685 | 742 | if (!topa) |
|---|
| 686 | 743 | return -ENOMEM; |
|---|
| 687 | 744 | |
|---|
| 688 | 745 | topa_insert_table(buf, topa); |
|---|
| 689 | 746 | } |
|---|
| 690 | 747 | |
|---|
| 748 | + if (topa->z_count == topa->last - 1) { |
|---|
| 749 | + if (order == TOPA_ENTRY(topa, topa->last - 1)->size) |
|---|
| 750 | + topa->z_count++; |
|---|
| 751 | + } |
|---|
| 752 | + |
|---|
| 691 | 753 | TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT; |
|---|
| 692 | 754 | TOPA_ENTRY(topa, -1)->size = order; |
|---|
| 693 | | - if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) { |
|---|
| 755 | + if (!buf->snapshot && |
|---|
| 756 | + !intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { |
|---|
| 694 | 757 | TOPA_ENTRY(topa, -1)->intr = 1; |
|---|
| 695 | 758 | TOPA_ENTRY(topa, -1)->stop = 1; |
|---|
| 696 | 759 | } |
|---|
| .. | .. |
|---|
| 712 | 775 | struct topa *topa; |
|---|
| 713 | 776 | |
|---|
| 714 | 777 | list_for_each_entry(topa, &buf->tables, list) { |
|---|
| 778 | + struct topa_page *tp = topa_to_page(topa); |
|---|
| 715 | 779 | int i; |
|---|
| 716 | 780 | |
|---|
| 717 | | - pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table, |
|---|
| 718 | | - topa->phys, topa->offset, topa->size); |
|---|
| 781 | + pr_debug("# table @%p, off %llx size %zx\n", tp->table, |
|---|
| 782 | + topa->offset, topa->size); |
|---|
| 719 | 783 | for (i = 0; i < TENTS_PER_PAGE; i++) { |
|---|
| 720 | 784 | pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n", |
|---|
| 721 | | - &topa->table[i], |
|---|
| 722 | | - (unsigned long)topa->table[i].base << TOPA_SHIFT, |
|---|
| 723 | | - sizes(topa->table[i].size), |
|---|
| 724 | | - topa->table[i].end ? 'E' : ' ', |
|---|
| 725 | | - topa->table[i].intr ? 'I' : ' ', |
|---|
| 726 | | - topa->table[i].stop ? 'S' : ' ', |
|---|
| 727 | | - *(u64 *)&topa->table[i]); |
|---|
| 728 | | - if ((pt_cap_get(PT_CAP_topa_multiple_entries) && |
|---|
| 729 | | - topa->table[i].stop) || |
|---|
| 730 | | - topa->table[i].end) |
|---|
| 785 | + &tp->table[i], |
|---|
| 786 | + (unsigned long)tp->table[i].base << TOPA_SHIFT, |
|---|
| 787 | + sizes(tp->table[i].size), |
|---|
| 788 | + tp->table[i].end ? 'E' : ' ', |
|---|
| 789 | + tp->table[i].intr ? 'I' : ' ', |
|---|
| 790 | + tp->table[i].stop ? 'S' : ' ', |
|---|
| 791 | + *(u64 *)&tp->table[i]); |
|---|
| 792 | + if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) && |
|---|
| 793 | + tp->table[i].stop) || |
|---|
| 794 | + tp->table[i].end) |
|---|
| 731 | 795 | break; |
|---|
| 796 | + if (!i && topa->z_count) |
|---|
| 797 | + i += topa->z_count; |
|---|
| 732 | 798 | } |
|---|
| 733 | 799 | } |
|---|
| 734 | 800 | } |
|---|
| .. | .. |
|---|
| 765 | 831 | struct pt_buffer *buf = perf_get_aux(&pt->handle); |
|---|
| 766 | 832 | u64 topa_idx, base, old; |
|---|
| 767 | 833 | |
|---|
| 834 | + if (buf->single) { |
|---|
| 835 | + local_set(&buf->data_size, buf->output_off); |
|---|
| 836 | + return; |
|---|
| 837 | + } |
|---|
| 838 | + |
|---|
| 768 | 839 | /* offset of the first region in this table from the beginning of buf */ |
|---|
| 769 | 840 | base = buf->cur->offset + buf->output_off; |
|---|
| 770 | 841 | |
|---|
| 771 | 842 | /* offset of the current output region within this table */ |
|---|
| 772 | 843 | for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++) |
|---|
| 773 | | - base += sizes(buf->cur->table[topa_idx].size); |
|---|
| 844 | + base += TOPA_ENTRY_SIZE(buf->cur, topa_idx); |
|---|
| 774 | 845 | |
|---|
| 775 | 846 | if (buf->snapshot) { |
|---|
| 776 | 847 | local_set(&buf->data_size, base); |
|---|
| .. | .. |
|---|
| 790 | 861 | */ |
|---|
| 791 | 862 | static void *pt_buffer_region(struct pt_buffer *buf) |
|---|
| 792 | 863 | { |
|---|
| 793 | | - return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT); |
|---|
| 864 | + return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); |
|---|
| 794 | 865 | } |
|---|
| 795 | 866 | |
|---|
| 796 | 867 | /** |
|---|
| .. | .. |
|---|
| 799 | 870 | */ |
|---|
| 800 | 871 | static size_t pt_buffer_region_size(struct pt_buffer *buf) |
|---|
| 801 | 872 | { |
|---|
| 802 | | - return sizes(buf->cur->table[buf->cur_idx].size); |
|---|
| 873 | + return TOPA_ENTRY_SIZE(buf->cur, buf->cur_idx); |
|---|
| 803 | 874 | } |
|---|
| 804 | 875 | |
|---|
| 805 | 876 | /** |
|---|
| .. | .. |
|---|
| 828 | 899 | * means we are already losing data; need to let the decoder |
|---|
| 829 | 900 | * know. |
|---|
| 830 | 901 | */ |
|---|
| 831 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries) || |
|---|
| 832 | | - buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { |
|---|
| 902 | + if (!buf->single && |
|---|
| 903 | + (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || |
|---|
| 904 | + buf->output_off == pt_buffer_region_size(buf))) { |
|---|
| 833 | 905 | perf_aux_output_flag(&pt->handle, |
|---|
| 834 | 906 | PERF_AUX_FLAG_TRUNCATED); |
|---|
| 835 | 907 | advance++; |
|---|
| .. | .. |
|---|
| 840 | 912 | * Also on single-entry ToPA implementations, interrupt will come |
|---|
| 841 | 913 | * before the output reaches its output region's boundary. |
|---|
| 842 | 914 | */ |
|---|
| 843 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot && |
|---|
| 915 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) && |
|---|
| 916 | + !buf->snapshot && |
|---|
| 844 | 917 | pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) { |
|---|
| 845 | 918 | void *head = pt_buffer_region(buf); |
|---|
| 846 | 919 | |
|---|
| .. | .. |
|---|
| 865 | 938 | */ |
|---|
| 866 | 939 | static void pt_read_offset(struct pt_buffer *buf) |
|---|
| 867 | 940 | { |
|---|
| 868 | | - u64 offset, base_topa; |
|---|
| 941 | + struct pt *pt = this_cpu_ptr(&pt_ctx); |
|---|
| 942 | + struct topa_page *tp; |
|---|
| 869 | 943 | |
|---|
| 870 | | - rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa); |
|---|
| 871 | | - buf->cur = phys_to_virt(base_topa); |
|---|
| 944 | + if (!buf->single) { |
|---|
| 945 | + rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base); |
|---|
| 946 | + tp = phys_to_virt(pt->output_base); |
|---|
| 947 | + buf->cur = &tp->topa; |
|---|
| 948 | + } |
|---|
| 872 | 949 | |
|---|
| 873 | | - rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset); |
|---|
| 950 | + rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask); |
|---|
| 874 | 951 | /* offset within current output region */ |
|---|
| 875 | | - buf->output_off = offset >> 32; |
|---|
| 952 | + buf->output_off = pt->output_mask >> 32; |
|---|
| 876 | 953 | /* index of current output region within this table */ |
|---|
| 877 | | - buf->cur_idx = (offset & 0xffffff80) >> 7; |
|---|
| 954 | + if (!buf->single) |
|---|
| 955 | + buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7; |
|---|
| 878 | 956 | } |
|---|
| 879 | 957 | |
|---|
| 880 | | -/** |
|---|
| 881 | | - * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry |
|---|
| 882 | | - * @buf: PT buffer. |
|---|
| 883 | | - * @pg: Page offset in the buffer. |
|---|
| 884 | | - * |
|---|
| 885 | | - * When advancing to the next output region (ToPA entry), given a page offset |
|---|
| 886 | | - * into the buffer, we need to find the offset of the first page in the next |
|---|
| 887 | | - * region. |
|---|
| 888 | | - */ |
|---|
| 889 | | -static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg) |
|---|
| 958 | +static struct topa_entry * |
|---|
| 959 | +pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg) |
|---|
| 890 | 960 | { |
|---|
| 891 | | - struct topa_entry *te = buf->topa_index[pg]; |
|---|
| 961 | + struct topa_page *tp; |
|---|
| 962 | + struct topa *topa; |
|---|
| 963 | + unsigned int idx, cur_pg = 0, z_pg = 0, start_idx = 0; |
|---|
| 892 | 964 | |
|---|
| 893 | | - /* one region */ |
|---|
| 894 | | - if (buf->first == buf->last && buf->first->last == 1) |
|---|
| 895 | | - return pg; |
|---|
| 965 | + /* |
|---|
| 966 | + * Indicates a bug in the caller. |
|---|
| 967 | + */ |
|---|
| 968 | + if (WARN_ON_ONCE(pg >= buf->nr_pages)) |
|---|
| 969 | + return NULL; |
|---|
| 896 | 970 | |
|---|
| 897 | | - do { |
|---|
| 898 | | - pg++; |
|---|
| 899 | | - pg &= buf->nr_pages - 1; |
|---|
| 900 | | - } while (buf->topa_index[pg] == te); |
|---|
| 971 | + /* |
|---|
| 972 | + * First, find the ToPA table where @pg fits. With high |
|---|
| 973 | + * order allocations, there shouldn't be many of these. |
|---|
| 974 | + */ |
|---|
| 975 | + list_for_each_entry(topa, &buf->tables, list) { |
|---|
| 976 | + if (topa->offset + topa->size > pg << PAGE_SHIFT) |
|---|
| 977 | + goto found; |
|---|
| 978 | + } |
|---|
| 901 | 979 | |
|---|
| 902 | | - return pg; |
|---|
| 980 | + /* |
|---|
| 981 | + * Hitting this means we have a problem in the ToPA |
|---|
| 982 | + * allocation code. |
|---|
| 983 | + */ |
|---|
| 984 | + WARN_ON_ONCE(1); |
|---|
| 985 | + |
|---|
| 986 | + return NULL; |
|---|
| 987 | + |
|---|
| 988 | +found: |
|---|
| 989 | + /* |
|---|
| 990 | + * Indicates a problem in the ToPA allocation code. |
|---|
| 991 | + */ |
|---|
| 992 | + if (WARN_ON_ONCE(topa->last == -1)) |
|---|
| 993 | + return NULL; |
|---|
| 994 | + |
|---|
| 995 | + tp = topa_to_page(topa); |
|---|
| 996 | + cur_pg = PFN_DOWN(topa->offset); |
|---|
| 997 | + if (topa->z_count) { |
|---|
| 998 | + z_pg = TOPA_ENTRY_PAGES(topa, 0) * (topa->z_count + 1); |
|---|
| 999 | + start_idx = topa->z_count + 1; |
|---|
| 1000 | + } |
|---|
| 1001 | + |
|---|
| 1002 | + /* |
|---|
| 1003 | + * Multiple entries at the beginning of the table have the same size, |
|---|
| 1004 | + * ideally all of them; if @pg falls there, the search is done. |
|---|
| 1005 | + */ |
|---|
| 1006 | + if (pg >= cur_pg && pg < cur_pg + z_pg) { |
|---|
| 1007 | + idx = (pg - cur_pg) / TOPA_ENTRY_PAGES(topa, 0); |
|---|
| 1008 | + return &tp->table[idx]; |
|---|
| 1009 | + } |
|---|
| 1010 | + |
|---|
| 1011 | + /* |
|---|
| 1012 | + * Otherwise, slow path: iterate through the remaining entries. |
|---|
| 1013 | + */ |
|---|
| 1014 | + for (idx = start_idx, cur_pg += z_pg; idx < topa->last; idx++) { |
|---|
| 1015 | + if (cur_pg + TOPA_ENTRY_PAGES(topa, idx) > pg) |
|---|
| 1016 | + return &tp->table[idx]; |
|---|
| 1017 | + |
|---|
| 1018 | + cur_pg += TOPA_ENTRY_PAGES(topa, idx); |
|---|
| 1019 | + } |
|---|
| 1020 | + |
|---|
| 1021 | + /* |
|---|
| 1022 | + * Means we couldn't find a ToPA entry in the table that does match. |
|---|
| 1023 | + */ |
|---|
| 1024 | + WARN_ON_ONCE(1); |
|---|
| 1025 | + |
|---|
| 1026 | + return NULL; |
|---|
| 1027 | +} |
|---|
| 1028 | + |
|---|
| 1029 | +static struct topa_entry * |
|---|
| 1030 | +pt_topa_prev_entry(struct pt_buffer *buf, struct topa_entry *te) |
|---|
| 1031 | +{ |
|---|
| 1032 | + unsigned long table = (unsigned long)te & ~(PAGE_SIZE - 1); |
|---|
| 1033 | + struct topa_page *tp; |
|---|
| 1034 | + struct topa *topa; |
|---|
| 1035 | + |
|---|
| 1036 | + tp = (struct topa_page *)table; |
|---|
| 1037 | + if (tp->table != te) |
|---|
| 1038 | + return --te; |
|---|
| 1039 | + |
|---|
| 1040 | + topa = &tp->topa; |
|---|
| 1041 | + if (topa == buf->first) |
|---|
| 1042 | + topa = buf->last; |
|---|
| 1043 | + else |
|---|
| 1044 | + topa = list_prev_entry(topa, list); |
|---|
| 1045 | + |
|---|
| 1046 | + tp = topa_to_page(topa); |
|---|
| 1047 | + |
|---|
| 1048 | + return &tp->table[topa->last - 1]; |
|---|
| 903 | 1049 | } |
|---|
| 904 | 1050 | |
|---|
| 905 | 1051 | /** |
|---|
| .. | .. |
|---|
| 922 | 1068 | unsigned long head = local64_read(&buf->head); |
|---|
| 923 | 1069 | unsigned long idx, npages, wakeup; |
|---|
| 924 | 1070 | |
|---|
| 1071 | + if (buf->single) |
|---|
| 1072 | + return 0; |
|---|
| 1073 | + |
|---|
| 925 | 1074 | /* can't stop in the middle of an output region */ |
|---|
| 926 | | - if (buf->output_off + handle->size + 1 < |
|---|
| 927 | | - sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { |
|---|
| 1075 | + if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) { |
|---|
| 928 | 1076 | perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); |
|---|
| 929 | 1077 | return -EINVAL; |
|---|
| 930 | 1078 | } |
|---|
| 931 | 1079 | |
|---|
| 932 | 1080 | |
|---|
| 933 | 1081 | /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */ |
|---|
| 934 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries)) |
|---|
| 1082 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) |
|---|
| 935 | 1083 | return 0; |
|---|
| 936 | 1084 | |
|---|
| 937 | 1085 | /* clear STOP and INT from current entry */ |
|---|
| 938 | | - buf->topa_index[buf->stop_pos]->stop = 0; |
|---|
| 939 | | - buf->topa_index[buf->stop_pos]->intr = 0; |
|---|
| 940 | | - buf->topa_index[buf->intr_pos]->intr = 0; |
|---|
| 1086 | + if (buf->stop_te) { |
|---|
| 1087 | + buf->stop_te->stop = 0; |
|---|
| 1088 | + buf->stop_te->intr = 0; |
|---|
| 1089 | + } |
|---|
| 1090 | + |
|---|
| 1091 | + if (buf->intr_te) |
|---|
| 1092 | + buf->intr_te->intr = 0; |
|---|
| 941 | 1093 | |
|---|
| 942 | 1094 | /* how many pages till the STOP marker */ |
|---|
| 943 | 1095 | npages = handle->size >> PAGE_SHIFT; |
|---|
| .. | .. |
|---|
| 948 | 1100 | |
|---|
| 949 | 1101 | idx = (head >> PAGE_SHIFT) + npages; |
|---|
| 950 | 1102 | idx &= buf->nr_pages - 1; |
|---|
| 951 | | - buf->stop_pos = idx; |
|---|
| 1103 | + |
|---|
| 1104 | + if (idx != buf->stop_pos) { |
|---|
| 1105 | + buf->stop_pos = idx; |
|---|
| 1106 | + buf->stop_te = pt_topa_entry_for_page(buf, idx); |
|---|
| 1107 | + buf->stop_te = pt_topa_prev_entry(buf, buf->stop_te); |
|---|
| 1108 | + } |
|---|
| 952 | 1109 | |
|---|
| 953 | 1110 | wakeup = handle->wakeup >> PAGE_SHIFT; |
|---|
| 954 | 1111 | |
|---|
| .. | .. |
|---|
| 958 | 1115 | idx = wakeup; |
|---|
| 959 | 1116 | |
|---|
| 960 | 1117 | idx &= buf->nr_pages - 1; |
|---|
| 961 | | - buf->intr_pos = idx; |
|---|
| 962 | | - |
|---|
| 963 | | - buf->topa_index[buf->stop_pos]->stop = 1; |
|---|
| 964 | | - buf->topa_index[buf->stop_pos]->intr = 1; |
|---|
| 965 | | - buf->topa_index[buf->intr_pos]->intr = 1; |
|---|
| 966 | | - |
|---|
| 967 | | - return 0; |
|---|
| 968 | | -} |
|---|
| 969 | | - |
|---|
| 970 | | -/** |
|---|
| 971 | | - * pt_buffer_setup_topa_index() - build topa_index[] table of regions |
|---|
| 972 | | - * @buf: PT buffer. |
|---|
| 973 | | - * |
|---|
| 974 | | - * topa_index[] references output regions indexed by offset into the |
|---|
| 975 | | - * buffer for purposes of quick reverse lookup. |
|---|
| 976 | | - */ |
|---|
| 977 | | -static void pt_buffer_setup_topa_index(struct pt_buffer *buf) |
|---|
| 978 | | -{ |
|---|
| 979 | | - struct topa *cur = buf->first, *prev = buf->last; |
|---|
| 980 | | - struct topa_entry *te_cur = TOPA_ENTRY(cur, 0), |
|---|
| 981 | | - *te_prev = TOPA_ENTRY(prev, prev->last - 1); |
|---|
| 982 | | - int pg = 0, idx = 0; |
|---|
| 983 | | - |
|---|
| 984 | | - while (pg < buf->nr_pages) { |
|---|
| 985 | | - int tidx; |
|---|
| 986 | | - |
|---|
| 987 | | - /* pages within one topa entry */ |
|---|
| 988 | | - for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++) |
|---|
| 989 | | - buf->topa_index[pg] = te_prev; |
|---|
| 990 | | - |
|---|
| 991 | | - te_prev = te_cur; |
|---|
| 992 | | - |
|---|
| 993 | | - if (idx == cur->last - 1) { |
|---|
| 994 | | - /* advance to next topa table */ |
|---|
| 995 | | - idx = 0; |
|---|
| 996 | | - cur = list_entry(cur->list.next, struct topa, list); |
|---|
| 997 | | - } else { |
|---|
| 998 | | - idx++; |
|---|
| 999 | | - } |
|---|
| 1000 | | - te_cur = TOPA_ENTRY(cur, idx); |
|---|
| 1118 | + if (idx != buf->intr_pos) { |
|---|
| 1119 | + buf->intr_pos = idx; |
|---|
| 1120 | + buf->intr_te = pt_topa_entry_for_page(buf, idx); |
|---|
| 1121 | + buf->intr_te = pt_topa_prev_entry(buf, buf->intr_te); |
|---|
| 1001 | 1122 | } |
|---|
| 1002 | 1123 | |
|---|
| 1124 | + buf->stop_te->stop = 1; |
|---|
| 1125 | + buf->stop_te->intr = 1; |
|---|
| 1126 | + buf->intr_te->intr = 1; |
|---|
| 1127 | + |
|---|
| 1128 | + return 0; |
|---|
| 1003 | 1129 | } |
|---|
| 1004 | 1130 | |
|---|
| 1005 | 1131 | /** |
|---|
| .. | .. |
|---|
| 1019 | 1145 | */ |
|---|
| 1020 | 1146 | static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head) |
|---|
| 1021 | 1147 | { |
|---|
| 1148 | + struct topa_page *cur_tp; |
|---|
| 1149 | + struct topa_entry *te; |
|---|
| 1022 | 1150 | int pg; |
|---|
| 1023 | 1151 | |
|---|
| 1024 | 1152 | if (buf->snapshot) |
|---|
| 1025 | 1153 | head &= (buf->nr_pages << PAGE_SHIFT) - 1; |
|---|
| 1026 | 1154 | |
|---|
| 1027 | | - pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1); |
|---|
| 1028 | | - pg = pt_topa_next_entry(buf, pg); |
|---|
| 1155 | + if (!buf->single) { |
|---|
| 1156 | + pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1); |
|---|
| 1157 | + te = pt_topa_entry_for_page(buf, pg); |
|---|
| 1029 | 1158 | |
|---|
| 1030 | | - buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK); |
|---|
| 1031 | | - buf->cur_idx = ((unsigned long)buf->topa_index[pg] - |
|---|
| 1032 | | - (unsigned long)buf->cur) / sizeof(struct topa_entry); |
|---|
| 1033 | | - buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1); |
|---|
| 1159 | + cur_tp = topa_entry_to_page(te); |
|---|
| 1160 | + buf->cur = &cur_tp->topa; |
|---|
| 1161 | + buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0); |
|---|
| 1162 | + buf->output_off = head & (pt_buffer_region_size(buf) - 1); |
|---|
| 1163 | + } else { |
|---|
| 1164 | + buf->output_off = head; |
|---|
| 1165 | + } |
|---|
| 1034 | 1166 | |
|---|
| 1035 | 1167 | local64_set(&buf->head, head); |
|---|
| 1036 | 1168 | local_set(&buf->data_size, 0); |
|---|
| .. | .. |
|---|
| 1043 | 1175 | static void pt_buffer_fini_topa(struct pt_buffer *buf) |
|---|
| 1044 | 1176 | { |
|---|
| 1045 | 1177 | struct topa *topa, *iter; |
|---|
| 1178 | + |
|---|
| 1179 | + if (buf->single) |
|---|
| 1180 | + return; |
|---|
| 1046 | 1181 | |
|---|
| 1047 | 1182 | list_for_each_entry_safe(topa, iter, &buf->tables, list) { |
|---|
| 1048 | 1183 | /* |
|---|
| .. | .. |
|---|
| 1059 | 1194 | * @size: Total size of all regions within this ToPA. |
|---|
| 1060 | 1195 | * @gfp: Allocation flags. |
|---|
| 1061 | 1196 | */ |
|---|
| 1062 | | -static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages, |
|---|
| 1063 | | - gfp_t gfp) |
|---|
| 1197 | +static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu, |
|---|
| 1198 | + unsigned long nr_pages, gfp_t gfp) |
|---|
| 1064 | 1199 | { |
|---|
| 1065 | 1200 | struct topa *topa; |
|---|
| 1066 | 1201 | int err; |
|---|
| 1067 | 1202 | |
|---|
| 1068 | | - topa = topa_alloc(buf->cpu, gfp); |
|---|
| 1203 | + topa = topa_alloc(cpu, gfp); |
|---|
| 1069 | 1204 | if (!topa) |
|---|
| 1070 | 1205 | return -ENOMEM; |
|---|
| 1071 | 1206 | |
|---|
| 1072 | 1207 | topa_insert_table(buf, topa); |
|---|
| 1073 | 1208 | |
|---|
| 1074 | 1209 | while (buf->nr_pages < nr_pages) { |
|---|
| 1075 | | - err = topa_insert_pages(buf, gfp); |
|---|
| 1210 | + err = topa_insert_pages(buf, cpu, gfp); |
|---|
| 1076 | 1211 | if (err) { |
|---|
| 1077 | 1212 | pt_buffer_fini_topa(buf); |
|---|
| 1078 | 1213 | return -ENOMEM; |
|---|
| 1079 | 1214 | } |
|---|
| 1080 | 1215 | } |
|---|
| 1081 | 1216 | |
|---|
| 1082 | | - pt_buffer_setup_topa_index(buf); |
|---|
| 1083 | | - |
|---|
| 1084 | 1217 | /* link last table to the first one, but only if multi-entry ToPA is supported */ |
|---|
| 1085 | | - if (pt_cap_get(PT_CAP_topa_multiple_entries)) { |
|---|
| 1086 | | - TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT; |
|---|
| 1218 | + if (intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) { |
|---|
| 1219 | + TOPA_ENTRY(buf->last, -1)->base = topa_pfn(buf->first); |
|---|
| 1087 | 1220 | TOPA_ENTRY(buf->last, -1)->end = 1; |
|---|
| 1088 | 1221 | } |
|---|
| 1089 | 1222 | |
|---|
| 1090 | 1223 | pt_topa_dump(buf); |
|---|
| 1091 | 1224 | return 0; |
|---|
| 1225 | +} |
|---|
| 1226 | + |
|---|
| | +/** |
|---|
| | + * pt_buffer_try_single() - try to use single range output for a buffer |
|---|
| | + * @buf: PT buffer. |
|---|
| | + * @nr_pages: Number of pages in the buffer. |
|---|
| | + * |
|---|
| | + * Return: 0 if @buf was marked for single range output, -ENOTSUPP if not. |
|---|
| | + */ |
|---|
| 1227 | +static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages) |
|---|
| 1228 | +{ |
|---|
| 1229 | + struct page *first = virt_to_page(buf->data_pages[0]); |
|---|
| 1230 | + int order = 0; |
|---|
| 1231 | + |
|---|
| 1232 | + /* |
|---|
| 1233 | + * Single range output mode is only an option |
|---|
| 1234 | + * + for snapshot buffers, which don't need interrupts; |
|---|
| 1235 | + * + on hardware that advertises support for it; |
|---|
| 1236 | + * + when the whole buffer is one contiguous allocation. |
|---|
| 1237 | + */ |
|---|
| 1238 | + if (!buf->snapshot) |
|---|
| 1239 | + return -ENOTSUPP; |
|---|
| 1240 | + |
|---|
| 1241 | + if (!intel_pt_validate_hw_cap(PT_CAP_single_range_output)) |
|---|
| 1242 | + return -ENOTSUPP; |
|---|
| 1243 | + |
|---|
| 1244 | + if (PagePrivate(first)) |
|---|
| 1245 | + order = page_private(first); |
|---|
| 1246 | + |
|---|
| 1247 | + if (nr_pages != 1 << order) |
|---|
| 1248 | + return -ENOTSUPP; |
|---|
| 1249 | + |
|---|
| 1250 | + /* |
|---|
| 1251 | + * Errata TGL052, ADL037 and RPL017: some processors cannot always |
|---|
| 1252 | + * support a single range larger than 4KB. Future processors might |
|---|
| 1253 | + * be affected as well, so rather than keeping track of which ones, |
|---|
| 1254 | + * refuse multi-page single range buffers everywhere for now. |
|---|
| 1255 | + */ |
|---|
| 1256 | + if (nr_pages > 1) |
|---|
| 1257 | + return -ENOTSUPP; |
|---|
| 1258 | + |
|---|
| 1259 | + /* every condition holds: commit to single range output */ |
|---|
| 1260 | + buf->single = true; |
|---|
| 1261 | + buf->nr_pages = nr_pages; |
|---|
| 1262 | + |
|---|
| 1263 | + return 0; |
|---|
| 1092 | 1264 | } |
|---|
| 1093 | 1265 | |
|---|
| 1094 | 1266 | /** |
|---|
| .. | .. |
|---|
| 1113 | 1285 | if (!nr_pages) |
|---|
| 1114 | 1286 | return NULL; |
|---|
| 1115 | 1287 | |
|---|
| 1288 | + /* |
|---|
| 1289 | + * Only support AUX sampling in snapshot mode, where we don't |
|---|
| 1290 | + * generate NMIs. |
|---|
| 1291 | + */ |
|---|
| 1292 | + if (event->attr.aux_sample_size && !snapshot) |
|---|
| 1293 | + return NULL; |
|---|
| 1294 | + |
|---|
| 1116 | 1295 | if (cpu == -1) |
|---|
| 1117 | 1296 | cpu = raw_smp_processor_id(); |
|---|
| 1118 | 1297 | node = cpu_to_node(cpu); |
|---|
| 1119 | 1298 | |
|---|
| 1120 | | - buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]), |
|---|
| 1121 | | - GFP_KERNEL, node); |
|---|
| 1299 | + buf = kzalloc_node(sizeof(struct pt_buffer), GFP_KERNEL, node); |
|---|
| 1122 | 1300 | if (!buf) |
|---|
| 1123 | 1301 | return NULL; |
|---|
| 1124 | 1302 | |
|---|
| 1125 | | - buf->cpu = cpu; |
|---|
| 1126 | 1303 | buf->snapshot = snapshot; |
|---|
| 1127 | 1304 | buf->data_pages = pages; |
|---|
| 1305 | + buf->stop_pos = -1; |
|---|
| 1306 | + buf->intr_pos = -1; |
|---|
| 1128 | 1307 | |
|---|
| 1129 | 1308 | INIT_LIST_HEAD(&buf->tables); |
|---|
| 1130 | 1309 | |
|---|
| 1131 | | - ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL); |
|---|
| 1310 | + ret = pt_buffer_try_single(buf, nr_pages); |
|---|
| 1311 | + if (!ret) |
|---|
| 1312 | + return buf; |
|---|
| 1313 | + |
|---|
| 1314 | + ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL); |
|---|
| 1132 | 1315 | if (ret) { |
|---|
| 1133 | 1316 | kfree(buf); |
|---|
| 1134 | 1317 | return NULL; |
|---|
| .. | .. |
|---|
| 1154 | 1337 | struct pt_filters *filters; |
|---|
| 1155 | 1338 | int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu); |
|---|
| 1156 | 1339 | |
|---|
| 1157 | | - if (!pt_cap_get(PT_CAP_num_address_ranges)) |
|---|
| 1340 | + if (!intel_pt_validate_hw_cap(PT_CAP_num_address_ranges)) |
|---|
| 1158 | 1341 | return 0; |
|---|
| 1159 | 1342 | |
|---|
| 1160 | 1343 | filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node); |
|---|
| .. | .. |
|---|
| 1176 | 1359 | event->hw.addr_filters = NULL; |
|---|
| 1177 | 1360 | } |
|---|
| 1178 | 1361 | |
|---|
| 1179 | | -static inline bool valid_kernel_ip(unsigned long ip) |
|---|
| 1362 | +#ifdef CONFIG_X86_64 |
|---|
| | +/* |
|---|
| | + * Sign-extend @vaddr from bit (@vaddr_bits - 1), which yields the canonical |
|---|
| | + * form of the address. @vaddr_bits must be in 1..64 to keep the shift count |
|---|
| | + * below the type width. The left shift is done on the unsigned value: |
|---|
| | + * left-shifting a negative signed value is undefined in ISO C. |
|---|
| | + */ |
|---|
| 1363 | +static u64 canonical_address(u64 vaddr, u8 vaddr_bits) |
|---|
| 1180 | 1364 | { |
|---|
| 1181 | | - return virt_addr_valid(ip) && kernel_ip(ip); |
|---|
| 1365 | + return (s64)(vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); |
|---|
| 1182 | 1366 | } |
|---|
| 1367 | + |
|---|
| | +/* An address is canonical when canonical_address() leaves it unchanged */ |
|---|
| 1368 | +static bool is_canonical_address(u64 vaddr, u8 vaddr_bits) |
|---|
| 1369 | +{ |
|---|
| 1370 | + return canonical_address(vaddr, vaddr_bits) == vaddr; |
|---|
| 1371 | +} |
|---|
| 1372 | + |
|---|
| 1373 | +/* Return @vaddr if canonical, else the lowest canonical address above it */ |
|---|
| 1374 | +static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits) |
|---|
| 1375 | +{ |
|---|
| 1376 | + if (is_canonical_address(vaddr, vaddr_bits)) |
|---|
| 1377 | + return vaddr; |
|---|
| 1378 | + return -BIT_ULL(vaddr_bits - 1); |
|---|
| 1379 | +} |
|---|
| 1380 | + |
|---|
| 1381 | +/* Return @vaddr if canonical, else the highest canonical address below it */ |
|---|
| 1382 | +static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits) |
|---|
| 1383 | +{ |
|---|
| 1384 | + if (is_canonical_address(vaddr, vaddr_bits)) |
|---|
| 1385 | + return vaddr; |
|---|
| 1386 | + return BIT_ULL(vaddr_bits - 1) - 1; |
|---|
| 1387 | +} |
|---|
| 1388 | +#else |
|---|
| 1389 | +#define clamp_to_ge_canonical_addr(x, y) (x) |
|---|
| 1390 | +#define clamp_to_le_canonical_addr(x, y) (x) |
|---|
| 1391 | +#endif |
|---|
| 1183 | 1392 | |
|---|
| 1184 | 1393 | static int pt_event_addr_filters_validate(struct list_head *filters) |
|---|
| 1185 | 1394 | { |
|---|
| .. | .. |
|---|
| 1195 | 1404 | filter->action == PERF_ADDR_FILTER_ACTION_START) |
|---|
| 1196 | 1405 | return -EOPNOTSUPP; |
|---|
| 1197 | 1406 | |
|---|
| 1198 | | - if (!filter->path.dentry) { |
|---|
| 1199 | | - if (!valid_kernel_ip(filter->offset)) |
|---|
| 1200 | | - return -EINVAL; |
|---|
| 1201 | | - |
|---|
| 1202 | | - if (!valid_kernel_ip(filter->offset + filter->size)) |
|---|
| 1203 | | - return -EINVAL; |
|---|
| 1204 | | - } |
|---|
| 1205 | | - |
|---|
| 1206 | | - if (++range > pt_cap_get(PT_CAP_num_address_ranges)) |
|---|
| 1407 | + if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges)) |
|---|
| 1207 | 1408 | return -EOPNOTSUPP; |
|---|
| 1208 | 1409 | } |
|---|
| 1209 | 1410 | |
|---|
| .. | .. |
|---|
| 1226 | 1427 | if (filter->path.dentry && !fr[range].start) { |
|---|
| 1227 | 1428 | msr_a = msr_b = 0; |
|---|
| 1228 | 1429 | } else { |
|---|
| 1229 | | - /* apply the offset */ |
|---|
| 1230 | | - msr_a = fr[range].start; |
|---|
| 1231 | | - msr_b = msr_a + fr[range].size - 1; |
|---|
| 1430 | + unsigned long n = fr[range].size - 1; |
|---|
| 1431 | + unsigned long a = fr[range].start; |
|---|
| 1432 | + unsigned long b; |
|---|
| 1433 | + |
|---|
| 1434 | + if (a > ULONG_MAX - n) |
|---|
| 1435 | + b = ULONG_MAX; |
|---|
| 1436 | + else |
|---|
| 1437 | + b = a + n; |
|---|
| 1438 | + /* |
|---|
| 1439 | + * Apply the offset. 64-bit addresses written to the |
|---|
| 1440 | + * MSRs must be canonical, but the range can encompass |
|---|
| 1441 | + * non-canonical addresses. Since software cannot |
|---|
| 1442 | + * execute at non-canonical addresses, adjusting to |
|---|
| 1443 | + * canonical addresses does not affect the result of the |
|---|
| 1444 | + * address filter. |
|---|
| 1445 | + */ |
|---|
| 1446 | + msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits); |
|---|
| 1447 | + msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits); |
|---|
| 1448 | + if (msr_b < msr_a) |
|---|
| 1449 | + msr_a = msr_b = 0; |
|---|
| 1232 | 1450 | } |
|---|
| 1233 | 1451 | |
|---|
| 1234 | 1452 | filters->filter[range].msr_a = msr_a; |
|---|
| .. | .. |
|---|
| 1294 | 1512 | return; |
|---|
| 1295 | 1513 | } |
|---|
| 1296 | 1514 | |
|---|
| 1297 | | - pt_config_buffer(buf->cur->table, buf->cur_idx, |
|---|
| 1298 | | - buf->output_off); |
|---|
| 1299 | | - pt_config(event); |
|---|
| 1515 | + pt_config_buffer(buf); |
|---|
| 1516 | + pt_config_start(event); |
|---|
| 1300 | 1517 | } |
|---|
| 1301 | 1518 | } |
|---|
| 1302 | 1519 | |
|---|
| .. | .. |
|---|
| 1359 | 1576 | WRITE_ONCE(pt->handle_nmi, 1); |
|---|
| 1360 | 1577 | hwc->state = 0; |
|---|
| 1361 | 1578 | |
|---|
| 1362 | | - pt_config_buffer(buf->cur->table, buf->cur_idx, |
|---|
| 1363 | | - buf->output_off); |
|---|
| 1579 | + pt_config_buffer(buf); |
|---|
| 1364 | 1580 | pt_config(event); |
|---|
| 1365 | 1581 | |
|---|
| 1366 | 1582 | return; |
|---|
| .. | .. |
|---|
| 1409 | 1625 | buf->nr_pages << PAGE_SHIFT); |
|---|
| 1410 | 1626 | perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0)); |
|---|
| 1411 | 1627 | } |
|---|
| 1628 | +} |
|---|
| 1629 | + |
|---|
| | +/** |
|---|
| | + * pt_event_snapshot_aux() - copy the most recent trace data for a sample |
|---|
| | + * @event: Event passed on to pt_config_stop() / pt_config_start(). |
|---|
| | + * @handle: Output handle passed to perf_output_copy_aux() as the destination. |
|---|
| | + * @size: Number of bytes to copy. |
|---|
| | + * |
|---|
| | + * Installed as the PMU's ->snapshot_aux callback in pt_init(). |
|---|
| | + * |
|---|
| | + * Return: the result of perf_output_copy_aux(), or 0 if this CPU has no |
|---|
| | + * snapshot AUX buffer. |
|---|
| | + */ |
|---|
| 1630 | +static long pt_event_snapshot_aux(struct perf_event *event, |
|---|
| 1631 | + struct perf_output_handle *handle, |
|---|
| 1632 | + unsigned long size) |
|---|
| 1633 | +{ |
|---|
| 1634 | + struct pt *ctx = this_cpu_ptr(&pt_ctx); |
|---|
| 1635 | + struct pt_buffer *aux = perf_get_aux(&ctx->handle); |
|---|
| 1636 | + unsigned long start, end; |
|---|
| 1637 | + long copied; |
|---|
| 1638 | + |
|---|
| 1639 | + /* |
|---|
| 1640 | + * There must be an AUX buffer, and it must be a snapshot one: |
|---|
| 1641 | + * sampling is only allowed on snapshot events, see |
|---|
| 1642 | + * pt_buffer_setup_aux(). |
|---|
| 1643 | + */ |
|---|
| 1644 | + if (WARN_ON_ONCE(!aux) || WARN_ON_ONCE(!aux->snapshot)) |
|---|
| 1645 | + return 0; |
|---|
| 1646 | + |
|---|
| 1647 | + /* Here, handle_nmi doubles as the "tracing is on" flag */ |
|---|
| 1648 | + if (READ_ONCE(ctx->handle_nmi)) |
|---|
| 1649 | + pt_config_stop(event); |
|---|
| 1650 | + |
|---|
| 1651 | + pt_read_offset(aux); |
|---|
| 1652 | + pt_update_head(ctx); |
|---|
| 1653 | + |
|---|
| 1654 | + /* |
|---|
| 1655 | + * Copy the @size bytes that end at data_size; if they begin |
|---|
| 1656 | + * before offset 0, bias the start by the buffer size. |
|---|
| 1657 | + */ |
|---|
| 1658 | + end = local_read(&aux->data_size); |
|---|
| 1659 | + start = end - size; |
|---|
| 1660 | + if (end < size) |
|---|
| 1661 | + start += aux->nr_pages << PAGE_SHIFT; |
|---|
| 1662 | + |
|---|
| 1663 | + copied = perf_output_copy_aux(&ctx->handle, handle, start, end); |
|---|
| 1664 | + |
|---|
| 1665 | + /* |
|---|
| 1666 | + * Restart the tracing if it was on when we got here. No |
|---|
| 1667 | + * compiler barrier is needed: nothing that touches |
|---|
| 1668 | + * ctx->handle_nmi could have preempted us. |
|---|
| 1669 | + */ |
|---|
| 1670 | + if (ctx->handle_nmi) |
|---|
| 1671 | + pt_config_start(event); |
|---|
| 1672 | + |
|---|
| 1673 | + return copied; |
|---|
| 1412 | 1674 | } |
|---|
| 1413 | 1675 | |
|---|
| 1414 | 1676 | static void pt_event_del(struct perf_event *event, int mode) |
|---|
| .. | .. |
|---|
| 1479 | 1741 | pt_event_stop(pt->handle.event, PERF_EF_UPDATE); |
|---|
| 1480 | 1742 | } |
|---|
| 1481 | 1743 | |
|---|
| | +/* Return non-zero if @event belongs to the Intel PT PMU, zero otherwise */ |
|---|
| 1744 | +int is_intel_pt_event(struct perf_event *event) |
|---|
| 1745 | +{ |
|---|
| 1746 | + return &pt_pmu.pmu == event->pmu; |
|---|
| 1747 | +} |
|---|
| 1748 | + |
|---|
| 1482 | 1749 | static __init int pt_init(void) |
|---|
| 1483 | 1750 | { |
|---|
| 1484 | 1751 | int ret, cpu, prior_warn = 0; |
|---|
| .. | .. |
|---|
| 1509 | 1776 | if (ret) |
|---|
| 1510 | 1777 | return ret; |
|---|
| 1511 | 1778 | |
|---|
| 1512 | | - if (!pt_cap_get(PT_CAP_topa_output)) { |
|---|
| 1779 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_output)) { |
|---|
| 1513 | 1780 | pr_warn("ToPA output is not supported on this CPU\n"); |
|---|
| 1514 | 1781 | return -ENODEV; |
|---|
| 1515 | 1782 | } |
|---|
| 1516 | 1783 | |
|---|
| 1517 | | - if (!pt_cap_get(PT_CAP_topa_multiple_entries)) |
|---|
| 1518 | | - pt_pmu.pmu.capabilities = |
|---|
| 1519 | | - PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF; |
|---|
| 1784 | + if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) |
|---|
| 1785 | + pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; |
|---|
| 1520 | 1786 | |
|---|
| 1521 | 1787 | pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; |
|---|
| 1522 | 1788 | pt_pmu.pmu.attr_groups = pt_attr_groups; |
|---|
| .. | .. |
|---|
| 1526 | 1792 | pt_pmu.pmu.del = pt_event_del; |
|---|
| 1527 | 1793 | pt_pmu.pmu.start = pt_event_start; |
|---|
| 1528 | 1794 | pt_pmu.pmu.stop = pt_event_stop; |
|---|
| 1795 | + pt_pmu.pmu.snapshot_aux = pt_event_snapshot_aux; |
|---|
| 1529 | 1796 | pt_pmu.pmu.read = pt_event_read; |
|---|
| 1530 | 1797 | pt_pmu.pmu.setup_aux = pt_buffer_setup_aux; |
|---|
| 1531 | 1798 | pt_pmu.pmu.free_aux = pt_buffer_free_aux; |
|---|
| 1532 | 1799 | pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync; |
|---|
| 1533 | 1800 | pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate; |
|---|
| 1534 | 1801 | pt_pmu.pmu.nr_addr_filters = |
|---|
| 1535 | | - pt_cap_get(PT_CAP_num_address_ranges); |
|---|
| 1802 | + intel_pt_validate_hw_cap(PT_CAP_num_address_ranges); |
|---|
| 1536 | 1803 | |
|---|
| 1537 | 1804 | ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1); |
|---|
| 1538 | 1805 | |
|---|