| .. | .. |
|---|
| 30 | 30 | int (*is_in_guest)(void); |
|---|
| 31 | 31 | int (*is_user_mode)(void); |
|---|
| 32 | 32 | unsigned long (*get_guest_ip)(void); |
|---|
| 33 | + void (*handle_intel_pt_intr)(void); |
|---|
| 33 | 34 | }; |
|---|
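The new `handle_intel_pt_intr()` member of `struct perf_guest_info_callbacks` (this diff is against include/linux/perf_event.h, per the `_LINUX_PERF_EVENT_H` guard at the end) lets a hypervisor claim Intel PT PMIs that arrive while a guest owns the trace hardware. A minimal sketch of how a hypervisor module might wire up and register the callbacks follows; the `my_hv_*` names are hypothetical stand-ins for the hypervisor's own helpers, not an existing API.

```c
/*
 * Hedged sketch: registering guest-info callbacks, including the new
 * Intel PT PMI hook. The my_hv_* functions are hypothetical and would be
 * implemented by the hypervisor itself.
 */
static int my_hv_is_in_guest(void);
static int my_hv_is_user_mode(void);
static unsigned long my_hv_get_guest_ip(void);
static void my_hv_handle_intel_pt_intr(void);	/* re-route the PT PMI to the guest */

static struct perf_guest_info_callbacks my_hv_guest_cbs = {
	.is_in_guest		= my_hv_is_in_guest,
	.is_user_mode		= my_hv_is_user_mode,
	.get_guest_ip		= my_hv_get_guest_ip,
	.handle_intel_pt_intr	= my_hv_handle_intel_pt_intr,	/* new member */
};

static int __init my_hv_perf_init(void)
{
	return perf_register_guest_info_callbacks(&my_hv_guest_cbs);
}
```

`perf_unregister_guest_info_callbacks()` (declared further down in this diff) undoes the registration on module exit.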
| 34 | 35 | |
|---|
| 35 | 36 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
|---|
| .. | .. |
|---|
| 53 | 54 | #include <linux/atomic.h> |
|---|
| 54 | 55 | #include <linux/sysfs.h> |
|---|
| 55 | 56 | #include <linux/perf_regs.h> |
|---|
| 56 | | -#include <linux/workqueue.h> |
|---|
| 57 | 57 | #include <linux/cgroup.h> |
|---|
| 58 | +#include <linux/refcount.h> |
|---|
| 58 | 59 | #include <linux/security.h> |
|---|
| 59 | 60 | #include <asm/local.h> |
|---|
| 60 | 61 | |
|---|
| 61 | 62 | struct perf_callchain_entry { |
|---|
| 62 | 63 | __u64 nr; |
|---|
| 63 | | - __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */ |
|---|
| 64 | + __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ |
|---|
| 64 | 65 | }; |
|---|
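Replacing `ip[0]` with `ip[]` turns the callchain buffer into a C99 flexible array member, so the compiler and fortify checks can see the real object size. Below is a sketch of sizing such an allocation; `struct_size()` is the usual helper from `<linux/overflow.h>`, and `max_entries` is an assumed caller-supplied bound (typically capped by `sysctl_perf_event_max_stack`).

```c
/* Hedged sketch: allocating a perf_callchain_entry with a flexible ip[] array. */
#include <linux/overflow.h>
#include <linux/slab.h>

static struct perf_callchain_entry *alloc_callchain(int max_entries)
{
	struct perf_callchain_entry *entry;

	/* struct_size() == sizeof(*entry) + max_entries * sizeof(entry->ip[0]) */
	entry = kzalloc(struct_size(entry, ip, max_entries), GFP_KERNEL);
	if (entry)
		entry->nr = 0;

	return entry;
}
```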
| 65 | 66 | |
|---|
| 66 | 67 | struct perf_callchain_entry_ctx { |
|---|
| .. | .. |
|---|
| 92 | 93 | /* |
|---|
| 93 | 94 | * branch stack layout: |
|---|
| 94 | 95 | * nr: number of taken branches stored in entries[] |
|---|
| 96 | + * hw_idx: The low level index of raw branch records |
|---|
| 97 | + * for the most recent branch. |
|---|
| 98 | + * -1ULL means invalid/unknown. |
|---|
| 95 | 99 | * |
|---|
| 96 | 100 | * Note that nr can vary from sample to sample |
|---|
| 97 | 101 | * branches (to, from) are stored from most recent |
|---|
| 98 | 102 | * to least recent, i.e., entries[0] contains the most |
|---|
| 99 | 103 | * recent branch. |
|---|
| 104 | + * The entries[] is an abstraction of raw branch records, |
|---|
| 105 | + * which may not be stored in age order in HW, e.g. Intel LBR. |
|---|
| 106 | + * The hw_idx is to expose the low level index of raw |
|---|
| 107 | + * branch record for the most recent branch aka entries[0]. |
|---|
| 108 | + * The hw_idx index is between -1 (unknown) and max depth, |
|---|
| 109 | + * which can be retrieved in /sys/devices/cpu/caps/branches. |
|---|
| 110 | + * For the architectures whose raw branch records are |
|---|
| 111 | + * already stored in age order, the hw_idx should be 0. |
|---|
| 100 | 112 | */ |
|---|
| 101 | 113 | struct perf_branch_stack { |
|---|
| 102 | 114 | __u64 nr; |
|---|
| 103 | | - struct perf_branch_entry entries[0]; |
|---|
| 115 | + __u64 hw_idx; |
|---|
| 116 | + struct perf_branch_entry entries[]; |
|---|
| 104 | 117 | }; |
|---|
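`hw_idx` ties the abstract, age-ordered `entries[]` back to the raw hardware record index of the most recent branch (`entries[0]`); `-1ULL` means the PMU cannot report it. A sketch of how a driver might publish it is below; `lbr_tos` is an assumed driver-local "top of stack" value, not an existing interface.

```c
/* Hedged sketch: filling a branch stack most-recent-first and publishing the
 * raw index of entries[0] via hw_idx. */
static void fill_branch_stack(struct perf_branch_stack *bs,
			      const struct perf_branch_entry *src,
			      u64 nr, u64 lbr_tos)
{
	u64 i;

	bs->nr     = nr;
	bs->hw_idx = lbr_tos;		/* raw index of entries[0], or -1ULL */

	for (i = 0; i < nr; i++)
		bs->entries[i] = src[i];	/* already age-ordered */
}
```

Userspace only sees `hw_idx` when the sample explicitly asks for it (the PERF_SAMPLE_BRANCH_HW_INDEX branch-sample flag).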
| 105 | 118 | |
|---|
| 106 | 119 | struct task_struct; |
|---|
| .. | .. |
|---|
| 199 | 212 | */ |
|---|
| 200 | 213 | u64 sample_period; |
|---|
| 201 | 214 | |
|---|
| 202 | | - /* |
|---|
| 203 | | - * The period we started this sample with. |
|---|
| 204 | | - */ |
|---|
| 205 | | - u64 last_period; |
|---|
| 215 | + union { |
|---|
| 216 | + struct { /* Sampling */ |
|---|
| 217 | + /* |
|---|
| 218 | + * The period we started this sample with. |
|---|
| 219 | + */ |
|---|
| 220 | + u64 last_period; |
|---|
| 206 | 221 | |
|---|
| 207 | | - /* |
|---|
| 208 | | - * However much is left of the current period; note that this is |
|---|
| 209 | | - * a full 64bit value and allows for generation of periods longer |
|---|
| 210 | | - * than hardware might allow. |
|---|
| 211 | | - */ |
|---|
| 212 | | - local64_t period_left; |
|---|
| 222 | + /* |
|---|
| 223 | + * However much is left of the current period; |
|---|
| 224 | + * note that this is a full 64bit value and |
|---|
| 225 | + * allows for generation of periods longer |
|---|
| 226 | + * than hardware might allow. |
|---|
| 227 | + */ |
|---|
| 228 | + local64_t period_left; |
|---|
| 229 | + }; |
|---|
| 230 | + struct { /* Topdown events counting for context switch */ |
|---|
| 231 | + u64 saved_metric; |
|---|
| 232 | + u64 saved_slots; |
|---|
| 233 | + }; |
|---|
| 234 | + }; |
|---|
| 213 | 235 | |
|---|
| 214 | 236 | /* |
|---|
| 215 | 237 | * State for throttling the event, see __perf_event_overflow() and |
|---|
| .. | .. |
|---|
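In the new `hw_perf_event` layout above, `last_period`/`period_left` share storage with the `saved_metric`/`saved_slots` pair, since a counting topdown event never runs the sampling-period machinery. As a reminder of how the sampling half is used, here is a simplified sketch of period accounting in the style of `perf_swevent_set_period()`: `period_left` counts down across possibly many hardware periods while `last_period` remembers what the current sample was started with.

```c
/* Hedged, simplified sketch of software period accounting; the real
 * perf_swevent_set_period() additionally handles concurrent updates with
 * local64_cmpxchg(). Returns how many periods have elapsed.
 * div64_u64() comes from <linux/math64.h>. */
static u64 my_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 period = hwc->last_period;
	s64 left = local64_read(&hwc->period_left);
	u64 nr;

	hwc->last_period = hwc->sample_period;	/* start the next sample */

	if (left < 0)
		return 0;

	nr = div64_u64(period + left, period);	/* periods consumed */
	local64_set(&hwc->period_left, left - nr * period);

	return nr;
}
```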
| 241 | 263 | #define PERF_PMU_CAP_NO_INTERRUPT 0x01 |
|---|
| 242 | 264 | #define PERF_PMU_CAP_NO_NMI 0x02 |
|---|
| 243 | 265 | #define PERF_PMU_CAP_AUX_NO_SG 0x04 |
|---|
| 244 | | -#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08 |
|---|
| 266 | +#define PERF_PMU_CAP_EXTENDED_REGS 0x08 |
|---|
| 245 | 267 | #define PERF_PMU_CAP_EXCLUSIVE 0x10 |
|---|
| 246 | 268 | #define PERF_PMU_CAP_ITRACE 0x20 |
|---|
| 247 | 269 | #define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 |
|---|
| 270 | +#define PERF_PMU_CAP_NO_EXCLUDE 0x80 |
|---|
| 271 | +#define PERF_PMU_CAP_AUX_OUTPUT 0x100 |
|---|
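Among the capability changes visible above: `PERF_PMU_CAP_EXTENDED_REGS` reuses the 0x08 bit freed by dropping `PERF_PMU_CAP_AUX_SW_DOUBLEBUF`, `PERF_PMU_CAP_NO_EXCLUDE` lets the core reject any `exclude_*` attribute on the PMU's behalf, and `PERF_PMU_CAP_AUX_OUTPUT` advertises support for the new aux_output linkage (see the `aux_output_match()` hook further down). A sketch of a driver advertising capabilities at registration time; the `my_pmu_*` callbacks are hypothetical.

```c
/* Hedged sketch: a counting-only PMU with no privilege filtering declaring
 * its capabilities. The my_pmu_* callbacks are hypothetical. */
static struct pmu my_pmu = {
	.module		= THIS_MODULE,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT |	/* no sampling interrupts   */
			  PERF_PMU_CAP_NO_EXCLUDE,	/* core rejects exclude_* bits */
	.event_init	= my_pmu_event_init,
	.add		= my_pmu_add,
	.del		= my_pmu_del,
	.start		= my_pmu_start,
	.stop		= my_pmu_stop,
	.read		= my_pmu_read,
};

/* registration: perf_pmu_register(&my_pmu, "my_pmu", -1); */
```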
| 272 | + |
|---|
| 273 | +struct perf_output_handle; |
|---|
| 248 | 274 | |
|---|
| 249 | 275 | /** |
|---|
| 250 | 276 | * struct pmu - generic performance monitoring unit |
|---|
| .. | .. |
|---|
| 255 | 281 | struct module *module; |
|---|
| 256 | 282 | struct device *dev; |
|---|
| 257 | 283 | const struct attribute_group **attr_groups; |
|---|
| 284 | + const struct attribute_group **attr_update; |
|---|
| 258 | 285 | const char *name; |
|---|
| 259 | 286 | int type; |
|---|
| 260 | 287 | |
|---|
| .. | .. |
|---|
| 263 | 290 | */ |
|---|
| 264 | 291 | int capabilities; |
|---|
| 265 | 292 | |
|---|
| 266 | | - int * __percpu pmu_disable_count; |
|---|
| 267 | | - struct perf_cpu_context * __percpu pmu_cpu_context; |
|---|
| 293 | + int __percpu *pmu_disable_count; |
|---|
| 294 | + struct perf_cpu_context __percpu *pmu_cpu_context; |
|---|
| 268 | 295 | atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ |
|---|
| 269 | 296 | int task_ctx_nr; |
|---|
| 270 | 297 | int hrtimer_interval_ms; |
|---|
| 271 | | - u32 events_across_hotplug:1, |
|---|
| 272 | | - reserved:31; |
|---|
| 273 | 298 | |
|---|
| 274 | 299 | /* number of address filters this PMU can do */ |
|---|
| 275 | 300 | unsigned int nr_addr_filters; |
|---|
| .. | .. |
|---|
| 291 | 316 | * -EBUSY -- @event is for this PMU but PMU temporarily unavailable |
|---|
| 292 | 317 | * -EINVAL -- @event is for this PMU but @event is not valid |
|---|
| 293 | 318 | * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported |
|---|
| 294 | | - * -EACCESS -- @event is for this PMU, @event is valid, but no privilidges |
|---|
| 319 | + * -EACCES -- @event is for this PMU, @event is valid, but no privileges |
|---|
| 295 | 320 | * |
|---|
| 296 | 321 | * 0 -- @event is for this PMU and valid |
|---|
| 297 | 322 | * |
|---|
| .. | .. |
|---|
| 350 | 375 | * ->stop() with PERF_EF_UPDATE will read the counter and update |
|---|
| 351 | 376 | * period/count values like ->read() would. |
|---|
| 352 | 377 | * |
|---|
| 353 | | - * ->start() with PERF_EF_RELOAD will reprogram the the counter |
|---|
| 378 | + * ->start() with PERF_EF_RELOAD will reprogram the counter |
|---|
| 354 | 379 | * value, must be preceded by a ->stop() with PERF_EF_UPDATE. |
|---|
| 355 | 380 | */ |
|---|
| 356 | 381 | void (*start) (struct perf_event *event, int flags); |
|---|
| .. | .. |
|---|
| 403 | 428 | */ |
|---|
| 404 | 429 | void (*sched_task) (struct perf_event_context *ctx, |
|---|
| 405 | 430 | bool sched_in); |
|---|
| 406 | | - /* |
|---|
| 407 | | - * PMU specific data size |
|---|
| 408 | | - */ |
|---|
| 409 | | - size_t task_ctx_size; |
|---|
| 410 | 431 | |
|---|
| 432 | + /* |
|---|
| 433 | + * Kmem cache of PMU specific data |
|---|
| 434 | + */ |
|---|
| 435 | + struct kmem_cache *task_ctx_cache; |
|---|
| 436 | + |
|---|
| 437 | + /* |
|---|
| 438 | + * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data) |
|---|
| 439 | + * can be synchronized using this function. See Intel LBR callstack support |
|---|
| 440 | + * implementation and Perf core context switch handling callbacks for usage |
|---|
| 441 | + * examples. |
|---|
| 442 | + */ |
|---|
| 443 | + void (*swap_task_ctx) (struct perf_event_context *prev, |
|---|
| 444 | + struct perf_event_context *next); |
|---|
| 445 | + /* optional */ |
|---|
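`swap_task_ctx()` is called when the core finds two equivalent task contexts at context switch and wants to exchange their PMU-private data instead of rescheduling events; a PMU that implements it must do at least the pointer swap the core would otherwise perform itself. A minimal sketch, assuming no extra per-PMU synchronization is needed:

```c
/* Hedged sketch of a minimal ->swap_task_ctx(): exchange the PMU-specific
 * task_ctx_data between the two contexts. Real users (Intel LBR callstack)
 * also synchronize internal state such as callstack user counts. */
static void my_pmu_swap_task_ctx(struct perf_event_context *prev,
				 struct perf_event_context *next)
{
	swap(prev->task_ctx_data, next->task_ctx_data);
}
```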
| 411 | 446 | |
|---|
| 412 | 447 | /* |
|---|
| 413 | 448 | * Set up pmu-private data structures for an AUX area |
|---|
| .. | .. |
|---|
| 420 | 455 | * Free pmu-private AUX data structures |
|---|
| 421 | 456 | */ |
|---|
| 422 | 457 | void (*free_aux) (void *aux); /* optional */ |
|---|
| 458 | + |
|---|
| 459 | + /* |
|---|
| 460 | + * Take a snapshot of the AUX buffer without touching the event |
|---|
| 461 | + * state, so that preempting ->start()/->stop() callbacks does |
|---|
| 462 | + * not interfere with their logic. Called in PMI context. |
|---|
| 463 | + * |
|---|
| 464 | + * Returns the size of AUX data copied to the output handle. |
|---|
| 465 | + * |
|---|
| 466 | + * Optional. |
|---|
| 467 | + */ |
|---|
| 468 | + long (*snapshot_aux) (struct perf_event *event, |
|---|
| 469 | + struct perf_output_handle *handle, |
|---|
| 470 | + unsigned long size); |
|---|
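`snapshot_aux()` lets perf grab the tail of the AUX buffer from the sampling (PMI) path without calling `->stop()`/`->start()`, so it must not disturb event state; it pairs with the `perf_output_copy_aux()` helper declared further down in this diff. A sketch under the assumption of a simple, non-wrapping driver buffer; the `my_*` helpers and the driver-side output handle are hypothetical.

```c
/* Hedged sketch of ->snapshot_aux(). my_pmu_pause_trace()/my_pmu_resume_trace()
 * and the per-CPU my_pmu_cpu_state (aux_handle, aux_head) are hypothetical
 * driver state. */
static long my_pmu_snapshot_aux(struct perf_event *event,
				struct perf_output_handle *handle,
				unsigned long size)
{
	struct my_pmu_cpu *my_cpu = this_cpu_ptr(&my_pmu_cpu_state);
	unsigned long from, to;
	long copied;

	my_pmu_pause_trace();			/* stop HW writes, not the event */

	to   = my_cpu->aux_head;		/* current write position      */
	from = to > size ? to - size : 0;	/* last "size" bytes, no wrap  */

	copied = perf_output_copy_aux(&my_cpu->aux_handle, handle, from, to);

	my_pmu_resume_trace();
	return copied;
}
```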
| 423 | 471 | |
|---|
| 424 | 472 | /* |
|---|
| 425 | 473 | * Validate address range filters: make sure the HW supports the |
|---|
| .. | .. |
|---|
| 447 | 495 | /* optional */ |
|---|
| 448 | 496 | |
|---|
| 449 | 497 | /* |
|---|
| 498 | + * Check if event can be used for aux_output purposes for |
|---|
| 499 | + * events of this PMU. |
|---|
| 500 | + * |
|---|
| 501 | + * Runs from perf_event_open(). Should return 0 for "no match" |
|---|
| 502 | + * or non-zero for "match". |
|---|
| 503 | + */ |
|---|
| 504 | + int (*aux_output_match) (struct perf_event *event); |
|---|
| 505 | + /* optional */ |
|---|
| 506 | + |
|---|
| 507 | + /* |
|---|
| 450 | 508 | * Filter events for PMU-specific reasons. |
|---|
| 451 | 509 | */ |
|---|
| 452 | 510 | int (*filter_match) (struct perf_event *event); /* optional */ |
|---|
| .. | .. |
|---|
| 466 | 524 | /** |
|---|
| 467 | 525 | * struct perf_addr_filter - address range filter definition |
|---|
| 468 | 526 | * @entry: event's filter list linkage |
|---|
| 469 | | - * @inode: object file's inode for file-based filters |
|---|
| 527 | + * @path: object file's path for file-based filters |
|---|
| 470 | 528 | * @offset: filter range offset |
|---|
| 471 | 529 | * @size: filter range size (size==0 means single address trigger) |
|---|
| 472 | 530 | * @action: filter/start/stop |
|---|
| .. | .. |
|---|
| 506 | 564 | * enum perf_event_state - the states of an event: |
|---|
| 507 | 565 | */ |
|---|
| 508 | 566 | enum perf_event_state { |
|---|
| 509 | | - PERF_EVENT_STATE_DORMANT = -5, |
|---|
| 510 | 567 | PERF_EVENT_STATE_DEAD = -4, |
|---|
| 511 | 568 | PERF_EVENT_STATE_EXIT = -3, |
|---|
| 512 | 569 | PERF_EVENT_STATE_ERROR = -2, |
|---|
| .. | .. |
|---|
| 528 | 585 | * PERF_EV_CAP_SOFTWARE: Is a software event. |
|---|
| 529 | 586 | * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read |
|---|
| 530 | 587 | * from any CPU in the package where it is active. |
|---|
| 588 | + * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and |
|---|
| 589 | + * cannot be a group leader. If an event with this flag is detached from the |
|---|
| 590 | + * group it is scheduled out and moved into an unrecoverable ERROR state. |
|---|
| 531 | 591 | */ |
|---|
| 532 | 592 | #define PERF_EV_CAP_SOFTWARE BIT(0) |
|---|
| 533 | 593 | #define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) |
|---|
| 594 | +#define PERF_EV_CAP_SIBLING BIT(2) |
|---|
| 534 | 595 | |
|---|
| 535 | 596 | #define SWEVENT_HLIST_BITS 8 |
|---|
| 536 | 597 | #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) |
|---|
| .. | .. |
|---|
| 545 | 606 | #define PERF_ATTACH_TASK 0x04 |
|---|
| 546 | 607 | #define PERF_ATTACH_TASK_DATA 0x08 |
|---|
| 547 | 608 | #define PERF_ATTACH_ITRACE 0x10 |
|---|
| 609 | +#define PERF_ATTACH_SCHED_CB 0x20 |
|---|
| 548 | 610 | |
|---|
| 549 | 611 | struct perf_cgroup; |
|---|
| 550 | | -struct ring_buffer; |
|---|
| 612 | +struct perf_buffer; |
|---|
| 551 | 613 | |
|---|
| 552 | 614 | struct pmu_event_list { |
|---|
| 553 | 615 | raw_spinlock_t lock; |
|---|
| .. | .. |
|---|
| 619 | 681 | /* |
|---|
| 620 | 682 | * timestamp shadows the actual context timing but it can |
|---|
| 621 | 683 | * be safely used in NMI interrupt context. It reflects the |
|---|
| 622 | | - * context time as it was when the event was last scheduled in. |
|---|
| 684 | + * context time as it was when the event was last scheduled in, |
|---|
| 685 | + * or when ctx_sched_in failed to schedule the event because we |
|---|
| 686 | + * run out of PMC. |
|---|
| 623 | 687 | * |
|---|
| 624 | 688 | * ctx_time already accounts for ctx->timestamp. Therefore to |
|---|
| 625 | 689 | * compute ctx_time for a sample, simply add perf_clock(). |
|---|
| .. | .. |
|---|
| 651 | 715 | |
|---|
| 652 | 716 | int oncpu; |
|---|
| 653 | 717 | int cpu; |
|---|
| 654 | | - cpumask_t readable_on_cpus; |
|---|
| 655 | 718 | |
|---|
| 656 | 719 | struct list_head owner_entry; |
|---|
| 657 | 720 | struct task_struct *owner; |
|---|
| .. | .. |
|---|
| 660 | 723 | struct mutex mmap_mutex; |
|---|
| 661 | 724 | atomic_t mmap_count; |
|---|
| 662 | 725 | |
|---|
| 663 | | - struct ring_buffer *rb; |
|---|
| 726 | + struct perf_buffer *rb; |
|---|
| 664 | 727 | struct list_head rb_entry; |
|---|
| 665 | 728 | unsigned long rcu_batches; |
|---|
| 666 | 729 | int rcu_pending; |
|---|
| .. | .. |
|---|
| 682 | 745 | /* vma address array for file-based filders */ |
|---|
| 683 | 746 | struct perf_addr_filter_range *addr_filter_ranges; |
|---|
| 684 | 747 | unsigned long addr_filters_gen; |
|---|
| 748 | + |
|---|
| 749 | + /* for aux_output events */ |
|---|
| 750 | + struct perf_event *aux_event; |
|---|
| 685 | 751 | |
|---|
| 686 | 752 | void (*destroy)(struct perf_event *); |
|---|
| 687 | 753 | struct rcu_head rcu_head; |
|---|
| .. | .. |
|---|
| 713 | 779 | void *security; |
|---|
| 714 | 780 | #endif |
|---|
| 715 | 781 | struct list_head sb_list; |
|---|
| 716 | | - /* Is this event shared with other events */ |
|---|
| 717 | | - bool shared; |
|---|
| 718 | | - |
|---|
| 719 | | - /* TODO: need to cherry-pick 3d3eb5fb85d97. This is just padding for now |
|---|
| 720 | | - * to reduce the ABI diff */ |
|---|
| 721 | | - struct list_head dormant_event_entry; |
|---|
| 722 | 782 | #endif /* CONFIG_PERF_EVENTS */ |
|---|
| 723 | 783 | }; |
|---|
| 724 | 784 | |
|---|
| .. | .. |
|---|
| 761 | 821 | int nr_stat; |
|---|
| 762 | 822 | int nr_freq; |
|---|
| 763 | 823 | int rotate_disable; |
|---|
| 764 | | - atomic_t refcount; |
|---|
| 824 | + /* |
|---|
| 825 | + * Set when nr_events != nr_active, except tolerant to events not |
|---|
| 826 | + * necessary to be active due to scheduling constraints, such as cgroups. |
|---|
| 827 | + */ |
|---|
| 828 | + int rotate_necessary; |
|---|
| 829 | + refcount_t refcount; |
|---|
| 765 | 830 | struct task_struct *task; |
|---|
| 766 | 831 | |
|---|
| 767 | 832 | /* |
|---|
| .. | .. |
|---|
| 814 | 879 | int sched_cb_usage; |
|---|
| 815 | 880 | |
|---|
| 816 | 881 | int online; |
|---|
| 882 | + /* |
|---|
| 883 | + * Per-CPU storage for iterators used in visit_groups_merge. The default |
|---|
| 884 | + * storage is of size 2 to hold the CPU and any CPU event iterators. |
|---|
| 885 | + */ |
|---|
| 886 | + int heap_size; |
|---|
| 887 | + struct perf_event **heap; |
|---|
| 888 | + struct perf_event *heap_default[2]; |
|---|
| 817 | 889 | }; |
|---|
| 818 | 890 | |
|---|
| 819 | 891 | struct perf_output_handle { |
|---|
| 820 | 892 | struct perf_event *event; |
|---|
| 821 | | - struct ring_buffer *rb; |
|---|
| 893 | + struct perf_buffer *rb; |
|---|
| 822 | 894 | unsigned long wakeup; |
|---|
| 823 | 895 | unsigned long size; |
|---|
| 824 | 896 | u64 aux_flags; |
|---|
| .. | .. |
|---|
| 901 | 973 | extern void perf_sched_cb_inc(struct pmu *pmu); |
|---|
| 902 | 974 | extern int perf_event_task_disable(void); |
|---|
| 903 | 975 | extern int perf_event_task_enable(void); |
|---|
| 976 | + |
|---|
| 977 | +extern void perf_pmu_resched(struct pmu *pmu); |
|---|
| 978 | + |
|---|
| 904 | 979 | extern int perf_event_refresh(struct perf_event *event, int refresh); |
|---|
| 905 | 980 | extern void perf_event_update_userpage(struct perf_event *event); |
|---|
| 906 | 981 | extern int perf_event_release_kernel(struct perf_event *event); |
|---|
| .. | .. |
|---|
| 949 | 1024 | u32 reserved; |
|---|
| 950 | 1025 | } cpu_entry; |
|---|
| 951 | 1026 | struct perf_callchain_entry *callchain; |
|---|
| 1027 | + u64 aux_size; |
|---|
| 952 | 1028 | |
|---|
| 953 | | - /* |
|---|
| 954 | | - * regs_user may point to task_pt_regs or to regs_user_copy, depending |
|---|
| 955 | | - * on arch details. |
|---|
| 956 | | - */ |
|---|
| 957 | 1029 | struct perf_regs regs_user; |
|---|
| 958 | | - struct pt_regs regs_user_copy; |
|---|
| 959 | | - |
|---|
| 960 | 1030 | struct perf_regs regs_intr; |
|---|
| 961 | 1031 | u64 stack_user_size; |
|---|
| 962 | 1032 | |
|---|
| 963 | 1033 | u64 phys_addr; |
|---|
| 1034 | + u64 cgroup; |
|---|
| 964 | 1035 | } ____cacheline_aligned; |
|---|
| 965 | 1036 | |
|---|
| 966 | 1037 | /* default value for data source */ |
|---|
| .. | .. |
|---|
| 1002 | 1073 | extern void perf_event_output_backward(struct perf_event *event, |
|---|
| 1003 | 1074 | struct perf_sample_data *data, |
|---|
| 1004 | 1075 | struct pt_regs *regs); |
|---|
| 1005 | | -extern void perf_event_output(struct perf_event *event, |
|---|
| 1006 | | - struct perf_sample_data *data, |
|---|
| 1007 | | - struct pt_regs *regs); |
|---|
| 1076 | +extern int perf_event_output(struct perf_event *event, |
|---|
| 1077 | + struct perf_sample_data *data, |
|---|
| 1078 | + struct pt_regs *regs); |
|---|
| 1008 | 1079 | |
|---|
| 1009 | 1080 | static inline bool |
|---|
| 1010 | | -is_default_overflow_handler(struct perf_event *event) |
|---|
| 1081 | +__is_default_overflow_handler(perf_overflow_handler_t overflow_handler) |
|---|
| 1011 | 1082 | { |
|---|
| 1012 | | - if (likely(event->overflow_handler == perf_event_output_forward)) |
|---|
| 1083 | + if (likely(overflow_handler == perf_event_output_forward)) |
|---|
| 1013 | 1084 | return true; |
|---|
| 1014 | | - if (unlikely(event->overflow_handler == perf_event_output_backward)) |
|---|
| 1085 | + if (unlikely(overflow_handler == perf_event_output_backward)) |
|---|
| 1015 | 1086 | return true; |
|---|
| 1016 | 1087 | return false; |
|---|
| 1017 | 1088 | } |
|---|
| 1089 | + |
|---|
| 1090 | +#define is_default_overflow_handler(event) \ |
|---|
| 1091 | + __is_default_overflow_handler((event)->overflow_handler) |
|---|
| 1092 | + |
|---|
| 1093 | +#ifdef CONFIG_BPF_SYSCALL |
|---|
| 1094 | +static inline bool uses_default_overflow_handler(struct perf_event *event) |
|---|
| 1095 | +{ |
|---|
| 1096 | + if (likely(is_default_overflow_handler(event))) |
|---|
| 1097 | + return true; |
|---|
| 1098 | + |
|---|
| 1099 | + return __is_default_overflow_handler(event->orig_overflow_handler); |
|---|
| 1100 | +} |
|---|
| 1101 | +#else |
|---|
| 1102 | +#define uses_default_overflow_handler(event) \ |
|---|
| 1103 | + is_default_overflow_handler(event) |
|---|
| 1104 | +#endif |
|---|
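The overflow-handler test is split so that, when CONFIG_BPF_SYSCALL is enabled and a BPF program has replaced `event->overflow_handler`, callers can still ask whether the original handler (stashed in `orig_overflow_handler`) was one of the default output paths. Arch breakpoint code is the typical consumer; a hedged sketch of that pattern, modelled on the arm/arm64 usage, with hypothetical `my_*` names:

```c
/* Hedged sketch: only arrange single-stepping when the effective handler is
 * perf's default output path; a custom/BPF handler is expected to cope with
 * resuming on its own. my_arch_single_step() is hypothetical. */
static int my_breakpoint_handler(struct perf_event *bp, struct pt_regs *regs)
{
	perf_bp_event(bp, regs);

	if (uses_default_overflow_handler(bp))
		my_arch_single_step(bp, regs);

	return 0;
}
```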
| 1018 | 1105 | |
|---|
| 1019 | 1106 | extern void |
|---|
| 1020 | 1107 | perf_event_header__init_id(struct perf_event_header *header, |
|---|
| .. | .. |
|---|
| 1027 | 1114 | |
|---|
| 1028 | 1115 | extern void |
|---|
| 1029 | 1116 | perf_log_lost_samples(struct perf_event *event, u64 lost); |
|---|
| 1117 | + |
|---|
| 1118 | +static inline bool event_has_any_exclude_flag(struct perf_event *event) |
|---|
| 1119 | +{ |
|---|
| 1120 | + struct perf_event_attr *attr = &event->attr; |
|---|
| 1121 | + |
|---|
| 1122 | + return attr->exclude_idle || attr->exclude_user || |
|---|
| 1123 | + attr->exclude_kernel || attr->exclude_hv || |
|---|
| 1124 | + attr->exclude_guest || attr->exclude_host; |
|---|
| 1125 | +} |
|---|
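`event_has_any_exclude_flag()` gives drivers a one-shot test for all six `exclude_*` attribute bits. A PMU that cannot filter by privilege level either sets `PERF_PMU_CAP_NO_EXCLUDE` (letting the core reject such events) or checks explicitly in its `->event_init()`, as sketched below with a hypothetical driver.

```c
/* Hedged sketch: rejecting privilege-level filtering in ->event_init() for a
 * PMU that cannot honour it (and does not set PERF_PMU_CAP_NO_EXCLUDE). */
static int my_pmu_event_init(struct perf_event *event)
{
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (event_has_any_exclude_flag(event))
		return -EOPNOTSUPP;

	return 0;
}
```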
| 1030 | 1126 | |
|---|
| 1031 | 1127 | static inline bool is_sampling_event(struct perf_event *event) |
|---|
| 1032 | 1128 | { |
|---|
| .. | .. |
|---|
| 1064 | 1160 | #endif |
|---|
| 1065 | 1161 | |
|---|
| 1066 | 1162 | /* |
|---|
| 1067 | | - * Take a snapshot of the regs. Skip ip and frame pointer to |
|---|
| 1068 | | - * the nth caller. We only need a few of the regs: |
|---|
| 1163 | + * When generating a perf sample in-line, instead of from an interrupt / |
|---|
| 1164 | + * exception, we lack a pt_regs. This is typically used from software events |
|---|
| 1165 | + * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints. |
|---|
| 1166 | + * |
|---|
| 1167 | + * We typically don't need a full set, but (for x86) do require: |
|---|
| 1069 | 1168 | * - ip for PERF_SAMPLE_IP |
|---|
| 1070 | 1169 | * - cs for user_mode() tests |
|---|
| 1071 | | - * - bp for callchains |
|---|
| 1072 | | - * - eflags, for future purposes, just in case |
|---|
| 1170 | + * - sp for PERF_SAMPLE_CALLCHAIN |
|---|
| 1171 | + * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs()) |
|---|
| 1172 | + * |
|---|
| 1173 | + * NOTE: assumes @regs is otherwise already 0 filled; this is important for |
|---|
| 1174 | + * things like PERF_SAMPLE_REGS_INTR. |
|---|
| 1073 | 1175 | */ |
|---|
| 1074 | 1176 | static inline void perf_fetch_caller_regs(struct pt_regs *regs) |
|---|
| 1075 | 1177 | { |
|---|
| .. | .. |
|---|
| 1142 | 1244 | } |
|---|
| 1143 | 1245 | |
|---|
| 1144 | 1246 | extern void perf_event_mmap(struct vm_area_struct *vma); |
|---|
| 1145 | | -extern struct perf_guest_info_callbacks *perf_guest_cbs; |
|---|
| 1247 | + |
|---|
| 1248 | +extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, |
|---|
| 1249 | + bool unregister, const char *sym); |
|---|
| 1250 | +extern void perf_event_bpf_event(struct bpf_prog *prog, |
|---|
| 1251 | + enum perf_bpf_event_type type, |
|---|
| 1252 | + u16 flags); |
|---|
| 1253 | + |
|---|
| 1254 | +extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; |
|---|
| 1255 | +static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) |
|---|
| 1256 | +{ |
|---|
| 1257 | + /* |
|---|
| 1258 | + * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading |
|---|
| 1259 | + * the callbacks between a !NULL check and dereferences, to ensure |
|---|
| 1260 | + * pending stores/changes to the callback pointers are visible before a |
|---|
| 1261 | + * non-NULL perf_guest_cbs is visible to readers, and to prevent a |
|---|
| 1262 | + * module from unloading callbacks while readers are active. |
|---|
| 1263 | + */ |
|---|
| 1264 | + return rcu_dereference(perf_guest_cbs); |
|---|
| 1265 | +} |
|---|
| 1146 | 1266 | extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); |
|---|
| 1147 | 1267 | extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); |
|---|
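`perf_guest_cbs` is now an `__rcu` pointer, so readers must go through `perf_get_guest_cbs()` (a single `rcu_dereference()`) rather than testing and then re-reading the global. A sketch of a reader on the sample path, shown under `rcu_read_lock()` for clarity; the function name is hypothetical (the real consumers are the arch `perf_misc_flags()`/`perf_instruction_pointer()` helpers).

```c
/* Hedged sketch: resolving the sample IP against a possibly-running guest. */
static unsigned long my_sample_ip(struct pt_regs *regs)
{
	struct perf_guest_info_callbacks *cbs;
	unsigned long ip;

	rcu_read_lock();
	cbs = perf_get_guest_cbs();		/* may be NULL */
	if (cbs && cbs->is_in_guest())
		ip = cbs->get_guest_ip();
	else
		ip = instruction_pointer(regs);
	rcu_read_unlock();

	return ip;
}
```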
| 1148 | 1268 | |
|---|
| .. | .. |
|---|
| 1150 | 1270 | extern void perf_event_comm(struct task_struct *tsk, bool exec); |
|---|
| 1151 | 1271 | extern void perf_event_namespaces(struct task_struct *tsk); |
|---|
| 1152 | 1272 | extern void perf_event_fork(struct task_struct *tsk); |
|---|
| 1273 | +extern void perf_event_text_poke(const void *addr, |
|---|
| 1274 | + const void *old_bytes, size_t old_len, |
|---|
| 1275 | + const void *new_bytes, size_t new_len); |
|---|
| 1153 | 1276 | |
|---|
| 1154 | 1277 | /* Callchains */ |
|---|
| 1155 | 1278 | DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); |
|---|
| .. | .. |
|---|
| 1162 | 1285 | extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs); |
|---|
| 1163 | 1286 | extern int get_callchain_buffers(int max_stack); |
|---|
| 1164 | 1287 | extern void put_callchain_buffers(void); |
|---|
| 1288 | +extern struct perf_callchain_entry *get_callchain_entry(int *rctx); |
|---|
| 1289 | +extern void put_callchain_entry(int rctx); |
|---|
| 1165 | 1290 | |
|---|
| 1166 | 1291 | extern int sysctl_perf_event_max_stack; |
|---|
| 1167 | 1292 | extern int sysctl_perf_event_max_contexts_per_stack; |
|---|
| .. | .. |
|---|
| 1198 | 1323 | |
|---|
| 1199 | 1324 | extern void perf_sample_event_took(u64 sample_len_ns); |
|---|
| 1200 | 1325 | |
|---|
| 1201 | | -extern int perf_proc_update_handler(struct ctl_table *table, int write, |
|---|
| 1202 | | - void __user *buffer, size_t *lenp, |
|---|
| 1203 | | - loff_t *ppos); |
|---|
| 1204 | | -extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, |
|---|
| 1205 | | - void __user *buffer, size_t *lenp, |
|---|
| 1206 | | - loff_t *ppos); |
|---|
| 1207 | | - |
|---|
| 1326 | +int perf_proc_update_handler(struct ctl_table *table, int write, |
|---|
| 1327 | + void *buffer, size_t *lenp, loff_t *ppos); |
|---|
| 1328 | +int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, |
|---|
| 1329 | + void *buffer, size_t *lenp, loff_t *ppos); |
|---|
| 1208 | 1330 | int perf_event_max_stack_handler(struct ctl_table *table, int write, |
|---|
| 1209 | | - void __user *buffer, size_t *lenp, loff_t *ppos); |
|---|
| 1331 | + void *buffer, size_t *lenp, loff_t *ppos); |
|---|
| 1210 | 1332 | |
|---|
| 1211 | 1333 | /* Access to perf_event_open(2) syscall. */ |
|---|
| 1212 | 1334 | #define PERF_SECURITY_OPEN 0 |
|---|
| .. | .. |
|---|
| 1216 | 1338 | #define PERF_SECURITY_KERNEL 2 |
|---|
| 1217 | 1339 | #define PERF_SECURITY_TRACEPOINT 3 |
|---|
| 1218 | 1340 | |
|---|
| 1219 | | -static inline bool perf_paranoid_any(void) |
|---|
| 1220 | | -{ |
|---|
| 1221 | | - return sysctl_perf_event_paranoid > 2; |
|---|
| 1222 | | -} |
|---|
| 1223 | | - |
|---|
| 1224 | 1341 | static inline int perf_is_paranoid(void) |
|---|
| 1225 | 1342 | { |
|---|
| 1226 | 1343 | return sysctl_perf_event_paranoid > -1; |
|---|
| .. | .. |
|---|
| 1228 | 1345 | |
|---|
| 1229 | 1346 | static inline int perf_allow_kernel(struct perf_event_attr *attr) |
|---|
| 1230 | 1347 | { |
|---|
| 1231 | | - if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) |
|---|
| 1348 | + if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) |
|---|
| 1232 | 1349 | return -EACCES; |
|---|
| 1233 | 1350 | |
|---|
| 1234 | 1351 | return security_perf_event_open(attr, PERF_SECURITY_KERNEL); |
|---|
| .. | .. |
|---|
| 1236 | 1353 | |
|---|
| 1237 | 1354 | static inline int perf_allow_cpu(struct perf_event_attr *attr) |
|---|
| 1238 | 1355 | { |
|---|
| 1239 | | - if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) |
|---|
| 1356 | + if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) |
|---|
| 1240 | 1357 | return -EACCES; |
|---|
| 1241 | 1358 | |
|---|
| 1242 | 1359 | return security_perf_event_open(attr, PERF_SECURITY_CPU); |
|---|
| .. | .. |
|---|
| 1244 | 1361 | |
|---|
| 1245 | 1362 | static inline int perf_allow_tracepoint(struct perf_event_attr *attr) |
|---|
| 1246 | 1363 | { |
|---|
| 1247 | | - if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) |
|---|
| 1364 | + if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) |
|---|
| 1248 | 1365 | return -EPERM; |
|---|
| 1249 | 1366 | |
|---|
| 1250 | 1367 | return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); |
|---|
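The paranoid checks above now accept `perfmon_capable()` instead of raw `CAP_SYS_ADMIN`, i.e. a task holding CAP_PERFMON passes (CAP_SYS_ADMIN remains the legacy fallback). Drivers gating privileged PMU features can use the same helper; a sketch with a hypothetical config bit:

```c
/* Hedged sketch: gate a privileged config bit on CAP_PERFMON rather than
 * CAP_SYS_ADMIN. MY_PMU_CFG_PRIV is hypothetical. */
#define MY_PMU_CFG_PRIV		BIT_ULL(63)

static int my_pmu_check_priv(struct perf_event *event)
{
	if ((event->attr.config & MY_PMU_CFG_PRIV) && !perfmon_capable())
		return -EACCES;

	return 0;
}
```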
| .. | .. |
|---|
| 1308 | 1425 | extern void perf_event_addr_filters_sync(struct perf_event *event); |
|---|
| 1309 | 1426 | |
|---|
| 1310 | 1427 | extern int perf_output_begin(struct perf_output_handle *handle, |
|---|
| 1428 | + struct perf_sample_data *data, |
|---|
| 1311 | 1429 | struct perf_event *event, unsigned int size); |
|---|
| 1312 | 1430 | extern int perf_output_begin_forward(struct perf_output_handle *handle, |
|---|
| 1313 | | - struct perf_event *event, |
|---|
| 1314 | | - unsigned int size); |
|---|
| 1431 | + struct perf_sample_data *data, |
|---|
| 1432 | + struct perf_event *event, |
|---|
| 1433 | + unsigned int size); |
|---|
| 1315 | 1434 | extern int perf_output_begin_backward(struct perf_output_handle *handle, |
|---|
| 1435 | + struct perf_sample_data *data, |
|---|
| 1316 | 1436 | struct perf_event *event, |
|---|
| 1317 | 1437 | unsigned int size); |
|---|
| 1318 | 1438 | |
|---|
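`perf_output_begin()` and its forward/backward variants now take the caller's `perf_sample_data`, which the output code can reuse (for example on the lost-record path) instead of building its own on-stack copy. A minimal sketch of emitting a record with the new signature; the record layout is purely illustrative.

```c
/* Hedged sketch: writing one small record through the updated API. */
static void my_emit_u64(struct perf_event *event, u64 value)
{
	struct perf_output_handle handle;
	struct perf_sample_data data;
	struct perf_event_header header = {
		.type = PERF_RECORD_SAMPLE,		/* illustrative only */
		.size = sizeof(header) + sizeof(value),
	};

	perf_sample_data_init(&data, 0, 0);		/* addr = 0, period = 0 */

	if (perf_output_begin(&handle, &data, event, header.size))
		return;

	perf_output_put(&handle, header);
	perf_output_put(&handle, value);
	perf_output_end(&handle);
}
```

Real users that enable sample IDs also call `perf_event_header__init_id()` before and `perf_event__output_id_sample()` after writing the body.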
| .. | .. |
|---|
| 1321 | 1441 | const void *buf, unsigned int len); |
|---|
| 1322 | 1442 | extern unsigned int perf_output_skip(struct perf_output_handle *handle, |
|---|
| 1323 | 1443 | unsigned int len); |
|---|
| 1444 | +extern long perf_output_copy_aux(struct perf_output_handle *aux_handle, |
|---|
| 1445 | + struct perf_output_handle *handle, |
|---|
| 1446 | + unsigned long from, unsigned long to); |
|---|
| 1324 | 1447 | extern int perf_swevent_get_recursion_context(void); |
|---|
| 1325 | 1448 | extern void perf_swevent_put_recursion_context(int rctx); |
|---|
| 1326 | 1449 | extern u64 perf_swevent_set_period(struct perf_event *event); |
|---|
| .. | .. |
|---|
| 1330 | 1453 | extern void perf_event_disable_inatomic(struct perf_event *event); |
|---|
| 1331 | 1454 | extern void perf_event_task_tick(void); |
|---|
| 1332 | 1455 | extern int perf_event_account_interrupt(struct perf_event *event); |
|---|
| 1456 | +extern int perf_event_period(struct perf_event *event, u64 value); |
|---|
| 1457 | +extern u64 perf_event_pause(struct perf_event *event, bool reset); |
|---|
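`perf_event_period()` and `perf_event_pause()` export period reprogramming and an atomic disable+read(+reset) to in-kernel users of kernel-owned counters (e.g. KVM's PMU virtualization). A sketch of re-arming such an event; `event` and `new_period` are assumed to come from the caller.

```c
/* Hedged sketch: atomically drain and re-arm a kernel-owned sampling event. */
static u64 my_rearm(struct perf_event *event, u64 new_period)
{
	u64 count;

	count = perf_event_pause(event, true);	/* disable, read, reset count   */
	perf_event_period(event, new_period);	/* new period for the next run  */
	perf_event_enable(event);

	return count;
}
```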
| 1333 | 1458 | #else /* !CONFIG_PERF_EVENTS: */ |
|---|
| 1334 | 1459 | static inline void * |
|---|
| 1335 | 1460 | perf_aux_output_begin(struct perf_output_handle *handle, |
|---|
| .. | .. |
|---|
| 1389 | 1514 | (struct perf_guest_info_callbacks *callbacks) { return 0; } |
|---|
| 1390 | 1515 | |
|---|
| 1391 | 1516 | static inline void perf_event_mmap(struct vm_area_struct *vma) { } |
|---|
| 1517 | + |
|---|
| 1518 | +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); |
|---|
| 1519 | +static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, |
|---|
| 1520 | + bool unregister, const char *sym) { } |
|---|
| 1521 | +static inline void perf_event_bpf_event(struct bpf_prog *prog, |
|---|
| 1522 | + enum perf_bpf_event_type type, |
|---|
| 1523 | + u16 flags) { } |
|---|
| 1392 | 1524 | static inline void perf_event_exec(void) { } |
|---|
| 1393 | 1525 | static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } |
|---|
| 1394 | 1526 | static inline void perf_event_namespaces(struct task_struct *tsk) { } |
|---|
| 1395 | 1527 | static inline void perf_event_fork(struct task_struct *tsk) { } |
|---|
| 1528 | +static inline void perf_event_text_poke(const void *addr, |
|---|
| 1529 | + const void *old_bytes, |
|---|
| 1530 | + size_t old_len, |
|---|
| 1531 | + const void *new_bytes, |
|---|
| 1532 | + size_t new_len) { } |
|---|
| 1396 | 1533 | static inline void perf_event_init(void) { } |
|---|
| 1397 | 1534 | static inline int perf_swevent_get_recursion_context(void) { return -1; } |
|---|
| 1398 | 1535 | static inline void perf_swevent_put_recursion_context(int rctx) { } |
|---|
| .. | .. |
|---|
| 1402 | 1539 | static inline int __perf_event_disable(void *info) { return -1; } |
|---|
| 1403 | 1540 | static inline void perf_event_task_tick(void) { } |
|---|
| 1404 | 1541 | static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } |
|---|
| 1542 | +static inline int perf_event_period(struct perf_event *event, u64 value) |
|---|
| 1543 | +{ |
|---|
| 1544 | + return -EINVAL; |
|---|
| 1545 | +} |
|---|
| 1546 | +static inline u64 perf_event_pause(struct perf_event *event, bool reset) |
|---|
| 1547 | +{ |
|---|
| 1548 | + return 0; |
|---|
| 1549 | +} |
|---|
| 1405 | 1550 | #endif |
|---|
| 1406 | 1551 | |
|---|
| 1407 | 1552 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) |
|---|
| .. | .. |
|---|
| 1467 | 1612 | #define perf_event_exit_cpu NULL |
|---|
| 1468 | 1613 | #endif |
|---|
| 1469 | 1614 | |
|---|
| 1615 | +extern void __weak arch_perf_update_userpage(struct perf_event *event, |
|---|
| 1616 | + struct perf_event_mmap_page *userpg, |
|---|
| 1617 | + u64 now); |
|---|
| 1618 | + |
|---|
| 1470 | 1619 | #endif /* _LINUX_PERF_EVENT_H */ |
|---|