From 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 22 Oct 2024 10:36:11 +0000 Subject: [PATCH] 修改4g拨号为QMI,需要在系统里后台执行quectel-CM --- kernel/tools/perf/Documentation/perf-record.txt | 198 +++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 183 insertions(+), 15 deletions(-) diff --git a/kernel/tools/perf/Documentation/perf-record.txt b/kernel/tools/perf/Documentation/perf-record.txt index edf2be2..768888b 100644 --- a/kernel/tools/perf/Documentation/perf-record.txt +++ b/kernel/tools/perf/Documentation/perf-record.txt @@ -64,6 +64,11 @@ - 'name' : User defined event name. Single quotes (') may be used to escape symbols in the name from parsing by shell and tool like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'. + - 'aux-output': Generate AUX records instead of events. This requires + that an AUX area event is also provided. + - 'aux-sample-size': Set sample size for AUX area sampling. If the + '--aux-sample' option has been used, set aux-sample-size=0 to disable + AUX area sampling for the event. See the linkperf:perf-list[1] man page for more parameters. @@ -91,6 +96,20 @@ 'mem:0x1000:rw'. If you want to profile write accesses in [0x1000~1008), just set 'mem:0x1000/8:w'. + + - a BPF source file (ending in .c) or a precompiled object file (ending + in .o) selects one or more BPF events. + The BPF program can attach to various perf events based on the ELF section + names. + + When processing a '.c' file, perf searches an installed LLVM to compile it + into an object file first. Optional clang options can be passed via the + '--clang-opt' command line option, e.g.: + + perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \ + -e tests/bpf-script-example.c + + Note: '--clang-opt' must be placed before '--event/-e'. - a group of events surrounded by a pair of brace ("{event1,event2,...}"). Each event is separated by commas and the group should be quoted to @@ -222,16 +241,22 @@ option and remains only for backward compatibility. See --event. -g:: - Enables call-graph (stack chain/backtrace) recording. + Enables call-graph (stack chain/backtrace) recording for both + kernel space and user space. --call-graph:: Setup and enable call-graph (stack chain/backtrace) recording, - implies -g. Default is "fp". + implies -g. Default is "fp" (for user space). - Allows specifying "fp" (frame pointer) or "dwarf" - (DWARF's CFI - Call Frame Information) or "lbr" - (Hardware Last Branch Record facility) as the method to collect - the information used to show the call graphs. + The unwinding method used for kernel space is dependent on the + unwinder used by the active kernel configuration, i.e + CONFIG_UNWINDER_FRAME_POINTER (fp) or CONFIG_UNWINDER_ORC (orc) + + Any option specified here controls the method used for user space. + + Valid options are "fp" (frame pointer), "dwarf" (DWARF's CFI - + Call Frame Information) or "lbr" (Hardware Last Branch Record + facility). In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus @@ -370,7 +395,10 @@ abort events and some memory events in precise mode on modern Intel CPUs. --namespaces:: -Record events of type PERF_RECORD_NAMESPACES. +Record events of type PERF_RECORD_NAMESPACES. This enables 'cgroup_id' sort key. + +--all-cgroups:: +Record events of type PERF_RECORD_CGROUP. This enables 'cgroup' sort key. --transaction:: Record transaction flags for transaction related events. @@ -383,8 +411,9 @@ -D:: --delay=:: -After starting the program, wait msecs before measuring. This is useful to -filter out the startup phase of the program, which is often very different. +After starting the program, wait msecs before measuring (-1: start with events +disabled). This is useful to filter out the startup phase of the program, which +is often very different. -I:: --intr-regs:: @@ -396,7 +425,8 @@ --intr-regs=ax,bx. The list of register is architecture dependent. --user-regs:: -Capture user registers at sample time. Same arguments as -I. +Similar to -I, but capture user registers at sample time. To list the available +user registers use --user-regs=\?. --running-time:: Record running and enabled time for read events (:S) @@ -411,9 +441,20 @@ -S:: --snapshot:: Select AUX area tracing Snapshot Mode. This option is valid only with an -AUX area tracing event. Optionally the number of bytes to capture per -snapshot can be specified. In Snapshot Mode, trace data is captured only when -signal SIGUSR2 is received. +AUX area tracing event. Optionally, certain snapshot capturing parameters +can be specified in a string that follows this option: + 'e': take one last snapshot on exit; guarantees that there is at least one + snapshot in the output file; + <size>: if the PMU supports this, specify the desired snapshot size. + +In Snapshot Mode trace data is captured only when signal SIGUSR2 is received +and on exit if the above 'e' option is given. + +--aux-sample[=OPTIONS]:: +Select AUX area sampling. At least one of the events selected by the -e option +must be an AUX area event. Samples on other events will be created containing +data from the AUX area. Optionally sample size may be specified, otherwise it +defaults to 4KiB. --proc-map-timeout:: When processing pre-existing threads /proc/XXX/mmap, it may take a long time, @@ -422,7 +463,9 @@ --switch-events:: Record context switch events i.e. events of type PERF_RECORD_SWITCH or -PERF_RECORD_SWITCH_CPU_WIDE. +PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight) +switch events will be enabled automatically, which can be suppressed by +by the option --no-switch-events. --clang-path=PATH:: Path to clang binary to use for compiling BPF scriptlets. @@ -439,11 +482,56 @@ --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. +--aio[=n]:: +Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4). +Asynchronous mode is supported only when linking Perf tool with libc library +providing implementation for Posix AIO API. + +--affinity=mode:: +Set affinity mask of trace reading thread according to the policy defined by 'mode' value: + node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer + cpu - thread affinity mask is set to cpu of the processed mmap buffer + +--mmap-flush=number:: + +Specify minimal number of bytes that is extracted from mmap data pages and +processed for output. One can specify the number using B/K/M/G suffixes. + +The maximal allowed value is a quarter of the size of mmaped data pages. + +The default option value is 1 byte which means that every time that the output +writing thread finds some new data in the mmaped buffer the data is extracted, +possibly compressed (-z) and written to the output, perf.data or pipe. + +Larger data chunks are compressed more effectively in comparison to smaller +chunks so extraction of larger chunks from the mmap data pages is preferable +from the perspective of output size reduction. + +Also at some cases executing less output write syscalls with bigger data size +can take less time than executing more output write syscalls with smaller data +size thus lowering runtime profiling overhead. + +-z:: +--compression-level[=n]:: +Produce compressed trace using specified level n (default: 1 - fastest compression, +22 - smallest trace) + --all-kernel:: Configure all used events to run in kernel space. --all-user:: Configure all used events to run in user space. + +--kernel-callchains:: +Collect callchains only from kernel space. I.e. this option sets +perf_event_attr.exclude_callchain_user to 1. + +--user-callchains:: +Collect callchains only from user space. I.e. this option sets +perf_event_attr.exclude_callchain_kernel to 1. + +Don't use both --kernel-callchains and --user-callchains at the same time or no +callchains will be collected. --timestamp-filename Append timestamp to output file name. @@ -475,6 +563,23 @@ --switch-output --no-no-buildid --no-no-buildid-cache +--switch-output-event:: +Events that will cause the switch of the perf.data file, auto-selecting +--switch-output=signal, the results are similar as internally the side band +thread will also send a SIGUSR2 to the main one. + +Uses the same syntax as --event, it will just not be recorded, serving only to +switch the perf.data file as soon as the --switch-output event is processed by +a separate sideband thread. + +This sideband thread is also used to other purposes, like processing the +PERF_RECORD_BPF_EVENT records as they happen, asking the kernel for extra BPF +information, etc. + +--switch-max-files=N:: + +When rotating perf.data with --switch-output, only keep N files. + --dry-run:: Parse options then exit. --dry-run can be used to detect errors in cmdline options. @@ -504,6 +609,69 @@ Implies --tail-synthesize. +--kcore:: +Make a copy of /proc/kcore and place it into a directory with the perf data file. + +--max-size=<size>:: +Limit the sample data max size, <size> is expected to be a number with +appended unit character - B/K/M/G + +--num-thread-synthesize:: + The number of threads to run when synthesizing events for existing processes. + By default, the number of threads equals 1. + +ifdef::HAVE_LIBPFM[] +--pfm-events events:: +Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) +including support for event filters. For example '--pfm-events +inst_retired:any_p:u:c=1:i'. More than one event can be passed to the +option using the comma separator. Hardware events and generic hardware +events cannot be mixed together. The latter must be used with the -e +option. The -e option and this one can be mixed and matched. Events +can be grouped using the {} notation. +endif::HAVE_LIBPFM[] + +--control=fifo:ctl-fifo[,ack-fifo]:: +--control=fd:ctl-fd[,ack-fd]:: +ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. +Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, +'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be +started with events disabled using --delay=-1 option. Optionally send control command +completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process. +Example of bash shell script to enable and disable events during measurements: + + #!/bin/bash + + ctl_dir=/tmp/ + + ctl_fifo=${ctl_dir}perf_ctl.fifo + test -p ${ctl_fifo} && unlink ${ctl_fifo} + mkfifo ${ctl_fifo} + exec {ctl_fd}<>${ctl_fifo} + + ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo + test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} + mkfifo ${ctl_ack_fifo} + exec {ctl_fd_ack}<>${ctl_ack_fifo} + + perf record -D -1 -e cpu-cycles -a \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & + perf_pid=$! + + sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" + sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + + exec {ctl_fd_ack}>&- + unlink ${ctl_ack_fifo} + + exec {ctl_fd}>&- + unlink ${ctl_fifo} + + wait -n ${perf_pid} + exit $? + + SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-list[1] +linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] -- Gitblit v1.6.2