~hc/RK356X_SDK_RELEASE.git

..	..	@@ -64,6 +64,11 @@
64	64	- 'name' : User defined event name. Single quotes (') may be used to
65	65	escape symbols in the name from parsing by shell and tool
66	66	like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'.
	67	+ - 'aux-output': Generate AUX records instead of events. This requires
	68	+ that an AUX area event is also provided.
	69	+ - 'aux-sample-size': Set sample size for AUX area sampling. If the
	70	+ '--aux-sample' option has been used, set aux-sample-size=0 to disable
	71	+ AUX area sampling for the event.
67	72
68	73	See the linkperf:perf-list[1] man page for more parameters.
69	74
..	..	@@ -91,6 +96,20 @@
91	96	'mem:0x1000:rw'.
92	97	If you want to profile write accesses in [0x1000~0x1008), just set
93	98	'mem:0x1000/8:w'.
	99	+
	100	+ - a BPF source file (ending in .c) or a precompiled object file (ending
	101	+ in .o) selects one or more BPF events.
	102	+ The BPF program can attach to various perf events based on the ELF section
	103	+ names.
	104	+
	105	+ When processing a '.c' file, perf searches an installed LLVM to compile it
	106	+ into an object file first. Optional clang options can be passed via the
	107	+ '--clang-opt' command line option, e.g.:
	108	+
	109	+ perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \
	110	+ -e tests/bpf-script-example.c
	111	+
	112	+ Note: '--clang-opt' must be placed before '--event/-e'.
94	113
95	114	- a group of events surrounded by a pair of braces ("{event1,event2,...}").
96	115	Each event is separated by commas and the group should be quoted to
..	..	@@ -222,16 +241,22 @@
222	241	option and remains only for backward compatibility. See --event.
223	242
224	243	-g::
225		- Enables call-graph (stack chain/backtrace) recording.
	244	+ Enables call-graph (stack chain/backtrace) recording for both
	245	+ kernel space and user space.
226	246
227	247	--call-graph::
228	248	Setup and enable call-graph (stack chain/backtrace) recording,
229		- implies -g. Default is "fp".
	249	+ implies -g. Default is "fp" (for user space).
230	250
231		- Allows specifying "fp" (frame pointer) or "dwarf"
232		- (DWARF's CFI - Call Frame Information) or "lbr"
233		- (Hardware Last Branch Record facility) as the method to collect
234		- the information used to show the call graphs.
	251	+ The unwinding method used for kernel space is dependent on the
	252	+ unwinder used by the active kernel configuration, i.e.
	253	+ CONFIG_UNWINDER_FRAME_POINTER (fp) or CONFIG_UNWINDER_ORC (orc).
	254	+
	255	+ Any option specified here controls the method used for user space.
	256	+
	257	+ Valid options are "fp" (frame pointer), "dwarf" (DWARF's CFI -
	258	+ Call Frame Information) or "lbr" (Hardware Last Branch Record
	259	+ facility).
235	260
236	261	In some systems, where binaries are built with gcc
237	262	--fomit-frame-pointer, using the "fp" method will produce bogus
..	..	@@ -370,7 +395,10 @@
370	395	abort events and some memory events in precise mode on modern Intel CPUs.
371	396
372	397	--namespaces::
373		-Record events of type PERF_RECORD_NAMESPACES.
	398	+Record events of type PERF_RECORD_NAMESPACES. This enables 'cgroup_id' sort key.
	399	+
	400	+--all-cgroups::
	401	+Record events of type PERF_RECORD_CGROUP. This enables 'cgroup' sort key.
374	402
375	403	--transaction::
376	404	Record transaction flags for transaction related events.
..	..	@@ -383,8 +411,9 @@
383	411
384	412	-D::
385	413	--delay=::
386		-After starting the program, wait msecs before measuring. This is useful to
387		-filter out the startup phase of the program, which is often very different.
	414	+After starting the program, wait msecs before measuring (-1: start with events
	415	+disabled). This is useful to filter out the startup phase of the program, which
	416	+is often very different.
388	417
389	418	-I::
390	419	--intr-regs::
..	..	@@ -396,7 +425,8 @@
396	425	--intr-regs=ax,bx. The list of registers is architecture dependent.
397	426
398	427	--user-regs::
399		-Capture user registers at sample time. Same arguments as -I.
	428	+Similar to -I, but capture user registers at sample time. To list the available
	429	+user registers use --user-regs=\?.
400	430
401	431	--running-time::
402	432	Record running and enabled time for read events (:S)
..	..	@@ -411,9 +441,20 @@
411	441	-S::
412	442	--snapshot::
413	443	Select AUX area tracing Snapshot Mode. This option is valid only with an
414		-AUX area tracing event. Optionally the number of bytes to capture per
415		-snapshot can be specified. In Snapshot Mode, trace data is captured only when
416		-signal SIGUSR2 is received.
	444	+AUX area tracing event. Optionally, certain snapshot capturing parameters
	445	+can be specified in a string that follows this option:
	446	+ 'e': take one last snapshot on exit; guarantees that there is at least one
	447	+ snapshot in the output file;
	448	+ <size>: if the PMU supports this, specify the desired snapshot size.
	449	+
	450	+In Snapshot Mode trace data is captured only when signal SIGUSR2 is received
	451	+and on exit if the above 'e' option is given.
	452	+
	453	+--aux-sample[=OPTIONS]::
	454	+Select AUX area sampling. At least one of the events selected by the -e option
	455	+must be an AUX area event. Samples on other events will be created containing
	456	+data from the AUX area. Optionally sample size may be specified, otherwise it
	457	+defaults to 4KiB.
417	458
418	459	--proc-map-timeout::
419	460	When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
..	..	@@ -422,7 +463,9 @@
422	463
423	464	--switch-events::
424	465	Record context switch events i.e. events of type PERF_RECORD_SWITCH or
425		-PERF_RECORD_SWITCH_CPU_WIDE.
	466	+PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight)
	467	+switch events will be enabled automatically, which can be suppressed by
	468	+the option --no-switch-events.
426	469
427	470	--clang-path=PATH::
428	471	Path to clang binary to use for compiling BPF scriptlets.
..	..	@@ -439,11 +482,56 @@
439	482	--buildid-all::
440	483	Record build-id of all DSOs regardless of whether it's actually hit or not.
441	484
	485	+--aio[=n]::
	486	+Use <n> control blocks in asynchronous (POSIX AIO) trace writing mode (default: 1, max: 4).
	487	+Asynchronous mode is supported only when the perf tool is linked with a libc library
	488	+providing an implementation of the POSIX AIO API.
	489	+
	490	+--affinity=mode::
	491	+Set affinity mask of trace reading thread according to the policy defined by 'mode' value:
	492	+ node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
	493	+ cpu - thread affinity mask is set to cpu of the processed mmap buffer
	494	+
	495	+--mmap-flush=number::
	496	+
	497	+Specify minimal number of bytes that is extracted from mmap data pages and
	498	+processed for output. One can specify the number using B/K/M/G suffixes.
	499	+
	500	+The maximal allowed value is a quarter of the size of mmaped data pages.
	501	+
	502	+The default option value is 1 byte which means that every time that the output
	503	+writing thread finds some new data in the mmaped buffer the data is extracted,
	504	+possibly compressed (-z) and written to the output, perf.data or pipe.
	505	+
	506	+Larger data chunks are compressed more effectively in comparison to smaller
	507	+chunks so extraction of larger chunks from the mmap data pages is preferable
	508	+from the perspective of output size reduction.
	509	+
	510	+Also, in some cases, executing fewer output write syscalls with bigger data size
	511	+can take less time than executing more output write syscalls with smaller data
	512	+size thus lowering runtime profiling overhead.
	513	+
	514	+-z::
	515	+--compression-level[=n]::
	516	+Produce compressed trace using specified level n (default: 1 - fastest compression,
	517	+22 - smallest trace)
	518	+
442	519	--all-kernel::
443	520	Configure all used events to run in kernel space.
444	521
445	522	--all-user::
446	523	Configure all used events to run in user space.
	524	+
	525	+--kernel-callchains::
	526	+Collect callchains only from kernel space. I.e. this option sets
	527	+perf_event_attr.exclude_callchain_user to 1.
	528	+
	529	+--user-callchains::
	530	+Collect callchains only from user space. I.e. this option sets
	531	+perf_event_attr.exclude_callchain_kernel to 1.
	532	+
	533	+Don't use both --kernel-callchains and --user-callchains at the same time or no
	534	+callchains will be collected.
447	535
448	536	--timestamp-filename::
449	537	Append timestamp to output file name.
..	..	@@ -475,6 +563,23 @@
475	563
476	564	--switch-output --no-no-buildid --no-no-buildid-cache
477	565
	566	+--switch-output-event::
	567	+Events that will cause the switch of the perf.data file, auto-selecting
	568	+--switch-output=signal. The results are similar, as internally the sideband
	569	+thread will also send a SIGUSR2 to the main one.
	570	+
	571	+Uses the same syntax as --event, it will just not be recorded, serving only to
	572	+switch the perf.data file as soon as the --switch-output event is processed by
	573	+a separate sideband thread.
	574	+
	575	+This sideband thread is also used for other purposes, like processing the
	576	+PERF_RECORD_BPF_EVENT records as they happen, asking the kernel for extra BPF
	577	+information, etc.
	578	+
	579	+--switch-max-files=N::
	580	+
	581	+When rotating perf.data with --switch-output, only keep N files.
	582	+
478	583	--dry-run::
479	584	Parse options then exit. --dry-run can be used to detect errors in cmdline
480	585	options.
..	..	@@ -504,6 +609,69 @@
504	609
505	610	Implies --tail-synthesize.
506	611
	612	+--kcore::
	613	+Make a copy of /proc/kcore and place it into a directory with the perf data file.
	614	+
	615	+--max-size=<size>::
	616	+Limit the sample data max size. <size> is expected to be a number with
	617	+an appended unit character - B/K/M/G.
	618	+
	619	+--num-thread-synthesize::
	620	+ The number of threads to run when synthesizing events for existing processes.
	621	+ By default, the number of threads equals 1.
	622	+
	623	+ifdef::HAVE_LIBPFM[]
	624	+--pfm-events events::
	625	+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
	626	+including support for event filters. For example '--pfm-events
	627	+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
	628	+option using the comma separator. Hardware events and generic hardware
	629	+events cannot be mixed together. The latter must be used with the -e
	630	+option. The -e option and this one can be mixed and matched. Events
	631	+can be grouped using the {} notation.
	632	+endif::HAVE_LIBPFM[]
	633	+
	634	+--control=fifo:ctl-fifo[,ack-fifo]::
	635	+--control=fd:ctl-fd[,ack-fd]::
	636	+ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows.
	637	+Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
	638	+'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be
	639	+started with events disabled using --delay=-1 option. Optionally send control command
	640	+completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process.
	641	+Example of bash shell script to enable and disable events during measurements:
	642	+
	643	+ #!/bin/bash
	644	+
	645	+ ctl_dir=/tmp/
	646	+
	647	+ ctl_fifo=${ctl_dir}perf_ctl.fifo
	648	+ test -p ${ctl_fifo} && unlink ${ctl_fifo}
	649	+ mkfifo ${ctl_fifo}
	650	+ exec {ctl_fd}<>${ctl_fifo}
	651	+
	652	+ ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
	653	+ test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
	654	+ mkfifo ${ctl_ack_fifo}
	655	+ exec {ctl_fd_ack}<>${ctl_ack_fifo}
	656	+
	657	+ perf record -D -1 -e cpu-cycles -a \
	658	+ --control fd:${ctl_fd},${ctl_fd_ack} \
	659	+ -- sleep 30 &
	660	+ perf_pid=$!
	661	+
	662	+ sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
	663	+ sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
	664	+
	665	+ exec {ctl_fd_ack}>&-
	666	+ unlink ${ctl_ack_fifo}
	667	+
	668	+ exec {ctl_fd}>&-
	669	+ unlink ${ctl_fifo}
	670	+
	671	+ wait -n ${perf_pid}
	672	+ exit $?
	673	+
	674	+
507	675	SEE ALSO
508	676	--------
509		-linkperf:perf-stat[1], linkperf:perf-list[1]
	677	+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]