[PATCH v9 9/9] perf test: Add Arm CoreSight callchain test
James Clark
james.clark at linaro.org
Wed Jun 17 03:03:07 PDT 2026
On 16/06/2026 3:51 pm, Leo Yan wrote:
> Add a CoreSight shell test for synthesized callchains.
>
> The test uses the new callchain workload to generate a trace and decodes
> it with callchain synthesis enabled. It then verifies that the instruction
> samples show the expected callchain push and pop sequence.
>
> Use control FIFOs so tracing starts only around the workload, which
> keeps the trace data small. The test only runs when the cs_etm event is
> available and the user has root permission.
>
> After:
>
> perf test 138 -vvv
> 138: CoreSight synthesized callchain:
> ---- start ----
> test child forked, pid 35581
> Callchain flow matched:
> l1=4642868 l2=4642880 l3=4642895 l4=4642919 l5=4670494 l6=4670500 l7=4670520
> ---- end(0) ----
> 138: CoreSight synthesized callchain : Ok
>
> Assisted-by: Codex:GPT-5.5
> Signed-off-by: Leo Yan <leo.yan at arm.com>
> ---
> tools/perf/Documentation/perf-test.txt | 6 +-
> tools/perf/tests/builtin-test.c | 1 +
> tools/perf/tests/shell/coresight/callchain.sh | 172 ++++++++++++++++++++++++++
> tools/perf/tests/tests.h | 1 +
> tools/perf/tests/workloads/Build | 2 +
> tools/perf/tests/workloads/callchain.c | 33 +++++
> 6 files changed, 213 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
> index 81c8525f594680d814f80e6f88bcce8d867bb350..859df74e62efc4b1e80da13ae8e053356f68ae54 100644
> --- a/tools/perf/Documentation/perf-test.txt
> +++ b/tools/perf/Documentation/perf-test.txt
> @@ -57,7 +57,8 @@ OPTIONS
> --workload=::
> Run a built-in workload, to list them use '--list-workloads', current
> ones include: noploop, thloop, leafloop, sqrtloop, brstack, datasym,
> - context_switch_loop, deterministic, named_threads and landlock.
> + context_switch_loop, deterministic, named_threads, landlock and
> + callchain.
>
> Used with the shell script regression tests.
>
> @@ -69,7 +70,8 @@ OPTIONS
> 'named_threads' accepts the number of threads and the number of loops to
> do in each thread.
>
> - The datasym, landlock and deterministic workloads don't accept any.
> + The datasym, landlock, deterministic and callchain workloads don't accept
> + any.
>
> --list-workloads::
> List the available workloads to use with -w/--workload.
> diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> index 7e75f590f225e3284980829707ca8d916c98cada..1d1f38127e05429a27f31beda814f2b5f5a75089 100644
> --- a/tools/perf/tests/builtin-test.c
> +++ b/tools/perf/tests/builtin-test.c
> @@ -168,6 +168,7 @@ static struct test_workload *workloads[] = {
> &workload__jitdump,
> &workload__context_switch_loop,
> &workload__deterministic,
> + &workload__callchain,
>
> #ifdef HAVE_RUST_SUPPORT
> &workload__code_with_type,
> diff --git a/tools/perf/tests/shell/coresight/callchain.sh b/tools/perf/tests/shell/coresight/callchain.sh
> new file mode 100755
> index 0000000000000000000000000000000000000000..13cca7dc11184002e3ddc058c0d0ffa1c458c483
> --- /dev/null
> +++ b/tools/perf/tests/shell/coresight/callchain.sh
> @@ -0,0 +1,172 @@
> +#!/bin/bash
> +# CoreSight synthesized callchain (exclusive)
> +# SPDX-License-Identifier: GPL-2.0
> +
> +glb_err=1
> +
> +if ! tmpdir=$(mktemp -d /tmp/perf-cs-callchain-test.XXXXXX); then
> + echo "mktemp failed"
> + exit 1
> +fi
> +
> +cleanup_files()
> +{
> + rm -rf "$tmpdir"
> +}
> +
> +trap cleanup_files EXIT
> +trap 'cleanup_files; exit $glb_err' TERM INT
> +
> +skip_if_system_is_not_ready()
> +{
> + perf list | grep -Pzq 'cs_etm//' || {
> + echo "[Skip] cs_etm event is not available" >&2
> + return 2
> + }
> +
> + # Requires root for trace in kernel
> + [ "$(id -u)" = 0 ] || {
> + echo "[Skip] No root permission" >&2
> + return 2
> + }
> +
> + return 0
> +}
> +
> +record_trace()
> +{
> + local data=$1
> + local script=$2
> +
> + local cf="$tmpdir/ctl"
> + local af="$tmpdir/ack"
> +
> + mkfifo "$cf" "$af"
> +
> + perf record -o "$data" -e cs_etm// --per-thread -D -1 --control fifo:"$cf","$af" -- \
> + perf test --record-ctl fifo:"$cf","$af" -w callchain >/dev/null 2>&1 &&
> +
> + # It is safe to use 'i3i' with a three-instruction interval, since the
> + # workload is compiled with -O0.
> + perf script --itrace=g16i3il64 -i "$data" > "$script"
Is there a reason we don't generate callstacks on branch samples and use
--itrace=g16bl64? That removes the magic number 3 and reduces the output
file size and test runtime a bit.
All I had to do was copy the same "if (etm->synth_opts.callchain) { ..."
block into cs_etm__synth_branch_sample(). The test's grep patterns don't
exactly match the branch sample format, so the test fails as written, but
I'm sure that could be fixed.
I suppose there is value in testing instruction output, but maybe we can
add the option for users to add callstacks to branch samples, even if
it's not tested.
> +}
> +
> +callchain_regex_1()
> +{
> + printf '%s' \
> +'perf[[:space:]]+[0-9]+[[:space:]]+\[[0-9]+\][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain_foo\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'([[:space:]]+[[:xdigit:]]+ .*\n)*'
> +}
> +
> +callchain_regex_2()
> +{
> + printf '%s' \
> +'perf[[:space:]]+[0-9]+[[:space:]]+\[[0-9]+\][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain_do_syscall\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain_foo\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'([[:space:]]+[[:xdigit:]]+ .*\n)*'
> +}
> +
> +callchain_regex_3()
> +{
> + printf '%s' \
> +'perf[[:space:]]+[0-9]+[[:space:]]+\[[0-9]+\][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\
> +'[[:space:]]+[[:xdigit:]]+ syscall(@plt)?\+0x[[:xdigit:]]+ \(.*\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain_do_syscall\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain_foo\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'([[:space:]]+[[:xdigit:]]+ .*\n)*'
> +}
> +
> +callchain_regex_4()
> +{
> + printf '%s' \
> +'perf[[:space:]]+[0-9]+[[:space:]]+\[[0-9]+\][[:space:]]+([0-9.]+:[[:space:]]+)?[0-9]+ instructions:[[:space:]]*\n'\
> +'[[:space:]]+[[:xdigit:]]+ .*\+0x[[:xdigit:]]+ \(\[kernel\.kallsyms\]\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ syscall(@plt)?\+0x[[:xdigit:]]+ \(.*\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain_do_syscall\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain_foo\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'[[:space:]]+[[:xdigit:]]+ callchain\+0x[[:xdigit:]]+ \(.*/perf\)\n'\
> +'([[:space:]]+[[:xdigit:]]+ .*\n)*'
> +}
> +
> +find_after_line()
> +{
> + local regex="$1"
> + local file="$2"
> + local start="$3"
> + local offset
> + local line
> +
> + # Search in byte offset
> + offset=$(
> + tail -n +"$start" "$file" |
> + grep -Pzob -m1 "$regex" |
> + tr '\0' '\n' |
> + sed -n 's/^\([0-9][0-9]*\):.*/\1/p;q'
> + )
> +
> + if [ -z "$offset" ]; then
> + echo "Failed to match regex after line $start" >&2
> + echo "Regex:" >&2
> + printf '%s\n' "$regex" >&2
> + echo "Context from line $start:" >&2
> + sed -n "${start},$((start + 100))p" "$file" >&2
> + return 1
> + fi
> +
> + # Convert from offset to line
> + line=$(
> + tail -n +"$start" "$file" |
> + head -c "$offset" |
> + wc -l
> + )
> +
> + echo "$((start + line))"
> +}
> +
> +check_callchain_flow()
> +{
> + local file="$1"
> + local l1 l2 l3 l4 l5 l6 l7
> +
> + # Callchain push
> + l1=$(find_after_line "$(callchain_regex_1)" "$file" 1) || return 1
> + l2=$(find_after_line "$(callchain_regex_2)" "$file" "$((l1 + 1))") || return 1
> + l3=$(find_after_line "$(callchain_regex_3)" "$file" "$((l2 + 1))") || return 1
> + l4=$(find_after_line "$(callchain_regex_4)" "$file" "$((l3 + 1))") || return 1
> +
> + # Callchain pop
> + l5=$(find_after_line "$(callchain_regex_3)" "$file" "$((l4 + 1))") || return 1
> + l6=$(find_after_line "$(callchain_regex_2)" "$file" "$((l5 + 1))") || return 1
> + l7=$(find_after_line "$(callchain_regex_1)" "$file" "$((l6 + 1))") || return 1
> +
> + echo "Callchain flow matched:"
> + echo " l1=$l1 l2=$l2 l3=$l3 l4=$l4 l5=$l5 l6=$l6 l7=$l7"
> +
> + return 0
> +}
> +
> +run_test()
> +{
> + local data=$tmpdir/perf.data
> + local script=$tmpdir/perf.script
> +
> + if ! record_trace "$data" "$script"; then
> + echo "perf record/script failed"
> + return
> + fi
> +
> + check_callchain_flow "$script" || return
> +
> + glb_err=0
> +}
> +
> +skip_if_system_is_not_ready || exit 2
> +
> +run_test
> +
> +exit $glb_err
> diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
> index 7cedf05be544ad79a99e86d30dfa4f7b01ca0837..cee9e6b62dcc838c864bbe76efe3b638ed75b134 100644
> --- a/tools/perf/tests/tests.h
> +++ b/tools/perf/tests/tests.h
> @@ -248,6 +248,7 @@ DECLARE_WORKLOAD(inlineloop);
> DECLARE_WORKLOAD(jitdump);
> DECLARE_WORKLOAD(context_switch_loop);
> DECLARE_WORKLOAD(deterministic);
> +DECLARE_WORKLOAD(callchain);
>
> #ifdef HAVE_RUST_SUPPORT
> DECLARE_WORKLOAD(code_with_type);
> diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
> index 7bb4b9829ba245740c8967e6bf3235614cdd55a3..048e371eb63e316453b6b46ebd0a02794c3d25d7 100644
> --- a/tools/perf/tests/workloads/Build
> +++ b/tools/perf/tests/workloads/Build
> @@ -13,6 +13,7 @@ perf-test-y += inlineloop.o
> perf-test-y += jitdump.o
> perf-test-y += context_switch_loop.o
> perf-test-y += deterministic.o
> +perf-test-y += callchain.o
>
> ifeq ($(CONFIG_RUST_SUPPORT),y)
> perf-test-y += code_with_type.o
> @@ -27,3 +28,4 @@ CFLAGS_traploop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
> CFLAGS_inlineloop.o = -g -O2
> CFLAGS_deterministic.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
> CFLAGS_named_threads.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
> +CFLAGS_callchain.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
> diff --git a/tools/perf/tests/workloads/callchain.c b/tools/perf/tests/workloads/callchain.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..3951423d8115e9efb49af8ba2586001fc6f02761
> --- /dev/null
> +++ b/tools/perf/tests/workloads/callchain.c
> @@ -0,0 +1,33 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/compiler.h>
> +#include <sys/syscall.h>
> +#include <unistd.h>
> +#include "../tests.h"
> +
> +/*
> + * Mark as noinline to establish the call chain, and avoid the static
> + * annotation to prevent LTO from renaming the functions.
> + */
> +noinline void callchain_do_syscall(void);
> +noinline void callchain_foo(void);
> +noinline int callchain(int argc, const char **argv);
> +
> +noinline void callchain_do_syscall(void)
> +{
> + syscall(SYS_getpid);
> +}
> +
> +noinline void callchain_foo(void)
> +{
> + callchain_do_syscall();
> +}
> +
> +noinline int callchain(int argc __maybe_unused,
> + const char **argv __maybe_unused)
> +{
> + callchain_foo();
> +
> + return 0;
> +}
> +
> +DEFINE_WORKLOAD(callchain);
>
More information about the linux-arm-kernel
mailing list