[PATCH v4 4/6] perf tools: enable dwarf_callchain_users on arm64

Mark Rutland mark.rutland at arm.com
Wed Dec 15 08:37:47 PST 2021


On Wed, Dec 15, 2021 at 03:11:36PM +0000, German Gomez wrote:
> From: Alexandre Truong <alexandre.truong at arm.com>
> 
> On arm64, enable dwarf_callchain_users, which is needed to perform
> a DWARF unwind in order to find the caller of the leaf frame.
> 
> Signed-off-by: Alexandre Truong <alexandre.truong at arm.com>
> Signed-off-by: German Gomez <german.gomez at arm.com>
> ---
>  tools/perf/builtin-report.c | 4 ++--
>  tools/perf/builtin-script.c | 4 ++--
>  tools/perf/util/callchain.c | 9 ++++++++-
>  tools/perf/util/callchain.h | 2 +-
>  4 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 8167ebfe776a..a31ad60ba66e 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep)
>  		}
>  	}
>  
> -	callchain_param_setup(sample_type);
> +	callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env));
>  
>  	if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
>  		ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
> @@ -1124,7 +1124,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
>  	 * on events sample_type.
>  	 */
>  	sample_type = evlist__combined_sample_type(*pevlist);
> -	callchain_param_setup(sample_type);
> +	callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
>  	return 0;
>  }
>  
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index ab7d575f97f2..d308adfd1176 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -2318,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
>  	 * on events sample_type.
>  	 */
>  	sample_type = evlist__combined_sample_type(evlist);
> -	callchain_param_setup(sample_type);
> +	callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
>  
>  	/* Enable fields for callchain entries */
>  	if (symbol_conf.use_callchain &&
> @@ -3468,7 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script)
>  	struct perf_session *session = script->session;
>  	u64 sample_type = evlist__combined_sample_type(session->evlist);
>  
> -	callchain_param_setup(sample_type);
> +	callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env));
>  
>  	if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
>  		pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 8e2777133bd9..aaab9a674807 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
>  		map__zput(node->ms.map);
>  }
>  
> -void callchain_param_setup(u64 sample_type)
> +void callchain_param_setup(u64 sample_type, const char *arch)
>  {
>  	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
>  		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> @@ -1612,6 +1612,13 @@ void callchain_param_setup(u64 sample_type)
>  		else
>  			callchain_param.record_mode = CALLCHAIN_FP;
>  	}
> +
> +	/*
> +	 * It's possible to determine the caller of leaf frames with omitted
> +	 * frame pointers on aarch64 using libunwind, so enable it.
> +	 */

I reckon it's worth mentioning *why* we need to do this; how about:

	/*
	 * It's necessary to use libunwind to reliably determine the caller of
	 * a leaf function on aarch64, as otherwise we cannot know whether to
	 * start from the LR or FP.
	 *
	 * Always starting from the LR can result in duplicate or entirely
	 * erroneous entries. Always skipping the LR and starting from the FP
	 * can result in missing entries.
	 */

Other than that, this looks fine to me!

Thanks,
Mark.

> +	if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
> +		dwarf_callchain_users = true;
>  }
>  
>  static bool chain_match(struct callchain_list *base_chain,
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 77fba053c677..d95615daed73 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -300,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
>  			    u64 *branch_count, u64 *predicted_count,
>  			    u64 *abort_count, u64 *cycles_count);
>  
> -void callchain_param_setup(u64 sample_type);
> +void callchain_param_setup(u64 sample_type, const char *arch);
>  
>  bool callchain_cnode_matched(struct callchain_node *base_cnode,
>  			     struct callchain_node *pair_cnode);
> -- 
> 2.25.1
> 



More information about the linux-arm-kernel mailing list