[PATCH v1 00/14] Clean up libperf cpumap's empty function

Arnaldo Carvalho de Melo acme at kernel.org
Thu Dec 14 05:49:14 PST 2023


Em Wed, Dec 13, 2023 at 02:48:15PM +0200, Adrian Hunter escreveu:
> On 12/12/23 19:59, Arnaldo Carvalho de Melo wrote:
> > Em Tue, Nov 28, 2023 at 10:01:57PM -0800, Ian Rogers escreveu:
> >> Rename and clean up the use of libperf CPU map functions particularly
> >> focussing on perf_cpu_map__empty that may return true for maps
> >> containing CPUs but also with an "any CPU"/dummy value.
> >>
> >> perf_cpu_map__nr is also troubling in that iterating an empty CPU map
> >> will yield the "any CPU"/dummy value. Reduce the appearance of some
> >> calls to this by using the perf_cpu_map__for_each_cpu macro.
> >>
> >> Ian Rogers (14):
> >>   libperf cpumap: Rename perf_cpu_map__dummy_new
> >>   libperf cpumap: Rename and prefer sysfs for perf_cpu_map__default_new
> >>   libperf cpumap: Rename perf_cpu_map__empty
> >>   libperf cpumap: Replace usage of perf_cpu_map__new(NULL)
> >>   libperf cpumap: Add for_each_cpu that skips the "any CPU" case
> > 
> > Applied 1-6, with James Reviewed-by tags, would be good to have Adrian
> > check the PT and BTS parts, testing the end result if he things its all
> > ok.

Ian,

	1-6 is in perf-tools-next now, can you please consider Adrian's
suggestion to reduce patch size and rebase the remaining patches?

- Arnaldo
 
> Changing the same lines of code twice in the same patch set is not
> really kernel style.
> 
> Some of the churn could be reduced by applying and rebasing on the
> patch below.
> 
> Ideally the patches should be reordered so that the lines only
> change once i.e.
> 
> 	perf_cpu_map__empty -> <replacement>
> 
> instead of
> 
> 	perf_cpu_map__empty -> <rename> -> <replacement>
> 
> If that is too much trouble, please accept my ack instead:
> 
> Acked-by: Adrian Hunter <adrian.hunter at intel.com>


 
> 
> From: Adrian Hunter <adrian.hunter at intel.com>
> 
> Factor out perf_cpu_map__empty() use to reduce the occurrences and make
> the code more readable.
> 
> Signed-off-by: Adrian Hunter <adrian.hunter at intel.com>
> ---
>  tools/perf/arch/x86/util/intel-bts.c | 11 ++++++++---
>  tools/perf/arch/x86/util/intel-pt.c  | 21 ++++++++++++---------
>  2 files changed, 20 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
> index d2c8cac11470..cebe994eb9db 100644
> --- a/tools/perf/arch/x86/util/intel-bts.c
> +++ b/tools/perf/arch/x86/util/intel-bts.c
> @@ -59,6 +59,11 @@ intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
>  	return INTEL_BTS_AUXTRACE_PRIV_SIZE;
>  }
>  
> +static bool intel_bts_per_cpu(struct evlist *evlist)
> +{
> +	return !perf_cpu_map__empty(evlist->core.user_requested_cpus);
> +}
> +
>  static int intel_bts_info_fill(struct auxtrace_record *itr,
>  			       struct perf_session *session,
>  			       struct perf_record_auxtrace_info *auxtrace_info,
> @@ -109,8 +114,8 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
>  	struct intel_bts_recording *btsr =
>  			container_of(itr, struct intel_bts_recording, itr);
>  	struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
> +	bool per_cpu_mmaps = intel_bts_per_cpu(evlist);
>  	struct evsel *evsel, *intel_bts_evsel = NULL;
> -	const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
>  	bool privileged = perf_event_paranoid_check(-1);
>  
>  	if (opts->auxtrace_sample_mode) {
> @@ -143,7 +148,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
>  	if (!opts->full_auxtrace)
>  		return 0;
>  
> -	if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) {
> +	if (opts->full_auxtrace && per_cpu_mmaps) {
>  		pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
>  		return -EINVAL;
>  	}
> @@ -224,7 +229,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
>  		 * In the case of per-cpu mmaps, we need the CPU on the
>  		 * AUX event.
>  		 */
> -		if (!perf_cpu_map__empty(cpus))
> +		if (per_cpu_mmaps)
>  			evsel__set_sample_bit(intel_bts_evsel, CPU);
>  	}
>  
> diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
> index fa0c718b9e72..0ff9147c75da 100644
> --- a/tools/perf/arch/x86/util/intel-pt.c
> +++ b/tools/perf/arch/x86/util/intel-pt.c
> @@ -312,6 +312,11 @@ static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
>  	*d = eax;
>  }
>  
> +static bool intel_pt_per_cpu(struct evlist *evlist)
> +{
> +	return !perf_cpu_map__empty(evlist->core.user_requested_cpus);
> +}
> +
>  static int intel_pt_info_fill(struct auxtrace_record *itr,
>  			      struct perf_session *session,
>  			      struct perf_record_auxtrace_info *auxtrace_info,
> @@ -322,7 +327,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
>  	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
>  	struct perf_event_mmap_page *pc;
>  	struct perf_tsc_conversion tc = { .time_mult = 0, };
> -	bool cap_user_time_zero = false, per_cpu_mmaps;
> +	bool per_cpu_mmaps = intel_pt_per_cpu(session->evlist);
> +	bool cap_user_time_zero = false;
>  	u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
>  	u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
>  	unsigned long max_non_turbo_ratio;
> @@ -369,8 +375,6 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
>  			ui__warning("Intel Processor Trace: TSC not available\n");
>  	}
>  
> -	per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus);
> -
>  	auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
>  	auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
>  	auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
> @@ -604,8 +608,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
>  	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
>  	bool have_timing_info, need_immediate = false;
>  	struct evsel *evsel, *intel_pt_evsel = NULL;
> -	const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
>  	bool privileged = perf_event_paranoid_check(-1);
> +	bool per_cpu_mmaps = intel_pt_per_cpu(evlist);
>  	u64 tsc_bit;
>  	int err;
>  
> @@ -774,8 +778,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
>  	 * Per-cpu recording needs sched_switch events to distinguish different
>  	 * threads.
>  	 */
> -	if (have_timing_info && !perf_cpu_map__empty(cpus) &&
> -	    !record_opts__no_switch_events(opts)) {
> +	if (have_timing_info && per_cpu_mmaps && !record_opts__no_switch_events(opts)) {
>  		if (perf_can_record_switch_events()) {
>  			bool cpu_wide = !target__none(&opts->target) &&
>  					!target__has_task(&opts->target);
> @@ -832,7 +835,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
>  		 * In the case of per-cpu mmaps, we need the CPU on the
>  		 * AUX event.
>  		 */
> -		if (!perf_cpu_map__empty(cpus))
> +		if (per_cpu_mmaps)
>  			evsel__set_sample_bit(intel_pt_evsel, CPU);
>  	}
>  
> @@ -858,7 +861,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
>  			tracking_evsel->immediate = true;
>  
>  		/* In per-cpu case, always need the time of mmap events etc */
> -		if (!perf_cpu_map__empty(cpus)) {
> +		if (per_cpu_mmaps) {
>  			evsel__set_sample_bit(tracking_evsel, TIME);
>  			/* And the CPU for switch events */
>  			evsel__set_sample_bit(tracking_evsel, CPU);
> @@ -870,7 +873,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
>  	 * Warn the user when we do not have enough information to decode i.e.
>  	 * per-cpu with no sched_switch (except workload-only).
>  	 */
> -	if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
> +	if (!ptr->have_sched_switch && per_cpu_mmaps &&
>  	    !target__none(&opts->target) &&
>  	    !intel_pt_evsel->core.attr.exclude_user)
>  		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
> -- 
> 2.34.1
> 
> 
> 

-- 

- Arnaldo



More information about the linux-arm-kernel mailing list