[PATCH V2 2/5] perf mem: Clean up perf_mem_events__ptr()

Liang, Kan kan.liang at linux.intel.com
Mon Dec 11 10:09:34 PST 2023



On 2023-12-08 11:31 p.m., Leo Yan wrote:
> On Thu, Dec 07, 2023 at 11:23:35AM -0800, kan.liang at linux.intel.com wrote:
>> From: Kan Liang <kan.liang at linux.intel.com>
>>
>> The mem_events can now be retrieved from the struct perf_pmu. An
>> ARCH-specific perf_mem_events__ptr() is no longer required. Remove
>> all of the ARCH-specific implementations.
>>
>> Intel hybrid platforms have multiple PMUs that support mem_events,
>> but they all share the same mem_events. Other ARCHs have only one
>> such PMU, so it's sufficient to configure the mem_events for a
>> single PMU. Add perf_mem_events_find_pmu(), which returns the first
>> PMU that supports mem_events.
>>
>> In perf_mem_events__init(), perf_pmus__scan() is no longer required.
>> This avoids checking sysfs for every PMU on the system.
>>
>> Make perf_mem_events__record_args() more generic and remove
>> perf_mem_events__print_unsupport_hybrid().
>>
>> Since a pmu is added as a new parameter, rename perf_mem_events__ptr()
>> to perf_pmu__mem_events_ptr(). Several other functions are renamed
>> similarly.
>>
>> Reviewed-by: Ian Rogers <irogers at google.com>
>> Tested-by: Ravi Bangoria <ravi.bangoria at amd.com>
>> Signed-off-by: Kan Liang <kan.liang at linux.intel.com>
>> ---
>>  tools/perf/arch/arm64/util/mem-events.c |  10 +--
>>  tools/perf/arch/x86/util/mem-events.c   |  18 ++---
>>  tools/perf/builtin-c2c.c                |  28 +++++--
>>  tools/perf/builtin-mem.c                |  28 +++++--
>>  tools/perf/util/mem-events.c            | 103 ++++++++++++------------
>>  tools/perf/util/mem-events.h            |   9 ++-
>>  6 files changed, 104 insertions(+), 92 deletions(-)
>>
>> diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c
>> index aaa4804922b4..2602e8688727 100644
>> --- a/tools/perf/arch/arm64/util/mem-events.c
>> +++ b/tools/perf/arch/arm64/util/mem-events.c
>> @@ -12,17 +12,9 @@ struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX] = {
>>  
>>  static char mem_ev_name[100];
>>  
>> -struct perf_mem_event *perf_mem_events__ptr(int i)
>> -{
>> -	if (i >= PERF_MEM_EVENTS__MAX)
>> -		return NULL;
>> -
>> -	return &perf_mem_events_arm[i];
>> -}
>> -
>>  const char *perf_mem_events__name(int i, const char *pmu_name __maybe_unused)
>>  {
>> -	struct perf_mem_event *e = perf_mem_events__ptr(i);
>> +	struct perf_mem_event *e = &perf_mem_events_arm[i];
>>  
>>  	if (i >= PERF_MEM_EVENTS__MAX)
>>  		return NULL;
> 
> Nitpick: it's better to check that 'i' is a valid index first, and
> only then access the array.
> 
>         if (i >= PERF_MEM_EVENTS__MAX)
>                 return NULL;
> 
>         e = &perf_mem_events_arm[i];
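> 
> i.e., keep the existing check but move it before the array access
> (untested):
> 
>         struct perf_mem_event *e;
> 
>         if (i >= PERF_MEM_EVENTS__MAX)
>                 return NULL;
> 
>         e = &perf_mem_events_arm[i];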
> 
>> diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
>> index 2b81d229982c..5fb41d50118d 100644
>> --- a/tools/perf/arch/x86/util/mem-events.c
>> +++ b/tools/perf/arch/x86/util/mem-events.c
>> @@ -28,17 +28,6 @@ struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
>>  	E("mem-ldst",	"ibs_op//",	"ibs_op"),
>>  };
>>  
>> -struct perf_mem_event *perf_mem_events__ptr(int i)
>> -{
>> -	if (i >= PERF_MEM_EVENTS__MAX)
>> -		return NULL;
>> -
>> -	if (x86__is_amd_cpu())
>> -		return &perf_mem_events_amd[i];
>> -
>> -	return &perf_mem_events_intel[i];
>> -}
>> -
>>  bool is_mem_loads_aux_event(struct evsel *leader)
>>  {
>>  	struct perf_pmu *pmu = perf_pmus__find("cpu");
>> @@ -54,7 +43,12 @@ bool is_mem_loads_aux_event(struct evsel *leader)
>>  
>>  const char *perf_mem_events__name(int i, const char *pmu_name)
>>  {
>> -	struct perf_mem_event *e = perf_mem_events__ptr(i);
>> +	struct perf_mem_event *e;
> 
> A nitpick as well:
> 
> Given perf's mem/c2c code, callers will almost always pass a valid
> index via the argument 'i', but I still think this is the best place
> to return a NULL pointer for an invalid index rather than returning a
> wild pointer.
> 
> Thus I suggest applying the check for x86 and the other archs:
> 
>         if (i >= PERF_MEM_EVENTS__MAX)
>                 return NULL;
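> 
> e.g. (an untested sketch):
> 
>         if (i >= PERF_MEM_EVENTS__MAX)
>                 return NULL;
> 
>         if (x86__is_amd_cpu())
>                 e = &perf_mem_events_amd[i];
>         else
>                 e = &perf_mem_events_intel[i];
> 
> With the bounds check in place, the following "if (!e)" test can
> never trigger (the address of an array element is never NULL), so it
> could be dropped as well.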
> 
>> +
>> +	if (x86__is_amd_cpu())
>> +		e = &perf_mem_events_amd[i];
>> +	else
>> +		e = &perf_mem_events_intel[i];
>>  
>>  	if (!e)
>>  		return NULL;
> 
> [...]
> 
>>  int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
>>  				 char **rec_tmp, int *tmp_nr)
>>  {
>>  	const char *mnt = sysfs__mount();
>> +	struct perf_pmu *pmu = NULL;
>>  	int i = *argv_nr, k = 0;
>>  	struct perf_mem_event *e;
>>  
>> -	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
>> -		e = perf_mem_events__ptr(j);
>> -		if (!e->record)
>> -			continue;
>>  
>> -		if (perf_pmus__num_mem_pmus() == 1) {
>> -			if (!e->supported) {
>> -				pr_err("failed: event '%s' not supported\n",
>> -				       perf_mem_events__name(j, NULL));
>> -				return -1;
>> -			}
>> +	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
>> +		for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
>> +			e = perf_pmu__mem_events_ptr(pmu, j);
>>  
>> -			rec_argv[i++] = "-e";
>> -			rec_argv[i++] = perf_mem_events__name(j, NULL);
>> -		} else {
>> -			struct perf_pmu *pmu = NULL;
>> +			if (!e->record)
>> +				continue;
>>  
>>  			if (!e->supported) {
>> -				perf_mem_events__print_unsupport_hybrid(e, j);
>> +				pr_err("failed: event '%s' not supported\n",
>> +					perf_mem_events__name(j, pmu->name));
>>  				return -1;
>>  			}
>>  
>> -			while ((pmu = perf_pmus__scan(pmu)) != NULL) {
>> +			if (perf_pmus__num_mem_pmus() == 1) {
>> +				rec_argv[i++] = "-e";
>> +				rec_argv[i++] = perf_mem_events__name(j, NULL);
>> +			} else {
>>  				const char *s = perf_mem_events__name(j, pmu->name);
>>  
>>  				if (!perf_mem_event__supported(mnt, pmu, e))
> 
> I think we can improve this part a bit.
> 
> The current implementation uses perf_pmus__num_mem_pmus() to decide
> whether the system has a single memory PMU or multiple ones; in the
> multiple-PMU case, the tool iterates over all memory PMUs to
> synthesize the event options.
> 
> With this patch, the code iterates over all memory PMUs regardless of
> whether the system has one memory PMU or several. Thus, I don't see
> the point of the "perf_pmus__num_mem_pmus() == 1" check. We can
> consolidate this into unified code like:

Yep, I think it's doable. Also, it seems we can further clean up
perf_pmus__num_mem_pmus(), which is currently a __weak function.

We probably just need to change perf_mem_events_find_pmu() a little
and let it return both the first mem-events-supporting PMU and the
number of such PMUs.
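
Perhaps something like the below (an untested sketch; the exact name
and signature are only for illustration):

        struct perf_pmu *perf_mem_events_find_pmu(int *num_mem_pmus)
        {
                struct perf_pmu *pmu = NULL, *first = NULL;
                int num = 0;

                /* Walk only the PMUs that support mem_events. */
                while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
                        if (!first)
                                first = pmu;
                        num++;
                }

                if (num_mem_pmus)
                        *num_mem_pmus = num;

                return first;
        }

Then the __weak perf_pmus__num_mem_pmus() could go away entirely.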
> 
>         char *copy;
>         const char *s = perf_pmu__mem_events_name(j, pmu);
> 
>         if (!s)
>                 continue;
> 
>         if (!perf_pmu__mem_events_supported(mnt, pmu, e))
>                 continue;
> 
>         copy = strdup(s);
>         if (!copy)
>                 return -1;
> 
>         rec_argv[i++] = "-e";
>         rec_argv[i++] = copy;
>         rec_tmp[k++] = copy;

Not sure what rec_tmp is for. It seems no one uses it.
I will check whether it can be removed.
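
(If rec_tmp only exists so that the strdup()'d event strings can be
freed after the record session, e.g. the caller doing something like

        for (i = 0; i < tmp_nr; i++)
                free(rec_tmp[i]);

then removing it would just mean handling the ownership of those
strings in some other way.)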

Thanks,
Kan

> 
> Thanks,
> Leo


