[PATCH v7 5/7] perf tool: Add support for HiSilicon PCIe Tune and Trace device driver

liuqi (BA) liuqi115 at huawei.com
Thu May 5 04:30:48 PDT 2022


Hi Leo,

Thanks for your review, some replies below.

On 2022/4/30 15:35, Leo Yan wrote:
> On Thu, Apr 07, 2022 at 08:58:39PM +0800, Yicong Yang via iommu wrote:
>> From: Qi Liu <liuqi115 at huawei.com>
>>
>> 'perf record' and 'perf report --dump-raw-trace' supported in this
>> patch.
>>
>> Example usage:
>>
>> Output will contain raw PTT data and its textual representation, such
>> as:
>>
>> 0 0 0x5810 [0x30]: PERF_RECORD_AUXTRACE size: 0x400000  offset: 0
>> ref: 0xa5d50c725  idx: 0  tid: -1  cpu: 0
>> .
>> . ... HISI PTT data: size 4194304 bytes
>> .  00000000: 00 00 00 00                                 Prefix
>> .  00000004: 08 20 00 60                                 Header DW0
>> .  00000008: ff 02 00 01                                 Header DW1
>> .  0000000c: 20 08 00 00                                 Header DW2
>> .  00000010: 10 e7 44 ab                                 Header DW3
>> .  00000014: 2a a8 1e 01                                 Time
>> .  00000020: 00 00 00 00                                 Prefix
>> .  00000024: 01 00 00 60                                 Header DW0
>> .  00000028: 0f 1e 00 01                                 Header DW1
>> .  0000002c: 04 00 00 00                                 Header DW2
>> .  00000030: 40 00 81 02                                 Header DW3
>> .  00000034: ee 02 00 00                                 Time
>> ....
>>
>> Signed-off-by: Qi Liu <liuqi115 at huawei.com>
>> Signed-off-by: Yicong Yang <yangyicong at hisilicon.com>
>> ---
>>   tools/perf/arch/arm/util/auxtrace.c           |  76 +++++-
>>   tools/perf/arch/arm/util/pmu.c                |   3 +
>>   tools/perf/arch/arm64/util/Build              |   2 +-
>>   tools/perf/arch/arm64/util/hisi_ptt.c         | 195 ++++++++++++++++
>>   tools/perf/util/Build                         |   2 +
>>   tools/perf/util/auxtrace.c                    |   4 +
>>   tools/perf/util/auxtrace.h                    |   1 +
>>   tools/perf/util/hisi-ptt-decoder/Build        |   1 +
>>   .../hisi-ptt-decoder/hisi-ptt-pkt-decoder.c   | 170 ++++++++++++++
>>   .../hisi-ptt-decoder/hisi-ptt-pkt-decoder.h   |  28 +++
>>   tools/perf/util/hisi_ptt.c                    | 218 ++++++++++++++++++
>>   tools/perf/util/hisi_ptt.h                    |  28 +++
> 
> It's good to divide the big patch into smaller patches, e.g. one patch
> is to add PTT auxtrace (so mainly for perf record), and the second
> patch is to add PTT decoder for perf decoding.
> 
got it, I'll do this, thanks.

>>   12 files changed, 724 insertions(+), 4 deletions(-)
>>   create mode 100644 tools/perf/arch/arm64/util/hisi_ptt.c
>>   create mode 100644 tools/perf/util/hisi-ptt-decoder/Build
>>   create mode 100644 tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.c
>>   create mode 100644 tools/perf/util/hisi-ptt-decoder/hisi-ptt-pkt-decoder.h
>>   create mode 100644 tools/perf/util/hisi_ptt.c
>>   create mode 100644 tools/perf/util/hisi_ptt.h
>>
>> diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
>> index 5fc6a2a3dbc5..393f5757c039 100644
>> --- a/tools/perf/arch/arm/util/auxtrace.c
>> +++ b/tools/perf/arch/arm/util/auxtrace.c
>> @@ -4,9 +4,11 @@
>>    * Author: Mathieu Poirier <mathieu.poirier at linaro.org>
>>    */
[...]
>> +
>> +	rewinddir(dir);
>> +	while ((dent = readdir(dir))) {
>> +		if (strstr(dent->d_name, HISI_PTT_PMU_NAME) && idx < (*nr_ptts)) {
>> +			hisi_ptt_pmus[idx] = perf_pmu__find(dent->d_name);
>> +			if (hisi_ptt_pmus[idx]) {
>> +				pr_debug2("%s %d: hisi_ptt_pmu %d type %d name %s\n",
>> +					__func__, __LINE__, idx,
>> +					hisi_ptt_pmus[idx]->type,
>> +					hisi_ptt_pmus[idx]->name);
>> +					idx++;
> 
> Indentation for "idx++" is not right.
will fix this, thanks.

> 
>> +			}
>> +
> 
> Redundant new line.
will fix this, thanks.
> 
>> +		}
>> +	}
>> +
>> +out:
>> +	closedir(dir);
>> +	return hisi_ptt_pmus;
>> +}
>> +
>>   struct auxtrace_record
>>   *auxtrace_record__init(struct evlist *evlist, int *err)
>>   {
>> @@ -57,8 +112,12 @@ struct auxtrace_record
>>   	struct evsel *evsel;
>>   	bool found_etm = false;
>>   	struct perf_pmu *found_spe = NULL;
>> +	struct perf_pmu *found_ptt = NULL;
>>   	struct perf_pmu **arm_spe_pmus = NULL;
>> +	struct perf_pmu **hisi_ptt_pmus = NULL;
>> +
>>   	int nr_spes = 0;
>> +	int nr_ptts = 0;
>>   	int i = 0;
>>   
>>   	if (!evlist)
>> @@ -66,13 +125,14 @@ struct auxtrace_record
>>   
>>   	cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
>>   	arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
>> +	hisi_ptt_pmus = find_all_hisi_ptt_pmus(&nr_ptts, err);
>>   
>>   	evlist__for_each_entry(evlist, evsel) {
>>   		if (cs_etm_pmu &&
>>   		    evsel->core.attr.type == cs_etm_pmu->type)
>>   			found_etm = true;
>>   
>> -		if (!nr_spes || found_spe)
>> +		if ((!nr_spes || found_spe) && (!nr_ptts || found_ptt))
>>   			continue;
>>   
>>   		for (i = 0; i < nr_spes; i++) {
>> @@ -81,11 +141,18 @@ struct auxtrace_record
>>   				break;
>>   			}
>>   		}
>> +
>> +		for (i = 0; i < nr_ptts; i++) {
>> +			if (evsel->core.attr.type == hisi_ptt_pmus[i]->type) {
>> +				found_ptt = hisi_ptt_pmus[i];
>> +				break;
>> +			}
>> +		}
>>   	}
> 
> For the loop of evlist__for_each_entry, it's good to refactor the
> event list iteration, we can refine code like below:
> 
> static struct perf_pmu *find_pmu_for_event(struct perf_pmu **pmus,
>                                             int pmu_nr, struct evsel *evsel)
> {
>          int i;
>          struct perf_pmu *found = NULL;
> 
>          if (!pmus)
>                  return NULL;
> 
>          for (i = 0; i < pmu_nr; i++) {
>                  if (evsel->core.attr.type == pmus[i]->type) {
>                          found = pmus[i];
>                          break;
>                  }
>          }
> 
>          return found;
> }
> 
> struct auxtrace_record
> *auxtrace_record__init(struct evlist *evlist, int *err)
> {
>      ...
> 
>      evlist__for_each_entry(evlist, evsel) {
>              if (cs_etm_pmu && !found_etm)
> 		    found_etm = find_pmu_for_event(&cs_etm_pmu, 1, evsel);
> 
>              if (arm_spe_pmus && !found_spe)
> 		    found_etm = find_pmu_for_event(arm_spe_pmus, nr_spe, evsel);
> 
>              if (hisi_ptt_pmus && !found_ptt)
> 		    found_ptt = find_pmu_for_event(hisi_ptt_pmus, nr_ptt, evsel);
>      }
> 
>      ...
> 
> Please use a separate patch for the refactoring, and then based on it
> you could add PTT PMU related finding code.
> 
> }
> 

got it, will do this refactoring in next version, thanks.
> 
>>   	free(arm_spe_pmus);
> 
> Add:
> 
>       free(hisi_ptt_pmus);
> 
> to avoid memory leaking.
> 

will fix it, thanks.

>>   
>> -	if (found_etm && found_spe) {
>> -		pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
>> +	if (found_etm && found_spe && found_ptt) {
> 
> This logic is not right; actually we want the logic is:
> 
>          int auxtrace_event_cnt = 0;
>          if (found_etm)
>                  auxtrace_event_cnt++;
>          if (found_spe)
>                  auxtrace_event_cnt++;
>          if (found_ptt)
>                  auxtrace_event_cnt++;
> 
>          if (auxtrace_event_cnt > 1) {
>                  pr_err("Concurrent AUX trace operation isn't supported: found etm %d spe %d ptt %d\n",
>                         found_etm, found_spe, found_ptt);
>                  *err = -EOPNOTSUPP;
>                  return NULL;
>          }
> 
got it, will fix it, thanks.
>> +		pr_err("Concurrent ARM Coresight ETM ,SPE and HiSilicon PCIe Trace operation not currently supported\n");
>>   		*err = -EOPNOTSUPP;
>>   		return NULL;
>>   	}
>> @@ -96,6 +163,9 @@ struct auxtrace_record
>>   #if defined(__aarch64__)
>>   	if (found_spe)
>>   		return arm_spe_recording_init(err, found_spe);
>> +
>> +	if (found_ptt)
>> +		return hisi_ptt_recording_init(err, found_ptt);
>>   #endif
>>   
>>   	/*
>> diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
>> index b8b23b9dc598..89a3cedb4557 100644
>> --- a/tools/perf/arch/arm/util/pmu.c
>> +++ b/tools/perf/arch/arm/util/pmu.c
>> @@ -10,6 +10,7 @@
>>   #include <linux/string.h>
>>   
>>   #include "arm-spe.h"
>> +#include "hisi_ptt.h"
>>   #include "../../../util/pmu.h"
>>   
>>   struct perf_event_attr
>> @@ -22,6 +23,8 @@ struct perf_event_attr
>>   #if defined(__aarch64__)
>>   	} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
>>   		return arm_spe_pmu_default_config(pmu);
>> +	} else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
>> +		pmu->selectable = true;
>>   #endif
>>   	}
>>   
>> diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
>> index 9fcb4e68add9..8b7fd1dc9f37 100644
>> --- a/tools/perf/arch/arm64/util/Build
>> +++ b/tools/perf/arch/arm64/util/Build
>> @@ -11,4 +11,4 @@ perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
[...]
>> +
>> +#define DEFAULT_PAGE_SIZE 1024
> 
> It's a bit confusion to define PAGE_SIZE as 1024.  Here can simply
> define:
> 
> #define KiB(x) ((x) * 1024)
> 
>> +#define KiB(x) ((x) * DEFAULT_PAGE_SIZE)
>> +#define MiB(x) ((x) * DEFAULT_PAGE_SIZE * DEFAULT_PAGE_SIZE)
> 
> 
ok, I'll change it, thanks.
>> +
>> +struct hisi_ptt_recording {
>> +	struct auxtrace_record	itr;
>> +	struct perf_pmu *hisi_ptt_pmu;
>> +	struct evlist *evlist;
>> +};
>> +
[...]
>> +}
>> +
>> +static int hisi_ptt_recording_options(struct auxtrace_record *itr,
>> +				      struct evlist *evlist,
>> +				      struct record_opts *opts)
>> +{
>> +	struct hisi_ptt_recording *pttr =
>> +			container_of(itr, struct hisi_ptt_recording, itr);
>> +	struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu;
>> +	struct perf_cpu_map *cpus = evlist->core.cpus;
>> +	struct evsel *evsel, *hisi_ptt_evsel = NULL;
>> +	struct evsel *tracking_evsel;
>> +	int err;
>> +
>> +	pttr->evlist = evlist;
>> +	evlist__for_each_entry(evlist, evsel) {
>> +		if (evsel->core.attr.type == hisi_ptt_pmu->type) {
>> +			if (hisi_ptt_evsel) {
>> +				pr_err("There may be only one " HISI_PTT_PMU_NAME "x event\n");
>> +				return -EINVAL;
>> +			}
>> +			evsel->core.attr.freq = 0;
>> +			evsel->core.attr.sample_period = 1;
>> +			hisi_ptt_evsel = evsel;
>> +			opts->full_auxtrace = true;
>> +		}
>> +	}
>> +
>> +	err = hisi_ptt_set_auxtrace_mmap_page(opts);
>> +	if (err)
>> +		return err;
>> +	/*
>> +	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
>> +	 * must come first.
>> +	 */
>> +	evlist__to_front(evlist, hisi_ptt_evsel);
>> +
>> +	if (!perf_cpu_map__empty(cpus)) {
>> +		evsel__set_sample_bit(hisi_ptt_evsel, TIME);
>> +		evsel__set_sample_bit(hisi_ptt_evsel, CPU);
> 
> It needs to set CPU bit in sample type for per-cpu mmaps.  IIUC, PTT
> is only used for system wide tracing?  If so, you don't need set CPU
> bit.
>
ok, I'll delete set CPU bit.

> To be honest, I am also confused set the sample bits.  Actually, there
> have two different sample types for AUX trace, one is here set the
> sample type for AUX event, and in PTT decoding code it needs to set
> sample type for synthesized samples.

does "synthesized samples" mean something like spe pmu in perf report? 
such as consolidate multiple pieces of information into one event named 
"llc-miss" "remote-access"?

PTT doesn't need to do this, we just parse message in perf.data and show it.

> 
>> +	}
>> +
>> +	/* Add dummy event to keep tracking */
>> +	err = parse_events(evlist, "dummy:u", NULL);
>> +	if (err)
>> +		return err;
>> +
>> +	tracking_evsel = evlist__last(evlist);
>> +	evlist__set_tracking_event(evlist, tracking_evsel);
>> +
>> +	tracking_evsel->core.attr.freq = 0;
>> +	tracking_evsel->core.attr.sample_period = 1;
>> +
>> +	if (!perf_cpu_map__empty(cpus))
>> +		evsel__set_sample_bit(tracking_evsel, TIME);
>> +
>> +	return 0;

[...]
>> +
>> +enum hisi_ptt_8dw_pkt_field_type {
>> +	HISI_PTT_8DW_PREFIX,
>> +	HISI_PTT_8DW_HEAD0,
>> +	HISI_PTT_8DW_HEAD1,
>> +	HISI_PTT_8DW_HEAD2,
>> +	HISI_PTT_8DW_HEAD3,
>> +	HISI_PTT_8DW_TIME,
>> +	HISI_PTT_8DW_TYPE_MAX
>> +};
>> +
>> +enum hisi_ptt_4dw_pkt_field_type {
>> +	HISI_PTT_4DW_HEAD1,
>> +	HISI_PTT_4DW_HEAD2,
>> +	HISI_PTT_4DW_HEAD3,
>> +	HISI_PTT_4DW_TYPE_MAX
>> +};
>> +
>> +static const char * const hisi_ptt_8dw_pkt_field_name[] = {
>> +	[HISI_PTT_8DW_PREFIX]	= "Prefix",
>> +	[HISI_PTT_8DW_HEAD0]	= "Header DW0",
>> +	[HISI_PTT_8DW_HEAD1]	= "Header DW1",
>> +	[HISI_PTT_8DW_HEAD2]	= "Header DW2",
>> +	[HISI_PTT_8DW_HEAD3]	= "Header DW3",
>> +	[HISI_PTT_8DW_TIME]	= "Time",
>> +};
>> +
>> +static const char * const hisi_ptt_4dw_pkt_field_name[] = {
>> +	[HISI_PTT_4DW_HEAD1]	= "Header DW1",
>> +	[HISI_PTT_4DW_HEAD2]	= "Header DW2",
>> +	[HISI_PTT_4DW_HEAD3]	= "Header DW3",
>> +};
>> +
>> +/* offset of each member is determined by format of 8dw packet. */
>> +static uint32_t hisi_ptt_8dw_pkt_field_offset[] = {
>> +	[HISI_PTT_8DW_PREFIX]	= 4,
>> +	[HISI_PTT_8DW_HEAD0]	= 4,
>> +	[HISI_PTT_8DW_HEAD1]	= 4,
>> +	[HISI_PTT_8DW_HEAD2]	= 4,
>> +	[HISI_PTT_8DW_HEAD3]	= 4,
>> +	[HISI_PTT_8DW_TIME]	= 8,
>> +};
> 
> You could define a structure hisi_ptt_8dw (just like hisi_ptt_4dw) so
> that can avoid to define this field offset structure.
> 
> It's unusal to define data structure for offset, another way is to
> define macros for offset values.
> 
got it, I'll do this.
>> +
>> +union hisi_ptt_4dw {
>> +	struct {
>> +		uint32_t format : 2;
>> +		uint32_t type : 5;
>> +		uint32_t t9 : 1;
>> +		uint32_t t8 : 1;
>> +		uint32_t th : 1;
>> +		uint32_t so : 1;
>> +		uint32_t len : 10;
>> +		uint32_t time : 11;
>> +	};
>> +	uint32_t value;
>> +};
>> +
[...]

>> +
>> +static void hisi_ptt_dump(struct hisi_ptt *ptt __maybe_unused,
>> +			  unsigned char *buf, size_t len)
>> +{
>> +	const char *color = PERF_COLOR_BLUE;
>> +	enum hisi_ptt_pkt_type type;
>> +	size_t pos = 0;
>> +	int pkt_len;
>> +
>> +	color_fprintf(stdout, color, ". ... HISI PTT data: size %zu bytes\n",
>> +		      len);
>> +
>> +	type = hisi_ptt_check_packet_type(buf);
>> +	while (len) {
> 
> It's good to use condition "while (len > 0)".
ok, will change this.

> 
>> +		pkt_len = hisi_ptt_pkt_desc(buf, pos, type);
>> +		if (!pkt_len)
>> +			color_fprintf(stdout, color, " Bad packet!\n");
>> +
>> +		pos += pkt_len;
>> +		buf += pkt_len;
>> +		len -= pkt_len;
>> +	}
>> +}
>> +
[...]

>> diff --git a/tools/perf/util/hisi_ptt.h b/tools/perf/util/hisi_ptt.h
>> new file mode 100644
>> index 000000000000..c0b6cbde1221
>> --- /dev/null
>> +++ b/tools/perf/util/hisi_ptt.h
>> @@ -0,0 +1,28 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * HiSilicon PCIe Trace and Tuning (PTT) support
>> + * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
>> + */
>> +
>> +#ifndef INCLUDE__PERF_HISI_PTT_H__
>> +#define INCLUDE__PERF_HISI_PTT_H__
>> +
>> +#define HISI_PTT_PMU_NAME "hisi_ptt"
>> +enum {
>> +	HISI_PTT_PMU_TYPE,
>> +	HISI_PTT_PER_CPU_MMAPS,
> 
> HISI_PTT_PER_CPU_MMAPS is not used, so can remove it.
> 
will move this.

Thanks,
Qi
> Thanks,
> Leo
> 
>> +	HISI_PTT_AUXTRACE_PRIV_MAX,
>> +};
>> +
>> +#define HISI_PTT_AUXTRACE_PRIV_SIZE (HISI_PTT_AUXTRACE_PRIV_MAX * sizeof(u64))
>> +union perf_event;
>> +struct perf_session;
>> +struct perf_pmu;
>> +
>> +struct auxtrace_record *hisi_ptt_recording_init(int *err,
>> +						struct perf_pmu *hisi_ptt_pmu);
>> +
>> +int hisi_ptt_process_auxtrace_info(union perf_event *event,
>> +				   struct perf_session *session);
>> +
>> +#endif
>> -- 
>> 2.24.0
>>
>> _______________________________________________
>> iommu mailing list
>> iommu at lists.linux-foundation.org
>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
> .
> 



More information about the linux-arm-kernel mailing list