[PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
Leo Yan
leo.yan at arm.com
Tue Apr 22 03:29:52 PDT 2025
On Tue, Apr 22, 2025 at 10:16:40AM +0100, James Clark wrote:
> >
> > Add data source encoding for HiSilicon HIP12 and coresponding mapping
> > to the perf's memory data source. This will help to synthesize the data
> > and support upper layer tools like perf-mem and perf-c2c.
> >
> > Signed-off-by: Yicong Yang <yangyicong at hisilicon.com>
> > ---
> > arch/arm64/include/asm/cputype.h | 2 +
> > tools/arch/arm64/include/asm/cputype.h | 2 +
Please split into two patches. One is a kernel patch and another is a
tool patch.
[...]
> > --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> > +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> > @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
> > ARM_SPE_AMPEREONE_L2D = 0x9,
> > };
> > +enum arm_spe_hisi_hip_data_source {
> > + ARM_SPE_HISI_HIP_PEER_CPU = 0,
> > + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
> > + ARM_SPE_HISI_HIP_L3 = 2,
> > + ARM_SPE_HISI_HIP_L3_HITM = 3,
> > + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
> > + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
> > + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
> > + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
> > + ARM_SPE_HISI_HIP_LOCAL = 8,
> > + ARM_SPE_HISI_HIP_REMOTE = 9,
> > + ARM_SPE_HISI_HIP_NC_DEV = 13,
> > + ARM_SPE_HISI_HIP_L2 = 16,
> > + ARM_SPE_HISI_HIP_L2_HITM = 17,
> > + ARM_SPE_HISI_HIP_L1 = 18,
> > +};
> > +
> > struct arm_spe_record {
> > enum arm_spe_sample_type type;
> > int err;
> > diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> > index 2a9775649cc2..eceae4219601 100644
> > --- a/tools/perf/util/arm-spe.c
> > +++ b/tools/perf/util/arm-spe.c
> > @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
> > {},
> > };
> > +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
> > + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
> > + {},
> > +};
> > +
> > static void arm_spe__sample_flags(struct arm_spe_queue *speq)
> > {
> > const struct arm_spe_record *record = &speq->decoder->record;
> > @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
> > arm_spe__synth_data_source_common(&common_record, data_src);
> > }
> > +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
> > + union perf_mem_data_src *data_src)
> > +{
> > + /* Use common synthesis method to handle store operations */
> > + if (record->op & ARM_SPE_OP_ST) {
> > + arm_spe__synth_data_source_common(record, data_src);
> > + return;
> > + }
> > +
> > + switch (record->source) {
> > + case ARM_SPE_HISI_HIP_PEER_CPU:
> > + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
Seems it is conflict to set both 'PERF_MEM_SNOOP_NONE' and
'PERF_MEM_SNOOPX_PEER'.
Remove setting 'PERF_MEM_SNOOP_NONE' in this case?
> > + break;
> > + case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
> > + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> > + break;
> > + case ARM_SPE_HISI_HIP_L3:
> > + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> > + break;
> > + case ARM_SPE_HISI_HIP_L3_HITM:
> > + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> > + break;
> > + case ARM_SPE_HISI_HIP_PEER_CLUSTER:
> > + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
Seems to me, a CPU has L3 cache, would the cluster has a higher level's
cache?
> > + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
Ditto for the confliction for the two flags 'SNOOP_NONE' and
'SNOOPX_PEER'.
> > + break;
> > + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
> > + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> > + break;
> > + case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
> > + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> > + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>
> Hi Yicong,
>
> Is the mem_snoop setting missing from this one?
The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
Thanks,
Leo
More information about the linux-arm-kernel
mailing list