[PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12

Yicong Yang yangyicong at huawei.com
Tue Apr 22 05:31:43 PDT 2025


On 2025/4/22 18:29, Leo Yan wrote:
> On Tue, Apr 22, 2025 at 10:16:40AM +0100, James Clark wrote:
>>>
>>> Add data source encoding for HiSilicon HIP12 and coresponding mapping
>>> to the perf's memory data source. This will help to synthesize the data
>>> and support upper layer tools like perf-mem and perf-c2c.
>>>
>>> Signed-off-by: Yicong Yang <yangyicong at hisilicon.com>
>>> ---
>>>   arch/arm64/include/asm/cputype.h              |   2 +
>>>   tools/arch/arm64/include/asm/cputype.h        |   2 +
> 
> Please split into two patches.  One is a kernel patch and another is a
> tool patch.

will do in next version.

> 
> [...]
> 
>>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>>> @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
>>>   	ARM_SPE_AMPEREONE_L2D                           = 0x9,
>>>   };
>>> +enum arm_spe_hisi_hip_data_source {
>>> +	ARM_SPE_HISI_HIP_PEER_CPU		= 0,
>>> +	ARM_SPE_HISI_HIP_PEER_CPU_HITM		= 1,
>>> +	ARM_SPE_HISI_HIP_L3			= 2,
>>> +	ARM_SPE_HISI_HIP_L3_HITM		= 3,
>>> +	ARM_SPE_HISI_HIP_PEER_CLUSTER		= 4,
>>> +	ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM	= 5,
>>> +	ARM_SPE_HISI_HIP_REMOTE_SOCKET		= 6,
>>> +	ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM	= 7,
>>> +	ARM_SPE_HISI_HIP_LOCAL			= 8,
>>> +	ARM_SPE_HISI_HIP_REMOTE			= 9,
>>> +	ARM_SPE_HISI_HIP_NC_DEV			= 13,
>>> +	ARM_SPE_HISI_HIP_L2			= 16,
>>> +	ARM_SPE_HISI_HIP_L2_HITM		= 17,
>>> +	ARM_SPE_HISI_HIP_L1			= 18,
>>> +};
>>> +
>>>   struct arm_spe_record {
>>>   	enum arm_spe_sample_type type;
>>>   	int err;
>>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>>> index 2a9775649cc2..eceae4219601 100644
>>> --- a/tools/perf/util/arm-spe.c
>>> +++ b/tools/perf/util/arm-spe.c
>>> @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
>>>   	{},
>>>   };
>>> +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
>>> +	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
>>> +	{},
>>> +};
>>> +
>>>   static void arm_spe__sample_flags(struct arm_spe_queue *speq)
>>>   {
>>>   	const struct arm_spe_record *record = &speq->decoder->record;
>>> @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
>>>   	arm_spe__synth_data_source_common(&common_record, data_src);
>>>   }
>>> +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
>>> +						union perf_mem_data_src *data_src)
>>> +{
>>> +	/* Use common synthesis method to handle store operations */
>>> +	if (record->op & ARM_SPE_OP_ST) {
>>> +		arm_spe__synth_data_source_common(record, data_src);
>>> +		return;
>>> +	}
>>> +
>>> +	switch (record->source) {
>>> +	case ARM_SPE_HISI_HIP_PEER_CPU:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> 
> Seems it is conflict to set both 'PERF_MEM_SNOOP_NONE' and
> 'PERF_MEM_SNOOPX_PEER'.
> 
> Remove setting 'PERF_MEM_SNOOP_NONE' in this case?
> 
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_L3:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_L3_HITM:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> 
> Seems to me, a CPU has L3 cache, would the cluster has a higher level's
> cache?

In my case, the cluster CPUs share the L3 cache and there's several clusters.
L3's the highest level cache in the system.

> 
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> 
> Ditto for the confliction for the two flags 'SNOOP_NONE' and
> 'SNOOPX_PEER'.
> 
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>>> +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>
>> Hi Yicong,
>>
>> Is the mem_snoop setting missing from this one?
> 
> The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
> 
> If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
> 

they should not be mutal exclusive. mem_snoopx provides the information where
the cacheline comes from while mem_snoop provides the status of the cacheline.
if hardware supports we can gather both information from the data source, like
above for ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM.

for other cases if there's mem_snoopx information I think mem_snoop can be dropped,
this won't make differeces. Checked c2c_decode_stats(), only PERF_MEM_SNOOP_HIT and
PERF_MEM_SNOOP_HITM is useful when summarizing c2c statistics.

Thanks.



More information about the linux-arm-kernel mailing list