[PATCH 03/10] perf: arm_spe: Add support for FEAT_SPE_EFT extended filtering

Leo Yan leo.yan at arm.com
Tue May 20 03:35:58 PDT 2025


On Tue, May 06, 2025 at 12:41:35PM +0100, James Clark wrote:
> FEAT_SPE_EFT (optional from Armv9.4) adds mask bits for the existing
> load, store and branch filters. It also adds two new filter bits for
> SIMD and floating point with their own associated mask bits. The current
> filters only allow OR filtering on samples that are load OR store etc,
> and the new mask bits allow setting part of the filter to an AND, for
> example filtering samples that are store AND SIMD. With mask bits set to
> 0, the OR behavior is preserved, so unless any masks are explicitly
> set, old filters will behave the same.
> 
> Add them all and make them behave the same way as the existing format
> bits: they are hidden when the feature doesn't exist, and setting them
> in that case returns EOPNOTSUPP.
> 
> Signed-off-by: James Clark <james.clark at linaro.org>

Reviewed-by: Leo Yan <leo.yan at arm.com>

> ---
>  drivers/perf/arm_spe_pmu.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 64 insertions(+)
> 
> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
> index d9f6d229dce8..9309b846f642 100644
> --- a/drivers/perf/arm_spe_pmu.c
> +++ b/drivers/perf/arm_spe_pmu.c
> @@ -86,6 +86,7 @@ struct arm_spe_pmu {
>  #define SPE_PMU_FEAT_ERND			(1UL << 5)
>  #define SPE_PMU_FEAT_INV_FILT_EVT		(1UL << 6)
>  #define SPE_PMU_FEAT_DISCARD			(1UL << 7)
> +#define SPE_PMU_FEAT_EFT			(1UL << 8)
>  #define SPE_PMU_FEAT_DEV_PROBED			(1UL << 63)
>  	u64					features;
>  
> @@ -197,6 +198,27 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
>  #define ATTR_CFG_FLD_discard_CFG		config	/* PMBLIMITR_EL1.FM = DISCARD */
>  #define ATTR_CFG_FLD_discard_LO			35
>  #define ATTR_CFG_FLD_discard_HI			35
> +#define ATTR_CFG_FLD_branch_filter_mask_CFG	config	/* PMSFCR_EL1.Bm */
> +#define ATTR_CFG_FLD_branch_filter_mask_LO	36
> +#define ATTR_CFG_FLD_branch_filter_mask_HI	36
> +#define ATTR_CFG_FLD_load_filter_mask_CFG	config	/* PMSFCR_EL1.LDm */
> +#define ATTR_CFG_FLD_load_filter_mask_LO	37
> +#define ATTR_CFG_FLD_load_filter_mask_HI	37
> +#define ATTR_CFG_FLD_store_filter_mask_CFG	config	/* PMSFCR_EL1.STm */
> +#define ATTR_CFG_FLD_store_filter_mask_LO	38
> +#define ATTR_CFG_FLD_store_filter_mask_HI	38
> +#define ATTR_CFG_FLD_simd_filter_CFG		config	/* PMSFCR_EL1.SIMD */
> +#define ATTR_CFG_FLD_simd_filter_LO		39
> +#define ATTR_CFG_FLD_simd_filter_HI		39
> +#define ATTR_CFG_FLD_simd_filter_mask_CFG	config	/* PMSFCR_EL1.SIMDm */
> +#define ATTR_CFG_FLD_simd_filter_mask_LO	40
> +#define ATTR_CFG_FLD_simd_filter_mask_HI	40
> +#define ATTR_CFG_FLD_float_filter_CFG		config	/* PMSFCR_EL1.FP */
> +#define ATTR_CFG_FLD_float_filter_LO		41
> +#define ATTR_CFG_FLD_float_filter_HI		41
> +#define ATTR_CFG_FLD_float_filter_mask_CFG	config	/* PMSFCR_EL1.FPm */
> +#define ATTR_CFG_FLD_float_filter_mask_LO	42
> +#define ATTR_CFG_FLD_float_filter_mask_HI	42
>  
>  #define ATTR_CFG_FLD_event_filter_CFG		config1	/* PMSEVFR_EL1 */
>  #define ATTR_CFG_FLD_event_filter_LO		0
> @@ -215,8 +237,15 @@ GEN_PMU_FORMAT_ATTR(pa_enable);
>  GEN_PMU_FORMAT_ATTR(pct_enable);
>  GEN_PMU_FORMAT_ATTR(jitter);
>  GEN_PMU_FORMAT_ATTR(branch_filter);
> +GEN_PMU_FORMAT_ATTR(branch_filter_mask);
>  GEN_PMU_FORMAT_ATTR(load_filter);
> +GEN_PMU_FORMAT_ATTR(load_filter_mask);
>  GEN_PMU_FORMAT_ATTR(store_filter);
> +GEN_PMU_FORMAT_ATTR(store_filter_mask);
> +GEN_PMU_FORMAT_ATTR(simd_filter);
> +GEN_PMU_FORMAT_ATTR(simd_filter_mask);
> +GEN_PMU_FORMAT_ATTR(float_filter);
> +GEN_PMU_FORMAT_ATTR(float_filter_mask);
>  GEN_PMU_FORMAT_ATTR(event_filter);
>  GEN_PMU_FORMAT_ATTR(inv_event_filter);
>  GEN_PMU_FORMAT_ATTR(min_latency);
> @@ -228,8 +257,15 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
>  	&format_attr_pct_enable.attr,
>  	&format_attr_jitter.attr,
>  	&format_attr_branch_filter.attr,
> +	&format_attr_branch_filter_mask.attr,
>  	&format_attr_load_filter.attr,
> +	&format_attr_load_filter_mask.attr,
>  	&format_attr_store_filter.attr,
> +	&format_attr_store_filter_mask.attr,
> +	&format_attr_simd_filter.attr,
> +	&format_attr_simd_filter_mask.attr,
> +	&format_attr_float_filter.attr,
> +	&format_attr_float_filter_mask.attr,
>  	&format_attr_event_filter.attr,
>  	&format_attr_inv_event_filter.attr,
>  	&format_attr_min_latency.attr,
> @@ -250,6 +286,16 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
>  	if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
>  		return 0;
>  
> +	if ((attr == &format_attr_branch_filter_mask.attr ||
> +	     attr == &format_attr_load_filter_mask.attr ||
> +	     attr == &format_attr_store_filter_mask.attr ||
> +	     attr == &format_attr_simd_filter.attr ||
> +	     attr == &format_attr_simd_filter_mask.attr ||
> +	     attr == &format_attr_float_filter.attr ||
> +	     attr == &format_attr_float_filter_mask.attr) &&
> +	     !(spe_pmu->features & SPE_PMU_FEAT_EFT))
> +		return 0;
> +
>  	return attr->mode;
>  }
>  
> @@ -341,8 +387,15 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
>  	u64 reg = 0;
>  
>  	reg |= FIELD_PREP(PMSFCR_EL1_LD, ATTR_CFG_GET_FLD(attr, load_filter));
> +	reg |= FIELD_PREP(PMSFCR_EL1_LDm, ATTR_CFG_GET_FLD(attr, load_filter_mask));
>  	reg |= FIELD_PREP(PMSFCR_EL1_ST, ATTR_CFG_GET_FLD(attr, store_filter));
> +	reg |= FIELD_PREP(PMSFCR_EL1_STm, ATTR_CFG_GET_FLD(attr, store_filter_mask));
>  	reg |= FIELD_PREP(PMSFCR_EL1_B, ATTR_CFG_GET_FLD(attr, branch_filter));
> +	reg |= FIELD_PREP(PMSFCR_EL1_Bm, ATTR_CFG_GET_FLD(attr, branch_filter_mask));
> +	reg |= FIELD_PREP(PMSFCR_EL1_SIMD, ATTR_CFG_GET_FLD(attr, simd_filter));
> +	reg |= FIELD_PREP(PMSFCR_EL1_SIMDm, ATTR_CFG_GET_FLD(attr, simd_filter_mask));
> +	reg |= FIELD_PREP(PMSFCR_EL1_FP, ATTR_CFG_GET_FLD(attr, float_filter));
> +	reg |= FIELD_PREP(PMSFCR_EL1_FPm, ATTR_CFG_GET_FLD(attr, float_filter_mask));
>  
>  	if (reg)
>  		reg |= PMSFCR_EL1_FT;
> @@ -716,6 +769,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>  	u64 reg;
>  	struct perf_event_attr *attr = &event->attr;
>  	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
> +	const u64 feat_spe_eft_bits = PMSFCR_EL1_LDm | PMSFCR_EL1_STm |
> +				      PMSFCR_EL1_Bm | PMSFCR_EL1_SIMD |
> +				      PMSFCR_EL1_SIMDm | PMSFCR_EL1_FP |
> +				      PMSFCR_EL1_FPm;
>  
>  	/* This is, of course, deeply driver-specific */
>  	if (attr->type != event->pmu->type)
> @@ -761,6 +818,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>  	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
>  		return -EOPNOTSUPP;
>  
> +	if ((reg & feat_spe_eft_bits) &&
> +	    !(spe_pmu->features & SPE_PMU_FEAT_EFT))
> +		return -EOPNOTSUPP;
> +
>  	if (ATTR_CFG_GET_FLD(&event->attr, discard) &&
>  	    !(spe_pmu->features & SPE_PMU_FEAT_DISCARD))
>  		return -EOPNOTSUPP;
> @@ -1052,6 +1113,9 @@ static void __arm_spe_pmu_dev_probe(void *info)
>  	if (spe_pmu->pmsver >= ID_AA64DFR0_EL1_PMSVer_V1P2)
>  		spe_pmu->features |= SPE_PMU_FEAT_DISCARD;
>  
> +	if (FIELD_GET(PMSIDR_EL1_EFT, reg))
> +		spe_pmu->features |= SPE_PMU_FEAT_EFT;
> +
>  	/* This field has a spaced out encoding, so just use a look-up */
>  	fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg);
>  	switch (fld) {
> 
> -- 
> 2.34.1
> 



More information about the linux-arm-kernel mailing list