[PATCH] drivers/perf/arm_smmuv3_pmu: Support for PMU Snapshot

Mark Rutland mark.rutland at arm.com
Thu Apr 25 03:26:24 PDT 2024


On Tue, Apr 23, 2024 at 01:06:47PM +0000, Tarun Sahu wrote:
> This patch adds support to capture snapshot of all event counters for
> SMMUV3 PMU counters. The following functionalities are added:
> 1. Manually capture the snapshot
> 2. Capture the snapshot when an event counter overflows.
> 
> Test:
>      To manually capture the snapshot:
>      $ echo 1 > /sys/devices/smmuv3_pmcg_<device_id>/capture
>      'OR'
>      Configure an event for capturing the snapshot when it overflows
>      $ perf stat -e /smmuv3_pmcg_ff88840/transaction,filter_enable=1,
> 	overflow_capture=1,filter_span=1,filter_stream_id=0x42/ -a
> 	netperf
> 
>      To read the snapshot:
>      $ cat /sys/devices/smmuv3_pmcg_<device_id>/capture

Why is that a magic file in sysfs rather than being exposed via perf?

How does the first case even work? Where were the events configured?

Looking at the code, that just iteratres over the counters. You should be able
to achieve similar by creating a group of events, then reading the group leader
to snapshot the group (though this won't currently use the HW capture feature).

There's no rationale given here; what are you trying to use this for?

As it stands (and condsidering the above) I do not think this is the right
approach -- if nothing else I think the values should come via perf and not via
sysfs.

Mark.

> 
> Signed-off-by: Tarun Sahu <tarunsahu at google.com>
> ---
>  drivers/perf/arm_smmuv3_pmu.c | 77 +++++++++++++++++++++++++++++++++++
>  1 file changed, 77 insertions(+)
> 
> diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
> index 6303b82566f9..6e2baa752ccc 100644
> --- a/drivers/perf/arm_smmuv3_pmu.c
> +++ b/drivers/perf/arm_smmuv3_pmu.c
> @@ -15,6 +15,7 @@
>   *   filter_enable    - 0 = no filtering, 1 = filtering enabled
>   *   filter_span      - 0 = exact match, 1 = pattern match
>   *   filter_stream_id - pattern to filter against
> + *   overflow_capture - capture all events when this event overflows
>   *
>   * To match a partial StreamID where the X most-significant bits must match
>   * but the Y least-significant bits might differ, STREAMID is programmed
> @@ -56,6 +57,8 @@
>  
>  #define SMMU_PMCG_EVCNTR0               0x0
>  #define SMMU_PMCG_EVCNTR(n, stride)     (SMMU_PMCG_EVCNTR0 + (n) * (stride))
> +#define SMMU_PMCG_SVR0                  0x0
> +#define SMMU_PMCG_SVR(n, stride)        (SMMU_PMCG_SVR0 + (n) * (stride))
>  #define SMMU_PMCG_EVTYPER0              0x400
>  #define SMMU_PMCG_EVTYPER(n)            (SMMU_PMCG_EVTYPER0 + (n) * 4)
>  #define SMMU_PMCG_SID_SPAN_SHIFT        29
> @@ -67,8 +70,11 @@
>  #define SMMU_PMCG_INTENCLR0             0xC60
>  #define SMMU_PMCG_OVSCLR0               0xC80
>  #define SMMU_PMCG_OVSSET0               0xCC0
> +#define SMMU_PMCG_CAPR                  0xD88
> +#define SMMU_PMCG_CAPR_CAPTURE          BIT(0)
>  #define SMMU_PMCG_CFGR                  0xE00
>  #define SMMU_PMCG_CFGR_SID_FILTER_TYPE  BIT(23)
> +#define SMMU_PMCG_CFGR_CAPTURE          BIT(22)
>  #define SMMU_PMCG_CFGR_MSI              BIT(21)
>  #define SMMU_PMCG_CFGR_RELOC_CTRS       BIT(20)
>  #define SMMU_PMCG_CFGR_SIZE             GENMASK(13, 8)
> @@ -135,6 +141,7 @@ struct smmu_pmu {
>  	u32 options;
>  	u32 iidr;
>  	bool global_filter;
> +	bool capture;
>  };
>  
>  #define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu))
> @@ -147,6 +154,7 @@ struct smmu_pmu {
>  	}                                                                  \
>  
>  SMMU_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 15);
> +SMMU_PMU_EVENT_ATTR_EXTRACTOR(overflow_capture, config, 31, 31);
>  SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_stream_id, config1, 0, 31);
>  SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_span, config1, 32, 32);
>  SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_enable, config1, 33, 33);
> @@ -219,6 +227,18 @@ static inline u64 smmu_pmu_counter_get_value(struct smmu_pmu *smmu_pmu, u32 idx)
>  	return value;
>  }
>  
> +static inline u64 smmu_pmu_svr_get_value(struct smmu_pmu *smmu_pmu, u32 idx)
> +{
> +	u64 value;
> +
> +	if (smmu_pmu->counter_mask & BIT(32))
> +		value = readq(smmu_pmu->reloc_base + SMMU_PMCG_SVR(idx, 8));
> +	else
> +		value = readl(smmu_pmu->reloc_base + SMMU_PMCG_SVR(idx, 4));
> +
> +	return value;
> +}
> +
>  static inline void smmu_pmu_counter_enable(struct smmu_pmu *smmu_pmu, u32 idx)
>  {
>  	writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENSET0);
> @@ -306,6 +326,7 @@ static void smmu_pmu_set_event_filter(struct perf_event *event,
>  	u32 evtyper;
>  
>  	evtyper = get_event(event) | span << SMMU_PMCG_SID_SPAN_SHIFT;
> +	evtyper |= get_overflow_capture(event);
>  	smmu_pmu_set_evtyper(smmu_pmu, idx, evtyper);
>  	smmu_pmu_set_smr(smmu_pmu, idx, sid);
>  }
> @@ -549,6 +570,54 @@ static const struct attribute_group smmu_pmu_cpumask_group = {
>  	.attrs = smmu_pmu_cpumask_attrs,
>  };
>  
> +static ssize_t smmu_pmu_capture_show(struct device *dev,
> +				     struct device_attribute *attr,
> +				     char *buf)
> +{
> +	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
> +	u32 capture, i;
> +
> +	if (!smmu_pmu->capture)
> +		return -EPERM;
> +	capture = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CAPR) &
> +			  SMMU_PMCG_CAPR_CAPTURE;
> +	if (capture == 0) {
> +		for (i = 0; i < SMMU_PMCG_MAX_COUNTERS; i++) {
> +			capture = sprintf(buf, "%s%d %llu\n", buf, i,
> +					smmu_pmu_svr_get_value(smmu_pmu, i));
> +		}
> +	}
> +	return capture;
> +}
> +
> +static ssize_t smmu_pmu_capture_store(struct device *dev,
> +				     struct device_attribute *attr,
> +				     const char *buf, size_t count)
> +{
> +	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
> +	unsigned long val;
> +
> +	if (!smmu_pmu->capture)
> +		return -EPERM;
> +	if (kstrtoul(buf, 0, &val) == 0 && val == 1) {
> +		writeq_relaxed(1, smmu_pmu->reg_base + SMMU_PMCG_CAPR);
> +		return count;
> +	}
> +	return -EINVAL;
> +}
> +
> +static struct device_attribute smmu_pmu_capture_attr =
> +		__ATTR(capture, 0644, smmu_pmu_capture_show, smmu_pmu_capture_store);
> +
> +static struct attribute *smmu_pmu_capture_attrs[] = {
> +	&smmu_pmu_capture_attr.attr,
> +	NULL
> +};
> +
> +static const struct attribute_group smmu_pmu_capture_group = {
> +	.attrs = smmu_pmu_capture_attrs,
> +};
> +
>  /* Events */
>  
>  static ssize_t smmu_pmu_event_show(struct device *dev,
> @@ -633,12 +702,14 @@ static const struct attribute_group smmu_pmu_identifier_group = {
>  
>  /* Formats */
>  PMU_FORMAT_ATTR(event,		   "config:0-15");
> +PMU_FORMAT_ATTR(overflow_capture, "config:31");
>  PMU_FORMAT_ATTR(filter_stream_id,  "config1:0-31");
>  PMU_FORMAT_ATTR(filter_span,	   "config1:32");
>  PMU_FORMAT_ATTR(filter_enable,	   "config1:33");
>  
>  static struct attribute *smmu_pmu_formats[] = {
>  	&format_attr_event.attr,
> +	&format_attr_overflow_capture.attr,
>  	&format_attr_filter_stream_id.attr,
>  	&format_attr_filter_span.attr,
>  	&format_attr_filter_enable.attr,
> @@ -651,6 +722,7 @@ static const struct attribute_group smmu_pmu_format_group = {
>  };
>  
>  static const struct attribute_group *smmu_pmu_attr_grps[] = {
> +	&smmu_pmu_capture_group,
>  	&smmu_pmu_cpumask_group,
>  	&smmu_pmu_events_group,
>  	&smmu_pmu_format_group,
> @@ -888,6 +960,11 @@ static int smmu_pmu_probe(struct platform_device *pdev)
>  		smmu_pmu->reloc_base = smmu_pmu->reg_base;
>  	}
>  
> +	if (cfgr & SMMU_PMCG_CFGR_CAPTURE)
> +		smmu_pmu->capture = true;
> +	else
> +		smmu_pmu->capture = false;
> +
>  	irq = platform_get_irq_optional(pdev, 0);
>  	if (irq > 0)
>  		smmu_pmu->irq = irq;
> -- 
> 2.44.0.769.g3c40516874-goog
> 



More information about the linux-arm-kernel mailing list