[PATCH] drivers/perf/arm_smmuv3_pmu: Support for PMU Snapshot

Robin Murphy robin.murphy at arm.com
Thu Apr 25 06:38:55 PDT 2024


On 25/04/2024 11:26 am, Mark Rutland wrote:
> On Tue, Apr 23, 2024 at 01:06:47PM +0000, Tarun Sahu wrote:
>> This patch adds support to capture snapshot of all event counters for
>> SMMUV3 PMU counters. The following functionalities are added:
>> 1. Manually capture the snapshot
>> 2. Capture the snapshot when an event counter overflows.
>>
>> Test:
>>       To manually capture the snapshot:
>>       $ echo 1 > /sys/devices/smmuv3_pmcg_<device_id>/capture
>>       'OR'
>>       Configure an event for capturing the snapshot when it overflows
>>       $ perf stat -e /smmuv3_pmcg_ff88840/transaction,filter_enable=1,
>> 	overflow_capture=1,filter_span=1,filter_stream_id=0x42/ -a
>> 	netperf
>>
>>       To read the snapshot:
>>       $ cat /sys/devices/smmuv3_pmcg_<device_id>/capture
> 
> Why is that a magic file in sysfs rather than being exposed via perf?
> 
> How does the first case even work? Where were the events configured?

More to the point, how does *either* case provide meaningful 
information? What the interface presents is... some numbers, which 
reflect some partial counts of some unknown set of PMU events from some 
unknown point(s) in time - I can't imagine a user being able to draw any 
useful conclusion from that, unless they simply had a strong desire to 
be able to confirm "Yes, these are indeed some numbers."

> Looking at the code, that just iteratres over the counters. You should be able
> to achieve similar by creating a group of events, then reading the group leader
> to snapshot the group (though this won't currently use the HW capture feature).

FWIW after spending several days attempting to use the equivalent 
snapshot feature for reading in arm-cmn, I came to the conclusion that 
even for a driver's internal implementation, it's just not a very useful 
thing in general for how the perf APIs expect to work.

Thanks,
Robin.

> 
> There's no rationale given here; what are you trying to use this for?
> 
> As it stands (and condsidering the above) I do not think this is the right
> approach -- if nothing else I think the values should come via perf and not via
> sysfs.
> 
> Mark.
> 
>>
>> Signed-off-by: Tarun Sahu <tarunsahu at google.com>
>> ---
>>   drivers/perf/arm_smmuv3_pmu.c | 77 +++++++++++++++++++++++++++++++++++
>>   1 file changed, 77 insertions(+)
>>
>> diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
>> index 6303b82566f9..6e2baa752ccc 100644
>> --- a/drivers/perf/arm_smmuv3_pmu.c
>> +++ b/drivers/perf/arm_smmuv3_pmu.c
>> @@ -15,6 +15,7 @@
>>    *   filter_enable    - 0 = no filtering, 1 = filtering enabled
>>    *   filter_span      - 0 = exact match, 1 = pattern match
>>    *   filter_stream_id - pattern to filter against
>> + *   overflow_capture - capture all events when this event overflows
>>    *
>>    * To match a partial StreamID where the X most-significant bits must match
>>    * but the Y least-significant bits might differ, STREAMID is programmed
>> @@ -56,6 +57,8 @@
>>   
>>   #define SMMU_PMCG_EVCNTR0               0x0
>>   #define SMMU_PMCG_EVCNTR(n, stride)     (SMMU_PMCG_EVCNTR0 + (n) * (stride))
>> +#define SMMU_PMCG_SVR0                  0x0
>> +#define SMMU_PMCG_SVR(n, stride)        (SMMU_PMCG_SVR0 + (n) * (stride))
>>   #define SMMU_PMCG_EVTYPER0              0x400
>>   #define SMMU_PMCG_EVTYPER(n)            (SMMU_PMCG_EVTYPER0 + (n) * 4)
>>   #define SMMU_PMCG_SID_SPAN_SHIFT        29
>> @@ -67,8 +70,11 @@
>>   #define SMMU_PMCG_INTENCLR0             0xC60
>>   #define SMMU_PMCG_OVSCLR0               0xC80
>>   #define SMMU_PMCG_OVSSET0               0xCC0
>> +#define SMMU_PMCG_CAPR                  0xD88
>> +#define SMMU_PMCG_CAPR_CAPTURE          BIT(0)
>>   #define SMMU_PMCG_CFGR                  0xE00
>>   #define SMMU_PMCG_CFGR_SID_FILTER_TYPE  BIT(23)
>> +#define SMMU_PMCG_CFGR_CAPTURE          BIT(22)
>>   #define SMMU_PMCG_CFGR_MSI              BIT(21)
>>   #define SMMU_PMCG_CFGR_RELOC_CTRS       BIT(20)
>>   #define SMMU_PMCG_CFGR_SIZE             GENMASK(13, 8)
>> @@ -135,6 +141,7 @@ struct smmu_pmu {
>>   	u32 options;
>>   	u32 iidr;
>>   	bool global_filter;
>> +	bool capture;
>>   };
>>   
>>   #define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu))
>> @@ -147,6 +154,7 @@ struct smmu_pmu {
>>   	}                                                                  \
>>   
>>   SMMU_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 15);
>> +SMMU_PMU_EVENT_ATTR_EXTRACTOR(overflow_capture, config, 31, 31);
>>   SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_stream_id, config1, 0, 31);
>>   SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_span, config1, 32, 32);
>>   SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_enable, config1, 33, 33);
>> @@ -219,6 +227,18 @@ static inline u64 smmu_pmu_counter_get_value(struct smmu_pmu *smmu_pmu, u32 idx)
>>   	return value;
>>   }
>>   
>> +static inline u64 smmu_pmu_svr_get_value(struct smmu_pmu *smmu_pmu, u32 idx)
>> +{
>> +	u64 value;
>> +
>> +	if (smmu_pmu->counter_mask & BIT(32))
>> +		value = readq(smmu_pmu->reloc_base + SMMU_PMCG_SVR(idx, 8));
>> +	else
>> +		value = readl(smmu_pmu->reloc_base + SMMU_PMCG_SVR(idx, 4));
>> +
>> +	return value;
>> +}
>> +
>>   static inline void smmu_pmu_counter_enable(struct smmu_pmu *smmu_pmu, u32 idx)
>>   {
>>   	writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENSET0);
>> @@ -306,6 +326,7 @@ static void smmu_pmu_set_event_filter(struct perf_event *event,
>>   	u32 evtyper;
>>   
>>   	evtyper = get_event(event) | span << SMMU_PMCG_SID_SPAN_SHIFT;
>> +	evtyper |= get_overflow_capture(event);
>>   	smmu_pmu_set_evtyper(smmu_pmu, idx, evtyper);
>>   	smmu_pmu_set_smr(smmu_pmu, idx, sid);
>>   }
>> @@ -549,6 +570,54 @@ static const struct attribute_group smmu_pmu_cpumask_group = {
>>   	.attrs = smmu_pmu_cpumask_attrs,
>>   };
>>   
>> +static ssize_t smmu_pmu_capture_show(struct device *dev,
>> +				     struct device_attribute *attr,
>> +				     char *buf)
>> +{
>> +	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
>> +	u32 capture, i;
>> +
>> +	if (!smmu_pmu->capture)
>> +		return -EPERM;
>> +	capture = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CAPR) &
>> +			  SMMU_PMCG_CAPR_CAPTURE;
>> +	if (capture == 0) {
>> +		for (i = 0; i < SMMU_PMCG_MAX_COUNTERS; i++) {
>> +			capture = sprintf(buf, "%s%d %llu\n", buf, i,
>> +					smmu_pmu_svr_get_value(smmu_pmu, i));
>> +		}
>> +	}
>> +	return capture;
>> +}
>> +
>> +static ssize_t smmu_pmu_capture_store(struct device *dev,
>> +				     struct device_attribute *attr,
>> +				     const char *buf, size_t count)
>> +{
>> +	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
>> +	unsigned long val;
>> +
>> +	if (!smmu_pmu->capture)
>> +		return -EPERM;
>> +	if (kstrtoul(buf, 0, &val) == 0 && val == 1) {
>> +		writeq_relaxed(1, smmu_pmu->reg_base + SMMU_PMCG_CAPR);
>> +		return count;
>> +	}
>> +	return -EINVAL;
>> +}
>> +
>> +static struct device_attribute smmu_pmu_capture_attr =
>> +		__ATTR(capture, 0644, smmu_pmu_capture_show, smmu_pmu_capture_store);
>> +
>> +static struct attribute *smmu_pmu_capture_attrs[] = {
>> +	&smmu_pmu_capture_attr.attr,
>> +	NULL
>> +};
>> +
>> +static const struct attribute_group smmu_pmu_capture_group = {
>> +	.attrs = smmu_pmu_capture_attrs,
>> +};
>> +
>>   /* Events */
>>   
>>   static ssize_t smmu_pmu_event_show(struct device *dev,
>> @@ -633,12 +702,14 @@ static const struct attribute_group smmu_pmu_identifier_group = {
>>   
>>   /* Formats */
>>   PMU_FORMAT_ATTR(event,		   "config:0-15");
>> +PMU_FORMAT_ATTR(overflow_capture, "config:31");
>>   PMU_FORMAT_ATTR(filter_stream_id,  "config1:0-31");
>>   PMU_FORMAT_ATTR(filter_span,	   "config1:32");
>>   PMU_FORMAT_ATTR(filter_enable,	   "config1:33");
>>   
>>   static struct attribute *smmu_pmu_formats[] = {
>>   	&format_attr_event.attr,
>> +	&format_attr_overflow_capture.attr,
>>   	&format_attr_filter_stream_id.attr,
>>   	&format_attr_filter_span.attr,
>>   	&format_attr_filter_enable.attr,
>> @@ -651,6 +722,7 @@ static const struct attribute_group smmu_pmu_format_group = {
>>   };
>>   
>>   static const struct attribute_group *smmu_pmu_attr_grps[] = {
>> +	&smmu_pmu_capture_group,
>>   	&smmu_pmu_cpumask_group,
>>   	&smmu_pmu_events_group,
>>   	&smmu_pmu_format_group,
>> @@ -888,6 +960,11 @@ static int smmu_pmu_probe(struct platform_device *pdev)
>>   		smmu_pmu->reloc_base = smmu_pmu->reg_base;
>>   	}
>>   
>> +	if (cfgr & SMMU_PMCG_CFGR_CAPTURE)
>> +		smmu_pmu->capture = true;
>> +	else
>> +		smmu_pmu->capture = false;
>> +
>>   	irq = platform_get_irq_optional(pdev, 0);
>>   	if (irq > 0)
>>   		smmu_pmu->irq = irq;
>> -- 
>> 2.44.0.769.g3c40516874-goog
>>
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel



More information about the linux-arm-kernel mailing list