[PATCH 4/4] perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU

Thu Jun 22 01:51:43 PDT 2023

On Wed, 21 Jun 2023 18:11:41 -0700
Ilkka Koskinen <ilkka at os.amperecomputing.com> wrote:

> Ampere SoC PMU follows CoreSight PMU architecture. It uses implementation
> specific registers to filter events rather than PMEVFILTnR registers.
> 
> Signed-off-by: Ilkka Koskinen <ilkka at os.amperecomputing.com>
Hi Ilkka,

Drive by review so not super detailed (I was curious) but a few questions/comments inline.

Jonathan

> ---
>  .../admin-guide/perf/ampere_cspmu.rst         |  29 +++
>  drivers/perf/arm_cspmu/Makefile               |   2 +-
>  drivers/perf/arm_cspmu/ampere_cspmu.c         | 232 ++++++++++++++++++
>  drivers/perf/arm_cspmu/ampere_cspmu.h         |  17 ++
>  drivers/perf/arm_cspmu/arm_cspmu.c            |   7 +
>  5 files changed, 286 insertions(+), 1 deletion(-)
>  create mode 100644 Documentation/admin-guide/perf/ampere_cspmu.rst
>  create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.c
>  create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.h
> 
> diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst
> new file mode 100644
> index 000000000000..bf86bffeef63
> --- /dev/null
> +++ b/Documentation/admin-guide/perf/ampere_cspmu.rst
> @@ -0,0 +1,29 @@

> +
> +Example for perf tool use::
> +
> +  / # perf list ampere
> +
> +    ampere_mcu_pmu_0/act_sent/                         [Kernel PMU event]
> +    <...>
> +    ampere_mcu_pmu_1/rd_sent/                          [Kernel PMU event]
> +    <...>
> +
> +  / # perf stat -a -e ampere_mcu_pmu_0/act_sent,filter_enable=3,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
> +        sleep 1

Why filter_enable=3?

> +static u32 ampere_cspmu_event_filter(const struct perf_event *event)
> +{

Whilst lots of other comments on this - perhaps add another one here to
why this is a noop.

> +	return 0;
> +}
> +
> +static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
> +				       struct hw_perf_event *hwc,
> +				       u32 filter)
> +{
> +	struct perf_event *event;
> +	unsigned int idx;
> +	u32 threshold, rank, bank;
> +
> +	/*
> +	 * At this point, all the events have the same filter settings.
> +	 * Therefore, take the first event and use its configuration.
> +	 */
> +	idx = find_first_bit(cspmu->hw_events.used_ctrs,
> +			     cspmu->cycle_counter_logical_idx);
> +
> +	event = cspmu->hw_events.events[idx];
> +
> +	threshold	= get_threshold(event);
> +	rank		= get_rank(event);
> +	bank		= get_bank(event);
> +
> +	writel(threshold, cspmu->base0 + PMAUXR0);
> +	writel(rank, cspmu->base0 + PMAUXR1);
> +	writel(bank, cspmu->base0 + PMAUXR2);
> +}
> +
> +static int ampere_cspmu_validate_configs(struct perf_event *event,
> +					 struct perf_event *event2)
> +{
> +	if (get_threshold(event) != get_threshold(event2) ||
> +	    get_rank(event) != get_rank(event2) ||
> +	    get_bank(event) != get_bank(event2))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
> +				       struct perf_event *new)
> +{
> +	struct perf_event *curr, *leader = new->group_leader;
> +	unsigned int idx;
> +	int ret;
> +
> +	ret = ampere_cspmu_validate_configs(new, leader);
> +	if (ret)
> +		return ret;
> +
> +	/* We compare the global filter settings to existing events */
> +	idx = find_first_bit(cspmu->hw_events.used_ctrs,
> +			     cspmu->cycle_counter_logical_idx);
> +
> +	/* This is the first event */

Maybe add why that matters to the comment?

> +	if (idx == cspmu->cycle_counter_logical_idx)
> +		return 0;
> +
> +	curr = cspmu->hw_events.events[idx];
> +
> +	return ampere_cspmu_validate_configs(curr, new);
> +}
> +
> +static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
> +				      const char *name_pattern)
> +{
> +	struct device *dev = cspmu->dev;
> +	static atomic_t pmu_generic_idx = {0};

Why not an ida?

If the pmu drivers ever become easy to unbind then you won't get ID
reusage like this an eventually you will run into overflow problems.

> +
> +	return devm_kasprintf(dev, GFP_KERNEL, name_pattern,
> +			      atomic_fetch_inc(&pmu_generic_idx));
> +}
> +
> +int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
> +{
> +	struct device *dev = cspmu->dev;
> +	struct ampere_cspmu_ctx *ctx;
> +	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
> +
> +	ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ctx->event_attr		= ampereone_mcu_pmu_event_attrs;
> +	ctx->format_attr	= ampereone_mcu_format_attrs;
> +	ctx->name		= ampere_cspmu_format_name(cspmu,
> +							   "ampere_mcu_pmu_%u");

Long line and need to break avoided if you don't bother trying to align the = signs...
Personally I don't like this style as it causes a lot of churn as drivers
evolve, but meh, it's up to you.

Given the result is confusing if the allocation fails (name not what is expected)
I would also check that allocation and error out if it fails.  Obviously it won't
under realistic circumstances, but a bit of paranoia never hurt anyone.

> +	cspmu->impl.ctx = ctx;
> +
> +	impl_ops->event_filter		= ampere_cspmu_event_filter;
> +	impl_ops->set_ev_filter		= ampere_cspmu_set_ev_filter;
> +	impl_ops->validate_event	= ampere_cspmu_validate_event;
> +	impl_ops->get_name		= ampere_cspmu_get_name;
> +	impl_ops->get_event_attrs	= ampere_cspmu_get_event_attrs;
> +	impl_ops->get_format_attrs	= ampere_cspmu_get_format_attrs;
> +
> +	return 0;
> +}
> +
> +MODULE_LICENSE("GPL v2");

...

> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
> index 471d6d7ac81a..587515eea0b4 100644
> --- a/drivers/perf/arm_cspmu/arm_cspmu.c
> +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
> @@ -29,6 +29,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/platform_device.h>
>  
> +#include "ampere_cspmu.h"

I'd be tempted to keep the generic header in a separate block then
follow with the vendor ones.  Not particularly important though.

>  #include "arm_cspmu.h"
>  #include "nvidia_cspmu.h"
>  
> @@ -114,6 +115,7 @@
>  
>  /* JEDEC-assigned JEP106 identification code */
>  #define ARM_CSPMU_IMPL_ID_NVIDIA		0x36B
> +#define ARM_CSPMU_IMPL_ID_AMPERE		0xA16
>  
>  static unsigned long arm_cspmu_cpuhp_state;
>  
> @@ -388,6 +390,11 @@ static const struct impl_match impl_match[] = {
>  	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
>  	  .impl_init_ops = nv_cspmu_init_ops
>  	},
> +	{
> +	  .pmiidr = ARM_CSPMU_IMPL_ID_AMPERE,
> +	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
> +	  .impl_init_ops = ampere_cspmu_init_ops
> +	},
>  	{}
>  };
>