[PATCH V11 05/10] arm64/perf: Add branch stack support in ARMV8 PMU

Namhyung Kim namhyung at kernel.org
Thu Jun 1 19:33:54 PDT 2023


On Tue, May 30, 2023 at 9:27 PM Anshuman Khandual
<anshuman.khandual at arm.com> wrote:
>
> This enables support for branch stack sampling events in the ARMV8 PMU by
> checking has_branch_stack() on the event inside the 'struct arm_pmu'
> callbacks, although the branch stack helpers armv8pmu_branch_XXXXX() are
> just dummy functions for now. While here, this also defines arm_pmu's
> sched_task() callback as armv8pmu_sched_task(), which resets the branch
> record buffer on a sched_in.
>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Will Deacon <will at kernel.org>
> Cc: Mark Rutland <mark.rutland at arm.com>
> Cc: linux-arm-kernel at lists.infradead.org
> Cc: linux-kernel at vger.kernel.org
> Tested-by: James Clark <james.clark at arm.com>
> Signed-off-by: Anshuman Khandual <anshuman.khandual at arm.com>
> ---
>  arch/arm64/include/asm/perf_event.h | 33 +++++++++++++
>  drivers/perf/arm_pmuv3.c            | 76 ++++++++++++++++++++---------
>  2 files changed, 86 insertions(+), 23 deletions(-)
>
> diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
> index eb7071c9eb34..7548813783ba 100644
> --- a/arch/arm64/include/asm/perf_event.h
> +++ b/arch/arm64/include/asm/perf_event.h
> @@ -24,4 +24,37 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
>         (regs)->pstate = PSR_MODE_EL1h; \
>  }
>
> +struct pmu_hw_events;
> +struct arm_pmu;
> +struct perf_event;
> +
> +#ifdef CONFIG_PERF_EVENTS
> +static inline bool has_branch_stack(struct perf_event *event);
> +
> +static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
> +{
> +       WARN_ON_ONCE(!has_branch_stack(event));
> +}
> +
> +static inline bool armv8pmu_branch_valid(struct perf_event *event)
> +{
> +       WARN_ON_ONCE(!has_branch_stack(event));
> +       return false;
> +}
> +
> +static inline void armv8pmu_branch_enable(struct perf_event *event)
> +{
> +       WARN_ON_ONCE(!has_branch_stack(event));
> +}
> +
> +static inline void armv8pmu_branch_disable(struct perf_event *event)
> +{
> +       WARN_ON_ONCE(!has_branch_stack(event));
> +}
> +
> +static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { }
> +static inline void armv8pmu_branch_reset(void) { }
> +static inline int armv8pmu_private_alloc(struct arm_pmu *arm_pmu) { return 0; }
> +static inline void armv8pmu_private_free(struct arm_pmu *arm_pmu) { }
> +#endif
>  #endif
> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
> index c98e4039386d..86d803ff1ae3 100644
> --- a/drivers/perf/arm_pmuv3.c
> +++ b/drivers/perf/arm_pmuv3.c
> @@ -705,38 +705,21 @@ static void armv8pmu_enable_event(struct perf_event *event)
>          * Enable counter and interrupt, and set the counter to count
>          * the event that we're interested in.
>          */
> -
> -       /*
> -        * Disable counter
> -        */
>         armv8pmu_disable_event_counter(event);
> -
> -       /*
> -        * Set event.
> -        */
>         armv8pmu_write_event_type(event);
> -
> -       /*
> -        * Enable interrupt for this counter
> -        */
>         armv8pmu_enable_event_irq(event);
> -
> -       /*
> -        * Enable counter
> -        */
>         armv8pmu_enable_event_counter(event);
> +
> +       if (has_branch_stack(event))
> +               armv8pmu_branch_enable(event);
>  }
>
>  static void armv8pmu_disable_event(struct perf_event *event)
>  {
> -       /*
> -        * Disable counter
> -        */
> -       armv8pmu_disable_event_counter(event);
> +       if (has_branch_stack(event))
> +               armv8pmu_branch_disable(event);
>
> -       /*
> -        * Disable interrupt for this counter
> -        */
> +       armv8pmu_disable_event_counter(event);
>         armv8pmu_disable_event_irq(event);
>  }
>
> @@ -814,6 +797,11 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
>                 if (!armpmu_event_set_period(event))
>                         continue;
>
> +               if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
> +                       armv8pmu_branch_read(cpuc, event);
> +                       perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack);
> +               }
> +
>                 /*
>                  * Perf event overflow will queue the processing of the event as
>                  * an irq_work which will be taken care of in the handling of
> @@ -912,6 +900,14 @@ static int armv8pmu_user_event_idx(struct perf_event *event)
>         return event->hw.idx;
>  }
>
> +static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
> +{
> +       struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
> +
> +       if (sched_in && arm_pmu_branch_stack_supported(armpmu))
> +               armv8pmu_branch_reset();
> +}
> +
>  /*
>   * Add an event filter to a given event.
>   */
> @@ -982,6 +978,9 @@ static void armv8pmu_reset(void *info)
>                 pmcr |= ARMV8_PMU_PMCR_LP;
>
>         armv8pmu_pmcr_write(pmcr);
> +
> +       if (arm_pmu_branch_stack_supported(cpu_pmu))
> +               armv8pmu_branch_reset();
>  }
>
>  static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu,
> @@ -1019,6 +1018,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
>
>         hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event);
>
> +       if (has_branch_stack(event) && !armv8pmu_branch_valid(event))
> +               return -EOPNOTSUPP;
> +
>         /*
>          * CHAIN events only work when paired with an adjacent counter, and it
>          * never makes sense for a user to open one in isolation, as they'll be
> @@ -1135,6 +1137,21 @@ static void __armv8pmu_probe_pmu(void *info)
>                 cpu_pmu->reg_pmmir = read_pmmir();
>         else
>                 cpu_pmu->reg_pmmir = 0;
> +       armv8pmu_branch_probe(cpu_pmu);
> +}
> +
> +static int branch_records_alloc(struct arm_pmu *armpmu)
> +{
> +       struct pmu_hw_events *events;
> +       int cpu;
> +
> +       for_each_possible_cpu(cpu) {
> +               events = per_cpu_ptr(armpmu->hw_events, cpu);
> +               events->branches = kzalloc(sizeof(struct branch_records), GFP_KERNEL);
> +               if (!events->branches)
> +                       return -ENOMEM;
> +       }
> +       return 0;
>  }
>
>  static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
> @@ -1145,12 +1162,24 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
>         };
>         int ret;
>
> +       ret = armv8pmu_private_alloc(cpu_pmu);
> +       if (ret)
> +               return ret;

Wouldn't it be better to move this armv8pmu_private_alloc() call under
the arm_pmu_branch_stack_supported() check below, if it's only needed
for branch stack?

> +
>         ret = smp_call_function_any(&cpu_pmu->supported_cpus,
>                                     __armv8pmu_probe_pmu,
>                                     &probe, 1);
>         if (ret)
>                 return ret;

Otherwise you might need to free it here.

>
> +       if (arm_pmu_branch_stack_supported(cpu_pmu)) {
> +               ret = branch_records_alloc(cpu_pmu);
> +               if (ret)
> +                       return ret;

And here too.

Thanks,
Namhyung


> +       } else {
> +               armv8pmu_private_free(cpu_pmu);
> +       }
> +
>         return probe.present ? 0 : -ENODEV;
>  }
>
> @@ -1214,6 +1243,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
>         cpu_pmu->set_event_filter       = armv8pmu_set_event_filter;
>
>         cpu_pmu->pmu.event_idx          = armv8pmu_user_event_idx;
> +       cpu_pmu->sched_task             = armv8pmu_sched_task;
>
>         cpu_pmu->name                   = name;
>         cpu_pmu->map_event              = map_event;
> --
> 2.25.1
>



More information about the linux-arm-kernel mailing list