[RFC PATCH 5/6] riscv: perf: introduce DT mechanism

Anup Patel anup at brainfault.org
Mon Jun 29 00:36:56 EDT 2020


On Mon, Jun 29, 2020 at 8:49 AM Zong Li <zong.li at sifive.com> wrote:
>
> Each architecture is responsible for mapping generic hardware and cache
> events to their own specific encoding of hardware events. For each
> architecture, it also have to distinguish the defination of hardware
> events of different platforms of each vendor. We use DT file to describe
> platform-specific information to make our perf implementation more
> generic and common.
>
> Signed-off-by: Zong Li <zong.li at sifive.com>
> ---
>  arch/riscv/include/asm/perf_event.h |  55 ++----
>  arch/riscv/kernel/perf_event.c      | 273 +++++++++++++---------------
>  2 files changed, 139 insertions(+), 189 deletions(-)
>
> diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
> index 41d515a1f331..e95d3bbaae3e 100644
> --- a/arch/riscv/include/asm/perf_event.h
> +++ b/arch/riscv/include/asm/perf_event.h
> @@ -17,6 +17,8 @@
>  #define RISCV_EVENT_COUNTERS   29
>  #define RISCV_TOTAL_COUNTERS   (RISCV_BASE_COUNTERS + RISCV_EVENT_COUNTERS)
>
> +#define RISCV_DEFAULT_WIDTH_COUNTER    64
> +
>  /*
>   * According to the spec, an implementation can support counter up to
>   * mhpmcounter31, but many high-end processors has at most 6 general
> @@ -33,9 +35,21 @@
>
>  #define RISCV_PMU_HPMCOUNTER_FIRST     3
>  #define RISCV_PMU_HPMCOUNTER_LAST                                      \
> -       (RISCV_PMU_HPMCOUNTER_FIRST + riscv_pmu->num_counters - 1)
> +       (RISCV_PMU_HPMCOUNTER_FIRST + riscv_pmu.num_event_cntr - 1)
> +
> +#define RISCV_OP_UNSUPP                        (-EOPNOTSUPP)
> +
> +#define RISCV_MAP_ALL_UNSUPPORTED                                      \
> +       [0 ... PERF_COUNT_HW_MAX - 1] = RISCV_OP_UNSUPP
>
> -#define RISCV_OP_UNSUPP                (-EOPNOTSUPP)
> +#define C(x) PERF_COUNT_HW_CACHE_##x
> +
> +#define RISCV_CACHE_MAP_ALL_UNSUPPORTED                                        \
> +[0 ... C(MAX) - 1] = {                                                 \
> +       [0 ... C(OP_MAX) - 1] = {                                       \
> +               [0 ... C(RESULT_MAX) - 1] = RISCV_OP_UNSUPP,            \
> +       },                                                              \
> +}
>
>  /* Hardware cache event encoding */
>  #define PERF_HW_CACHE_TYPE             0
> @@ -65,43 +79,6 @@
>  #define CSR_MHPMEVENT7 0x327
>  #define CSR_MHPMEVENT8 0x328
>
> -struct cpu_hw_events {
> -       /* # currently enabled events*/
> -       int                     n_events;
> -       /* currently enabled events */
> -       struct perf_event       *events[RISCV_EVENT_COUNTERS];
> -       /* bitmap of used event counters */
> -       unsigned long           used_cntr_mask;
> -       /* vendor-defined PMU data */
> -       void                    *platform;
> -};
> -
> -struct riscv_pmu {
> -       struct pmu      *pmu;
> -
> -       /* generic hw/cache events table */
> -       const int       *hw_events;
> -       const int       (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
> -                                      [PERF_COUNT_HW_CACHE_OP_MAX]
> -                                      [PERF_COUNT_HW_CACHE_RESULT_MAX];
> -       /* method used to map hw/cache events */
> -       int             (*map_hw_event)(u64 config);
> -       int             (*map_cache_event)(u64 config);
> -
> -       /* max generic hw events in map */
> -       int             max_events;
> -       /* number total counters, 2(base) + x(general) */
> -       int             num_counters;
> -       /* the width of the counter */
> -       int             counter_width;
> -
> -       /* vendor-defined PMU features */
> -       void            *platform;
> -
> -       irqreturn_t     (*handle_irq)(int irq_num, void *dev);
> -       int             irq;
> -};
> -
>  #endif
>  #ifdef CONFIG_PERF_EVENTS
>  #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
> diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
> index 0cfcd6f1e57b..3bdfbe4efd5c 100644
> --- a/arch/riscv/kernel/perf_event.c
> +++ b/arch/riscv/kernel/perf_event.c
> @@ -9,6 +9,7 @@
>   * Copyright (C) 2009 Google, Inc., Stephane Eranian
>   * Copyright 2014 Tilera Corporation. All Rights Reserved.
>   * Copyright (C) 2018 Andes Technology Corporation
> + * Copyright (C) 2020 SiFive
>   *
>   * Perf_events support for RISC-V platforms.
>   *
> @@ -30,113 +31,55 @@
>  #include <linux/perf_event.h>
>  #include <linux/atomic.h>
>  #include <linux/of.h>
> +#include <asm/csr.h>
>  #include <asm/perf_event.h>
>
> -static const struct riscv_pmu *riscv_pmu __read_mostly;
> +static struct riscv_pmu {
> +       struct pmu      *pmu;
> +
> +       /* number of event counters */
> +       int             num_event_cntr;
> +
> +       /* the width of base counters */
> +       int             width_base_cntr;
> +
> +       /* the width of event counters */
> +       int             width_event_cntr;
> +
> +       irqreturn_t     (*handle_irq)(int irq_num, void *dev);
> +
> +       int             irq;
> +} riscv_pmu;
> +
> +struct cpu_hw_events {
> +       /* # currently enabled events*/
> +       int                     n_events;
> +
> +       /* currently enabled events */
> +       struct perf_event       *events[RISCV_EVENT_COUNTERS];
> +
> +       /* bitmap of used event counters */
> +       unsigned long           used_cntr_mask;
> +};
> +
>  static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
>
>  /*
>   * Hardware & cache maps and their methods
>   */
>
> -static const int riscv_hw_event_map[] = {
> -       [PERF_COUNT_HW_CPU_CYCLES]              = RISCV_PMU_CYCLE,
> -       [PERF_COUNT_HW_INSTRUCTIONS]            = RISCV_PMU_INSTRET,
> -       [PERF_COUNT_HW_CACHE_REFERENCES]        = RISCV_OP_UNSUPP,
> -       [PERF_COUNT_HW_CACHE_MISSES]            = RISCV_OP_UNSUPP,
> -       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = RISCV_OP_UNSUPP,
> -       [PERF_COUNT_HW_BRANCH_MISSES]           = RISCV_OP_UNSUPP,
> -       [PERF_COUNT_HW_BUS_CYCLES]              = RISCV_OP_UNSUPP,
> +static int riscv_hw_event_map[PERF_COUNT_HW_MAX] = {
> +       RISCV_MAP_ALL_UNSUPPORTED,
> +
> +       /* Specify base pmu, even if they aren't present in DT file */
> +       [PERF_COUNT_HW_CPU_CYCLES]      = RISCV_PMU_CYCLE,
> +       [PERF_COUNT_HW_INSTRUCTIONS]    = RISCV_PMU_INSTRET,
>  };
>
> -#define C(x) PERF_COUNT_HW_CACHE_##x
> -static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
> -[PERF_COUNT_HW_CACHE_OP_MAX]
> -[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
> -       [C(L1D)] = {
> -               [C(OP_READ)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_WRITE)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_PREFETCH)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -       },
> -       [C(L1I)] = {
> -               [C(OP_READ)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_WRITE)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_PREFETCH)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -       },
> -       [C(LL)] = {
> -               [C(OP_READ)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_WRITE)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_PREFETCH)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -       },
> -       [C(DTLB)] = {
> -               [C(OP_READ)] = {
> -                       [C(RESULT_ACCESS)] =  RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] =  RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_WRITE)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_PREFETCH)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -       },
> -       [C(ITLB)] = {
> -               [C(OP_READ)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_WRITE)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_PREFETCH)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -       },
> -       [C(BPU)] = {
> -               [C(OP_READ)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_WRITE)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -               [C(OP_PREFETCH)] = {
> -                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
> -                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
> -               },
> -       },
> +static int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
> +                               [PERF_COUNT_HW_CACHE_OP_MAX]
> +                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
> +       RISCV_CACHE_MAP_ALL_UNSUPPORTED,
>  };
>
>  /*
> @@ -154,6 +97,17 @@ static inline int is_event_counter(int idx)
>                 idx <= RISCV_PMU_HPMCOUNTER_LAST);
>  }
>
> +static inline int get_counter_width(int idx)
> +{
> +       if (is_base_counter(idx))
> +               return riscv_pmu.width_base_cntr;
> +
> +       if (is_event_counter(idx))
> +               return riscv_pmu.width_event_cntr;
> +
> +       return 0;
> +}
> +
>  static inline int get_available_counter(struct perf_event *event)
>  {
>         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> @@ -188,10 +142,14 @@ static inline int get_available_counter(struct perf_event *event)
>   */
>  static int riscv_map_hw_event(u64 config)
>  {
> -       if (config >= riscv_pmu->max_events)
> +       int ret;
> +
> +       if (config >= PERF_COUNT_HW_MAX)
>                 return -EINVAL;
>
> -       return riscv_pmu->hw_events[config];
> +       ret = riscv_hw_event_map[config];
> +
> +       return ret == RISCV_OP_UNSUPP ? -ENOENT : ret;
>  }
>
>  /*
> @@ -355,7 +313,7 @@ static void riscv_pmu_read(struct perf_event *event)
>          * delta is the value to update the counter we maintain in the kernel.
>          */
>         delta = (new_raw_count - prev_raw_count) &
> -               ((1ULL << riscv_pmu->counter_width) - 1);
> +               ((1ULL << get_counter_width(idx)) - 1);
>
>         local64_add(delta, &event->count);
>         /*
> @@ -386,7 +344,7 @@ static void riscv_pmu_stop(struct perf_event *event, int flags)
>         hwc->state |= PERF_HES_STOPPED;
>
>         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
> -               riscv_pmu->pmu->read(event);
> +               riscv_pmu_read(event);
>                 hwc->state |= PERF_HES_UPTODATE;
>         }
>  }
> @@ -429,7 +387,7 @@ static int riscv_pmu_add(struct perf_event *event, int flags)
>         struct hw_perf_event *hwc = &event->hw;
>         int count_idx;
>
> -       if (cpuc->n_events == riscv_pmu->num_counters)
> +       if (cpuc->n_events == riscv_pmu.num_event_cntr)
>                 return -ENOSPC;
>
>         count_idx = get_available_counter(event);
> @@ -437,13 +395,13 @@ static int riscv_pmu_add(struct perf_event *event, int flags)
>                 return -ENOSPC;
>
>         cpuc->n_events++;
> +
>         hwc->idx = count_idx;
> -       cpuc->events[hwc->idx] = event;
>
>         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
>
>         if (flags & PERF_EF_START)
> -               riscv_pmu->pmu->start(event, PERF_EF_RELOAD);
> +               riscv_pmu_start(event, PERF_EF_RELOAD);
>
>         return 0;
>  }
> @@ -459,8 +417,8 @@ static void riscv_pmu_del(struct perf_event *event, int flags)
>         cpuc->n_events--;
>         __clear_bit(hwc->idx, &cpuc->used_cntr_mask);
>
> -       cpuc->events[hwc->idx] = NULL;
> -       riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
> +       riscv_pmu_stop(event, PERF_EF_UPDATE);
> +
>         perf_event_update_userpage(event);
>  }
>
> @@ -470,7 +428,7 @@ static void riscv_pmu_del(struct perf_event *event, int flags)
>
>  static DEFINE_MUTEX(pmc_reserve_mutex);
>
> -static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
> +static irqreturn_t riscv_pmu_handle_irq(int irq_num, void *dev)
>  {
>         return IRQ_NONE;
>  }
> @@ -480,8 +438,8 @@ static int reserve_pmc_hardware(void)
>         int err = 0;
>
>         mutex_lock(&pmc_reserve_mutex);
> -       if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) {
> -               err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq,
> +       if (riscv_pmu.irq >= 0 && riscv_pmu.handle_irq) {
> +               err = request_irq(riscv_pmu.irq, riscv_pmu.handle_irq,
>                                   IRQF_PERCPU, "riscv-base-perf", NULL);
>         }
>         mutex_unlock(&pmc_reserve_mutex);
> @@ -492,8 +450,8 @@ static int reserve_pmc_hardware(void)
>  static void release_pmc_hardware(void)
>  {
>         mutex_lock(&pmc_reserve_mutex);
> -       if (riscv_pmu->irq >= 0)
> -               free_irq(riscv_pmu->irq, NULL);
> +       if (riscv_pmu.irq >= 0)
> +               free_irq(riscv_pmu.irq, NULL);
>         mutex_unlock(&pmc_reserve_mutex);
>  }
>
> @@ -529,10 +487,10 @@ static int riscv_event_init(struct perf_event *event)
>
>         switch (event->attr.type) {
>         case PERF_TYPE_HARDWARE:
> -               code = riscv_pmu->map_hw_event(attr->config);
> +               code = riscv_map_hw_event(attr->config);
>                 break;
>         case PERF_TYPE_HW_CACHE:
> -               code = riscv_pmu->map_cache_event(attr->config);
> +               code = riscv_map_cache_event(attr->config);
>                 break;
>         case PERF_TYPE_RAW:
>                 code = attr->config;
> @@ -555,9 +513,6 @@ static int riscv_event_init(struct perf_event *event)
>         /*
>          * idx is set to -1 because the index of a general event should not be
>          * decided until binding to some counter in pmu->add().
> -        *
> -        * But since we don't have such support, later in pmu->add(), we just
> -        * use hwc->config as the index instead.
>          */
>         hwc->config_base = config_base;
>         hwc->config = code;
> @@ -570,52 +525,70 @@ static int riscv_event_init(struct perf_event *event)
>   * Initialization
>   */
>
> -static struct pmu min_pmu = {
> -       .name           = "riscv-base",
> -       .event_init     = riscv_event_init,
> -       .add            = riscv_pmu_add,
> -       .del            = riscv_pmu_del,
> -       .start          = riscv_pmu_start,
> -       .stop           = riscv_pmu_stop,
> -       .read           = riscv_pmu_read,
> -};
> +static struct riscv_pmu riscv_pmu = {
> +       .pmu = &(struct pmu) {
> +               .name           = "riscv-pmu",
> +               .event_init     = riscv_event_init,
> +               .add            = riscv_pmu_add,
> +               .del            = riscv_pmu_del,
> +               .start          = riscv_pmu_start,
> +               .stop           = riscv_pmu_stop,
> +               .read           = riscv_pmu_read,
> +       },
>
> -static const struct riscv_pmu riscv_base_pmu = {
> -       .pmu = &min_pmu,
> -       .max_events = ARRAY_SIZE(riscv_hw_event_map),
> -       .map_hw_event = riscv_map_hw_event,
> -       .hw_events = riscv_hw_event_map,
> -       .map_cache_event = riscv_map_cache_event,
> -       .cache_events = &riscv_cache_event_map,
> -       .counter_width = 63,
> -       .num_counters = RISCV_BASE_COUNTERS + 0,
> -       .handle_irq = &riscv_base_pmu_handle_irq,
> +       .num_event_cntr = 0,
> +       .width_event_cntr = RISCV_DEFAULT_WIDTH_COUNTER,
> +       .width_base_cntr = RISCV_DEFAULT_WIDTH_COUNTER,
>
> +       .handle_irq = &riscv_pmu_handle_irq,
>         /* This means this PMU has no IRQ. */
>         .irq = -1,
>  };
>
> -static const struct of_device_id riscv_pmu_of_ids[] = {
> -       {.compatible = "riscv,base-pmu",        .data = &riscv_base_pmu},
> -       { /* sentinel value */ }
> -};
> +static int __init init_riscv_pmu(struct device_node *node)
> +{
> +       int num_events, key, value, i;
> +
> +       of_property_read_u32(node, "riscv,width-base-cntr", &riscv_pmu.width_base_cntr);
> +
> +       of_property_read_u32(node, "riscv,width-event-cntr", &riscv_pmu.width_event_cntr);
> +
> +       of_property_read_u32(node, "riscv,n-event-cntr", &riscv_pmu.num_event_cntr);
> +       if (riscv_pmu.num_event_cntr > RISCV_EVENT_COUNTERS)
> +               riscv_pmu.num_event_cntr = RISCV_EVENT_COUNTERS;
> +
> +       num_events = of_property_count_u32_elems(node, "riscv,hw-event-map");
> +       if (num_events > 0 && num_events % 2 == 0)
> +               for (i = 0; i < num_events;) {
> +                       of_property_read_u32_index(node, "riscv,hw-event-map", i++, &key);
> +                       of_property_read_u32_index(node, "riscv,hw-event-map", i++, &value);
> +                       riscv_hw_event_map[key] = value;
> +               }
> +
> +       num_events = of_property_count_u32_elems(node, "riscv,hw-cache-event-map");
> +       if (num_events > 0 && num_events % 2 == 0)
> +               for (i = 0; i < num_events;) {
> +                       of_property_read_u32_index(node, "riscv,hw-cache-event-map", i++, &key);
> +                       of_property_read_u32_index(node, "riscv,hw-cache-event-map", i++, &value);
> +                       riscv_cache_event_map
> +                               [(key >> PERF_HW_CACHE_TYPE) & PERF_HW_CACHE_MASK]
> +                               [(key >> PERF_HW_CACHE_OP) & PERF_HW_CACHE_MASK]
> +                               [(key >> PERF_HW_CACHE_RESULT) & PERF_HW_CACHE_MASK] = value;
> +               }
> +
> +       return 0;
> +}
>
>  static int __init init_hw_perf_events(void)
>  {
> -       struct device_node *node = of_find_node_by_type(NULL, "pmu");
> -       const struct of_device_id *of_id;
> +       struct device_node *node = of_find_compatible_node(NULL, NULL, "riscv,pmu");
>
> -       riscv_pmu = &riscv_base_pmu;
> +       if (node)
> +               init_riscv_pmu(node);
>
> -       if (node) {
> -               of_id = of_match_node(riscv_pmu_of_ids, node);
> -
> -               if (of_id)
> -                       riscv_pmu = of_id->data;
> -               of_node_put(node);
> -       }
> +       /* Even if there is no pmu node in DT, we reach here for base PMU. */
> +       perf_pmu_register(riscv_pmu.pmu, "cpu", PERF_TYPE_RAW);
>
> -       perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW);
>         return 0;
>  }
>  arch_initcall(init_hw_perf_events);

Why does this have to arch_initcall() ??
Why can't we just probe this like a regular DT based driver ??

This driver needs total re-write because it has to be a platform driver.
Even the names of counters are not registered. The implementation
specific counter names should come from DT property.

Regards,
Anup

> --
> 2.27.0
>



More information about the linux-riscv mailing list