[RFC PATCH 4/6] riscv: perf: Add raw event support

Anup Patel anup at brainfault.org
Mon Jun 29 00:17:22 EDT 2020


On Mon, Jun 29, 2020 at 8:49 AM Zong Li <zong.li at sifive.com> wrote:
>
> Add support for raw events and hardware cache events. Currently, we set
> the events by writing the mhpmeventN CSRs, which raises an illegal
> instruction exception and traps into m-mode, where the event selector
> CSR access is emulated. This doesn't make sense because we shouldn't
> write m-mode CSRs from s-mode; it would be better to set events through
> an SBI call or the s-mode shadow CSRs. We will change this later.
>
> Signed-off-by: Zong Li <zong.li at sifive.com>
> ---
>  arch/riscv/include/asm/perf_event.h |  65 ++++++---
>  arch/riscv/kernel/perf_event.c      | 204 +++++++++++++++++++++++-----
>  2 files changed, 215 insertions(+), 54 deletions(-)
>
> diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
> index 062efd3a1d5d..41d515a1f331 100644
> --- a/arch/riscv/include/asm/perf_event.h
> +++ b/arch/riscv/include/asm/perf_event.h
> @@ -14,39 +14,64 @@
>
>  #ifdef CONFIG_RISCV_BASE_PMU
>  #define RISCV_BASE_COUNTERS    2
> +#define RISCV_EVENT_COUNTERS   29

Same comment as the one I made on the DT documentation regarding naming.

Regards,
Anup


> +#define RISCV_TOTAL_COUNTERS   (RISCV_BASE_COUNTERS + RISCV_EVENT_COUNTERS)
>
>  /*
> - * The RISCV_MAX_COUNTERS parameter should be specified.
> - */
> -
> -#define RISCV_MAX_COUNTERS     2
> -
> -/*
> - * These are the indexes of bits in counteren register *minus* 1,
> - * except for cycle.  It would be coherent if it can directly mapped
> - * to counteren bit definition, but there is a *time* register at
> - * counteren[1].  Per-cpu structure is scarce resource here.
> - *
>   * According to the spec, an implementation can support counter up to
>   * mhpmcounter31, but many high-end processors has at most 6 general
>   * PMCs, we give the definition to MHPMCOUNTER8 here.
>   */
> -#define RISCV_PMU_CYCLE                0
> -#define RISCV_PMU_INSTRET      1
> -#define RISCV_PMU_MHPMCOUNTER3 2
> -#define RISCV_PMU_MHPMCOUNTER4 3
> -#define RISCV_PMU_MHPMCOUNTER5 4
> -#define RISCV_PMU_MHPMCOUNTER6 5
> -#define RISCV_PMU_MHPMCOUNTER7 6
> -#define RISCV_PMU_MHPMCOUNTER8 7
> +#define RISCV_PMU_CYCLE                        0
> +#define RISCV_PMU_INSTRET              2
> +#define RISCV_PMU_HPMCOUNTER3          3
> +#define RISCV_PMU_HPMCOUNTER4          4
> +#define RISCV_PMU_HPMCOUNTER5          5
> +#define RISCV_PMU_HPMCOUNTER6          6
> +#define RISCV_PMU_HPMCOUNTER7          7
> +#define RISCV_PMU_HPMCOUNTER8          8
> +
> +#define RISCV_PMU_HPMCOUNTER_FIRST     3
> +#define RISCV_PMU_HPMCOUNTER_LAST                                      \
> +       (RISCV_PMU_HPMCOUNTER_FIRST + riscv_pmu->num_counters - 1)
>
>  #define RISCV_OP_UNSUPP                (-EOPNOTSUPP)
>
> +/* Hardware cache event encoding */
> +#define PERF_HW_CACHE_TYPE             0
> +#define PERF_HW_CACHE_OP               8
> +#define PERF_HW_CACHE_RESULT           16
> +#define PERF_HW_CACHE_MASK             0xff
> +
> +/* config_base encoding */
> +#define RISCV_PMU_TYPE_MASK            0x3
> +#define RISCV_PMU_TYPE_BASE            0x1
> +#define RISCV_PMU_TYPE_EVENT           0x2
> +#define RISCV_PMU_EXCLUDE_MASK         0xc
> +#define RISCV_PMU_EXCLUDE_USER         0x3
> +#define RISCV_PMU_EXCLUDE_KERNEL       0x4
> +
> +/*
> + * Currently, machine-mode supports emulation of mhpmeventN. Setting mhpmeventN
> + * to raise an illegal instruction exception to set event types in machine-mode.
> + * Eventually, we should set event types through standard SBI call or the shadow
> + * CSRs of supervisor-mode, because it is weird for writing CSR of machine-mode
> + * explicitly in supervisor-mode. These macro should be removed in the future.
> + */
> +#define CSR_MHPMEVENT3 0x323
> +#define CSR_MHPMEVENT4 0x324
> +#define CSR_MHPMEVENT5 0x325
> +#define CSR_MHPMEVENT6 0x326
> +#define CSR_MHPMEVENT7 0x327
> +#define CSR_MHPMEVENT8 0x328
> +
>  struct cpu_hw_events {
>         /* # currently enabled events*/
>         int                     n_events;
>         /* currently enabled events */
> -       struct perf_event       *events[RISCV_MAX_COUNTERS];
> +       struct perf_event       *events[RISCV_EVENT_COUNTERS];
> +       /* bitmap of used event counters */
> +       unsigned long           used_cntr_mask;
>         /* vendor-defined PMU data */
>         void                    *platform;
>  };
> diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
> index c835f0362d94..0cfcd6f1e57b 100644
> --- a/arch/riscv/kernel/perf_event.c
> +++ b/arch/riscv/kernel/perf_event.c
> @@ -139,6 +139,53 @@ static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
>         },
>  };
>
> +/*
> + * Methods for checking and getting PMU information
> + */
> +
> +static inline int is_base_counter(int idx)
> +{
> +       return (idx == RISCV_PMU_CYCLE || idx == RISCV_PMU_INSTRET);
> +}
> +
> +static inline int is_event_counter(int idx)
> +{
> +       return (idx >= RISCV_PMU_HPMCOUNTER_FIRST &&
> +               idx <= RISCV_PMU_HPMCOUNTER_LAST);
> +}
> +
> +static inline int get_available_counter(struct perf_event *event)
> +{
> +       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +       struct hw_perf_event *hwc = &event->hw;
> +       unsigned long config_base = hwc->config_base & RISCV_PMU_TYPE_MASK;
> +       unsigned long mask;
> +       int ret;
> +
> +       switch (config_base) {
> +       case RISCV_PMU_TYPE_BASE:
> +               ret = hwc->config;
> +               if (WARN_ON_ONCE(!is_base_counter(ret)))
> +                       return -ENOSPC;
> +               break;
> +       case RISCV_PMU_TYPE_EVENT:
> +               mask = ~cpuc->used_cntr_mask;
> +               ret = find_next_bit(&mask, RISCV_PMU_HPMCOUNTER_LAST, 3);
> +               if (WARN_ON_ONCE(!is_event_counter(ret)))
> +                       return -ENOSPC;
> +               break;
> +       default:
> +               return -ENOENT;
> +       }
> +
> +       __set_bit(ret, &cpuc->used_cntr_mask);
> +
> +       return ret;
> +}
> +
> +/*
> + * Map generic hardware event
> + */
>  static int riscv_map_hw_event(u64 config)
>  {
>         if (config >= riscv_pmu->max_events)
> @@ -147,32 +194,28 @@ static int riscv_map_hw_event(u64 config)
>         return riscv_pmu->hw_events[config];
>  }
>
> -static int riscv_map_cache_decode(u64 config, unsigned int *type,
> -                          unsigned int *op, unsigned int *result)
> -{
> -       return -ENOENT;
> -}
> -
> +/*
> + * Map generic hardware cache event
> + */
>  static int riscv_map_cache_event(u64 config)
>  {
>         unsigned int type, op, result;
> -       int err = -ENOENT;
> -               int code;
> +       int ret;
>
> -       err = riscv_map_cache_decode(config, &type, &op, &result);
> -       if (!riscv_pmu->cache_events || err)
> -               return err;
> +       type    = (config >> PERF_HW_CACHE_TYPE) & PERF_HW_CACHE_MASK;
> +       op      = (config >> PERF_HW_CACHE_OP) & PERF_HW_CACHE_MASK;
> +       result  = (config >> PERF_HW_CACHE_RESULT) & PERF_HW_CACHE_MASK;
>
>         if (type >= PERF_COUNT_HW_CACHE_MAX ||
>             op >= PERF_COUNT_HW_CACHE_OP_MAX ||
>             result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
>                 return -EINVAL;
>
> -       code = (*riscv_pmu->cache_events)[type][op][result];
> -       if (code == RISCV_OP_UNSUPP)
> +       ret = riscv_cache_event_map[type][op][result];
> +       if (ret == RISCV_OP_UNSUPP)
>                 return -EINVAL;
>
> -       return code;
> +       return ret == RISCV_OP_UNSUPP ? -ENOENT : ret;
>  }
>
>  /*
> @@ -190,8 +233,27 @@ static inline u64 read_counter(int idx)
>         case RISCV_PMU_INSTRET:
>                 val = csr_read(CSR_INSTRET);
>                 break;
> +       case RISCV_PMU_HPMCOUNTER3:
> +               val = csr_read(CSR_HPMCOUNTER3);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER4:
> +               val = csr_read(CSR_HPMCOUNTER4);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER5:
> +               val = csr_read(CSR_HPMCOUNTER5);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER6:
> +               val = csr_read(CSR_HPMCOUNTER6);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER7:
> +               val = csr_read(CSR_HPMCOUNTER7);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER8:
> +               val = csr_read(CSR_HPMCOUNTER8);

This is broken for RV32, because on RV32 we have to read two CSRs
(the high and low halves) to get a counter value.

Also, to correctly read a 64-bit counter on RV32, we have to read it
the same way get_cycles64() does for RV32:

/*
 * Assemble a 64-bit cycle count from two separate 32-bit reads:
 * get_cycles_hi() supplies the upper 32 bits and get_cycles() the lower
 * 32 bits of the returned value.
 */
static inline u64 get_cycles64(void)
{
    u32 hi, lo;

    /*
     * Read the high half, then the low half, then the high half again.
     * If the two high-half reads differ, the pair is discarded and the
     * reads are retried, so the returned hi/lo come from an iteration
     * in which the high half was the same before and after reading lo.
     */
    do {
        hi = get_cycles_hi();
        lo = get_cycles();
    } while (hi != get_cycles_hi());

    /* Combine: hi occupies bits 63..32, lo occupies bits 31..0. */
    return ((u64)hi << 32) | lo;
}

Regards,
Anup


> +               break;
>         default:
> -               WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
> +               WARN_ON_ONCE(idx < RISCV_PMU_CYCLE ||
> +                            idx > RISCV_TOTAL_COUNTERS);
>                 return -EINVAL;
>         }
>
> @@ -204,6 +266,68 @@ static inline void write_counter(int idx, u64 value)
>         WARN_ON_ONCE(1);
>  }
>
> +static inline void write_event(int idx, u64 value)
> +{
> +       /* TODO: We shouldn't write CSR of m-mode explicitly here. Ideally,
> +        * it need to set the event selector by SBI call or the s-mode
> +        * shadow CSRs of them. Exploit illegal instruction exception to
> +        * emulate mhpmcounterN access in m-mode.
> +        */
> +       switch (idx) {
> +       case RISCV_PMU_HPMCOUNTER3:
> +               csr_write(CSR_MHPMEVENT3, value);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER4:
> +               csr_write(CSR_MHPMEVENT4, value);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER5:
> +               csr_write(CSR_MHPMEVENT5, value);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER6:
> +               csr_write(CSR_MHPMEVENT6, value);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER7:
> +               csr_write(CSR_MHPMEVENT7, value);
> +               break;
> +       case RISCV_PMU_HPMCOUNTER8:
> +               csr_write(CSR_MHPMEVENT8, value);
> +               break;
> +       default:
> +               WARN_ON_ONCE(idx < RISCV_PMU_HPMCOUNTER3 ||
> +                            idx > RISCV_TOTAL_COUNTERS);
> +               return;
> +       }
> +}
> +
> +/*
> + * Enable and disable event counters
> + */
> +
> +static inline void riscv_pmu_enable_event(struct perf_event *event)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       int idx = hwc->idx;
> +
> +       if (is_event_counter(idx))
> +               write_event(idx, hwc->config);
> +
> +       /*
> +        * Since we cannot write to counters, this serves as an initialization
> +        * to the delta-mechanism in pmu->read(); otherwise, the delta would be
> +        * wrong when pmu->read is called for the first time.
> +        */
> +       local64_set(&hwc->prev_count, read_counter(hwc->idx));
> +}
> +
> +static inline void riscv_pmu_disable_event(struct perf_event *event)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       int idx = hwc->idx;
> +
> +       if (is_event_counter(idx))
> +               write_event(idx, 0);
> +}
> +
>  /*
>   * pmu->read: read and update the counter
>   *
> @@ -232,6 +356,7 @@ static void riscv_pmu_read(struct perf_event *event)
>          */
>         delta = (new_raw_count - prev_raw_count) &
>                 ((1ULL << riscv_pmu->counter_width) - 1);
> +
>         local64_add(delta, &event->count);
>         /*
>          * Something like local64_sub(delta, &hwc->period_left) here is
> @@ -252,6 +377,11 @@ static void riscv_pmu_stop(struct perf_event *event, int flags)
>  {
>         struct hw_perf_event *hwc = &event->hw;
>
> +       if (WARN_ON_ONCE(hwc->idx == -1))
> +               return;
> +
> +       riscv_pmu_disable_event(event);
> +
>         WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
>         hwc->state |= PERF_HES_STOPPED;
>
> @@ -271,6 +401,9 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
>         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
>                 return;
>
> +       if (WARN_ON_ONCE(hwc->idx == -1))
> +               return;
> +
>         if (flags & PERF_EF_RELOAD) {
>                 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
>
> @@ -281,14 +414,10 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
>         }
>
>         hwc->state = 0;
> -       perf_event_update_userpage(event);
>
> -       /*
> -        * Since we cannot write to counters, this serves as an initialization
> -        * to the delta-mechanism in pmu->read(); otherwise, the delta would be
> -        * wrong when pmu->read is called for the first time.
> -        */
> -       local64_set(&hwc->prev_count, read_counter(hwc->idx));
> +       riscv_pmu_enable_event(event);
> +
> +       perf_event_update_userpage(event);
>  }
>
>  /*
> @@ -298,21 +427,18 @@ static int riscv_pmu_add(struct perf_event *event, int flags)
>  {
>         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>         struct hw_perf_event *hwc = &event->hw;
> +       int count_idx;
>
>         if (cpuc->n_events == riscv_pmu->num_counters)
>                 return -ENOSPC;
>
> -       /*
> -        * We don't have general conunters, so no binding-event-to-counter
> -        * process here.
> -        *
> -        * Indexing using hwc->config generally not works, since config may
> -        * contain extra information, but here the only info we have in
> -        * hwc->config is the event index.
> -        */
> -       hwc->idx = hwc->config;
> -       cpuc->events[hwc->idx] = event;
> +       count_idx = get_available_counter(event);
> +       if (count_idx < 0)
> +               return -ENOSPC;
> +
>         cpuc->n_events++;
> +       hwc->idx = count_idx;
> +       cpuc->events[hwc->idx] = event;
>
>         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
>
> @@ -330,8 +456,10 @@ static void riscv_pmu_del(struct perf_event *event, int flags)
>         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>         struct hw_perf_event *hwc = &event->hw;
>
> -       cpuc->events[hwc->idx] = NULL;
>         cpuc->n_events--;
> +       __clear_bit(hwc->idx, &cpuc->used_cntr_mask);
> +
> +       cpuc->events[hwc->idx] = NULL;
>         riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
>         perf_event_update_userpage(event);
>  }
> @@ -385,6 +513,7 @@ static int riscv_event_init(struct perf_event *event)
>  {
>         struct perf_event_attr *attr = &event->attr;
>         struct hw_perf_event *hwc = &event->hw;
> +       unsigned long config_base = 0;
>         int err;
>         int code;
>
> @@ -406,11 +535,17 @@ static int riscv_event_init(struct perf_event *event)
>                 code = riscv_pmu->map_cache_event(attr->config);
>                 break;
>         case PERF_TYPE_RAW:
> -               return -EOPNOTSUPP;
> +               code = attr->config;
> +               break;
>         default:
>                 return -ENOENT;
>         }
>
> +       if (is_base_counter(code))
> +               config_base |= RISCV_PMU_TYPE_BASE;
> +       else
> +               config_base |= RISCV_PMU_TYPE_EVENT;
> +
>         event->destroy = riscv_event_destroy;
>         if (code < 0) {
>                 event->destroy(event);
> @@ -424,6 +559,7 @@ static int riscv_event_init(struct perf_event *event)
>          * But since we don't have such support, later in pmu->add(), we just
>          * use hwc->config as the index instead.
>          */
> +       hwc->config_base = config_base;
>         hwc->config = code;
>         hwc->idx = -1;
>
> --
> 2.27.0
>



More information about the linux-riscv mailing list