[PATCH 3/7] arm-cci: Abstract out the PMU counter details
Punit Agrawal
punit.agrawal at arm.com
Mon May 18 04:54:29 PDT 2015
"Suzuki K. Poulose" <suzuki.poulose at arm.com> writes:
> From: "Suzuki K. Poulose" <suzuki.poulose at arm.com>
>
> Adds the PMU model specific counters to the PMU model
> abstraction to make it easier to add a new PMU.
>
> The patch cleans up the naming convention used all over
> the code.
> e.g., CCI_PMU_MAX_HW_EVENTS => maximum number of events that
> can be counted at any time, which is in fact the maximum
> number of counters available.
>
> Change all such namings to use 'counters' instead of events.
>
> This patch also abstracts the following:
>
> 1) Size of a PMU event counter area.
> 2) Maximum number of programmable counters supported by the PMU model
> 3) Number of counters which count fixed events (e.g., the cycle
> counter on CCI-400).
>
> Also changes some of the statically allocated data
> structures to be dynamically allocated, to accommodate the
> number of counters supported by a PMU.
>
> Gets rid of the CCI_PMU_* defines for the model. All such
> data should be accessed via the model abstraction.
>
> Limits the number of counters to the maximum supported
> by the 'model'.
>
> Cc: Punit Agrawal <punit.agrawal at arm.com>
> Cc: Mark Rutland <mark.rutland at arm.com>
> Cc: Will Deacon <will.deacon at arm.com>
> Signed-off-by: Suzuki K. Poulose <suzuki.poulose at arm.com>
Acked-by: Punit Agrawal <punit.agrawal at arm.com>
> ---
> drivers/bus/arm-cci.c | 123 +++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 93 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
> index 27cc200..82d5681 100644
> --- a/drivers/bus/arm-cci.c
> +++ b/drivers/bus/arm-cci.c
> @@ -77,15 +77,17 @@ static const struct of_device_id arm_cci_matches[] = {
>
> #define CCI_PMU_OVRFLW_FLAG 1
>
> -#define CCI_PMU_CNTR_BASE(idx) ((idx) * SZ_4K)
> -
> -#define CCI_PMU_CNTR_MASK ((1ULL << 32) -1)
> +#define CCI_PMU_CNTR_SIZE(model) ((model)->cntr_size)
> +#define CCI_PMU_CNTR_BASE(model, idx) ((idx) * CCI_PMU_CNTR_SIZE(model))
> +#define CCI_PMU_CNTR_MASK ((1ULL << 32) -1)
> +#define CCI_PMU_CNTR_LAST(cci_pmu) (cci_pmu->num_cntrs - 1)
>
> #define CCI_PMU_EVENT_MASK 0xffUL
> #define CCI_PMU_EVENT_SOURCE(event) ((event >> 5) & 0x7)
> #define CCI_PMU_EVENT_CODE(event) (event & 0x1f)
>
> -#define CCI_PMU_MAX_HW_EVENTS 5 /* CCI PMU has 4 counters + 1 cycle counter */
> +#define CCI_PMU_MAX_HW_CNTRS(model) \
> + ((model)->num_hw_cntrs + (model)->fixed_hw_cntrs)
>
> /* Types of interfaces that can generate events */
> enum {
> @@ -100,13 +102,22 @@ struct event_range {
> };
>
> struct cci_pmu_hw_events {
> - struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
> - unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
> + struct perf_event **events;
> + unsigned long *used_mask;
> raw_spinlock_t pmu_lock;
> };
>
> +/*
> + * struct cci_pmu_model:
> + * @fixed_hw_cntrs - Number of fixed event counters
> + * @num_hw_cntrs - Maximum number of programmable event counters
> + * @cntr_size - Size of an event counter mapping
> + */
> struct cci_pmu_model {
> char *name;
> + u32 fixed_hw_cntrs;
> + u32 num_hw_cntrs;
> + u32 cntr_size;
> struct event_range event_ranges[CCI_IF_MAX];
> };
>
> @@ -116,12 +127,12 @@ struct cci_pmu {
> void __iomem *base;
> struct pmu pmu;
> int nr_irqs;
> - int irqs[CCI_PMU_MAX_HW_EVENTS];
> + int *irqs;
> unsigned long active_irqs;
> const struct cci_pmu_model *model;
> struct cci_pmu_hw_events hw_events;
> struct platform_device *plat_device;
> - int num_events;
> + int num_cntrs;
> atomic_t active_events;
> struct mutex reserve_mutex;
> struct notifier_block cpu_nb;
> @@ -155,7 +166,6 @@ enum cci400_perf_events {
>
> #define CCI_PMU_CYCLE_CNTR_IDX 0
> #define CCI_PMU_CNTR0_IDX 1
> -#define CCI_PMU_CNTR_LAST(cci_pmu) (CCI_PMU_CYCLE_CNTR_IDX + cci_pmu->num_events - 1)
>
> /*
> * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8
> @@ -235,20 +245,20 @@ static const struct cci_pmu_model *probe_cci_model(struct platform_device *pdev)
>
> static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx)
> {
> - return CCI_PMU_CYCLE_CNTR_IDX <= idx &&
> - idx <= CCI_PMU_CNTR_LAST(cci_pmu);
> + return 0 <= idx && idx <= CCI_PMU_CNTR_LAST(cci_pmu);
> }
>
> static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offset)
> {
> - return readl_relaxed(cci_pmu->base + CCI_PMU_CNTR_BASE(idx) + offset);
> + return readl_relaxed(cci_pmu->base +
> + CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
> }
>
> static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value,
> int idx, unsigned int offset)
> {
> return writel_relaxed(value, cci_pmu->base +
> - CCI_PMU_CNTR_BASE(idx) + offset);
> + CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
> }
>
> static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx)
> @@ -266,13 +276,14 @@ static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event)
> pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL);
> }
>
> +/*
> + * Returns the number of programmable counters actually implemented
> + * by the cci
> + */
> static u32 pmu_get_max_counters(void)
> {
> - u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI_PMCR) &
> - CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
> -
> - /* add 1 for cycle counter */
> - return n_cnts + 1;
> + return (readl_relaxed(cci_ctrl_base + CCI_PMCR) &
> + CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
> }
>
> static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event)
> @@ -496,7 +507,7 @@ static void cci_pmu_enable(struct pmu *pmu)
> {
> struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
> struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
> - int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_events);
> + int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
> unsigned long flags;
> u32 val;
>
> @@ -659,13 +670,16 @@ static int
> validate_group(struct perf_event *event)
> {
> struct perf_event *sibling, *leader = event->group_leader;
> + struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
> + unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)];
> struct cci_pmu_hw_events fake_pmu = {
> /*
> * Initialise the fake PMU. We only need to populate the
> * used_mask for the purposes of validation.
> */
> - .used_mask = { 0 },
> + .used_mask = mask,
> };
> + memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long));
>
> if (!validate_event(event->pmu, &fake_pmu, leader))
> return -EINVAL;
> @@ -819,6 +833,7 @@ static const struct attribute_group *pmu_attr_groups[] = {
> static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
> {
> char *name = cci_pmu->model->name;
> + u32 num_cntrs;
>
> pmu_cpumask_attr.var = cci_pmu;
> cci_pmu->pmu = (struct pmu) {
> @@ -836,7 +851,15 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
> };
>
> cci_pmu->plat_device = pdev;
> - cci_pmu->num_events = pmu_get_max_counters();
> + num_cntrs = pmu_get_max_counters();
> + if (num_cntrs > cci_pmu->model->num_hw_cntrs) {
> + dev_warn(&pdev->dev,
> + "PMU implements more counters(%d) than supported by"
> + " the model(%d), truncated.",
> + num_cntrs, cci_pmu->model->num_hw_cntrs);
> + num_cntrs = cci_pmu->model->num_hw_cntrs;
> + }
> + cci_pmu->num_cntrs = num_cntrs + cci_pmu->model->fixed_hw_cntrs;
>
> return perf_pmu_register(&cci_pmu->pmu, name, -1);
> }
> @@ -871,6 +894,9 @@ static int cci_pmu_cpu_notifier(struct notifier_block *self,
> static struct cci_pmu_model cci_pmu_models[] = {
> [CCI_REV_R0] = {
> .name = "CCI_400",
> + .fixed_hw_cntrs = 1, /* Cycle counter */
> + .num_hw_cntrs = 4,
> + .cntr_size = SZ_4K,
> .event_ranges = {
> [CCI_IF_SLAVE] = {
> CCI_REV_R0_SLAVE_PORT_MIN_EV,
> @@ -884,6 +910,9 @@ static struct cci_pmu_model cci_pmu_models[] = {
> },
> [CCI_REV_R1] = {
> .name = "CCI_400_r1",
> + .fixed_hw_cntrs = 1, /* Cycle counter */
> + .num_hw_cntrs = 4,
> + .cntr_size = SZ_4K,
> .event_ranges = {
> [CCI_IF_SLAVE] = {
> CCI_REV_R1_SLAVE_PORT_MIN_EV,
> @@ -938,35 +967,69 @@ static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
> return false;
> }
>
> -static int cci_pmu_probe(struct platform_device *pdev)
> +static struct cci_pmu *cci_pmu_alloc(struct platform_device *pdev)
> {
> - struct resource *res;
> struct cci_pmu *cci_pmu;
> - int i, ret, irq;
> const struct cci_pmu_model *model;
>
> + /*
> + * All allocations are devm_* hence we don't have to free
> + * them explicitly on an error, as it would end up in driver
> + * detach.
> + */
> model = get_cci_model(pdev);
> if (!model) {
> dev_warn(&pdev->dev, "CCI PMU version not supported\n");
> - return -ENODEV;
> + return ERR_PTR(-ENODEV);
> }
>
> cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*cci_pmu), GFP_KERNEL);
> if (!cci_pmu)
> - return -ENOMEM;
> + return ERR_PTR(-ENOMEM);
>
> cci_pmu->model = model;
> + cci_pmu->irqs = devm_kcalloc(&pdev->dev, CCI_PMU_MAX_HW_CNTRS(model),
> + sizeof(*cci_pmu->irqs), GFP_KERNEL);
> + if (!cci_pmu->irqs)
> + return ERR_PTR(-ENOMEM);
> + cci_pmu->hw_events.events = devm_kcalloc(&pdev->dev,
> + CCI_PMU_MAX_HW_CNTRS(model),
> + sizeof(*cci_pmu->hw_events.events),
> + GFP_KERNEL);
> + if (!cci_pmu->hw_events.events)
> + return ERR_PTR(-ENOMEM);
> + cci_pmu->hw_events.used_mask = devm_kcalloc(&pdev->dev,
> + BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)),
> + sizeof(*cci_pmu->hw_events.used_mask),
> + GFP_KERNEL);
> + if (!cci_pmu->hw_events.used_mask)
> + return ERR_PTR(-ENOMEM);
> +
> + return cci_pmu;
> +}
> +
> +
> +static int cci_pmu_probe(struct platform_device *pdev)
> +{
> + struct resource *res;
> + struct cci_pmu *cci_pmu;
> + int i, ret, irq;
> +
> + cci_pmu = cci_pmu_alloc(pdev);
> + if (IS_ERR(cci_pmu))
> + return PTR_ERR(cci_pmu);
> +
> res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> cci_pmu->base = devm_ioremap_resource(&pdev->dev, res);
> if (IS_ERR(cci_pmu->base))
> return -ENOMEM;
>
> /*
> - * CCI PMU has 5 overflow signals - one per counter; but some may be tied
> + * CCI PMU has one overflow interrupt per counter; but some may be tied
> * together to a common interrupt.
> */
> cci_pmu->nr_irqs = 0;
> - for (i = 0; i < CCI_PMU_MAX_HW_EVENTS; i++) {
> + for (i = 0; i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model); i++) {
> irq = platform_get_irq(pdev, i);
> if (irq < 0)
> break;
> @@ -981,9 +1044,9 @@ static int cci_pmu_probe(struct platform_device *pdev)
> * Ensure that the device tree has as many interrupts as the number
> * of counters.
> */
> - if (i < CCI_PMU_MAX_HW_EVENTS) {
> + if (i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model)) {
> dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n",
> - i, CCI_PMU_MAX_HW_EVENTS);
> + i, CCI_PMU_MAX_HW_CNTRS(cci_pmu->model));
> return -EINVAL;
> }
More information about the linux-arm-kernel
mailing list