[PATCH 3/3] perf/arm-ni: Support sharing IRQs within an NI instance
Will Deacon
will at kernel.org
Mon May 19 05:29:57 PDT 2025
On Tue, May 13, 2025 at 04:39:00PM +0100, Robin Murphy wrote:
> From: Shouping Wang <allen.wang at hj-micro.com>
>
> NI-700 has a distinct PMU interrupt output for each Clock Domain,
> however some integrations may still combine these together externally.
> The initial driver didn't attempt to support this, in anticipation of a
> more general solution for IRQ sharing between system PMU instances, but
> that's still a way off, so let's make this intermediate step for now to
> at least allow sharing IRQs within an individual NI instance.
>
> Now that CPU affinity and migration are cleaned up, it's fairly
> straightforward to adopt similar logic to arm-cmn, to identify CDs with
> a common interrupt and loop over them directly in the handler.
>
> Signed-off-by: Shouping Wang <allen.wang at hj-micro.com>
> [ rm: Rework for affinity handling, cosmetics, new commit message ]
> Signed-off-by: Robin Murphy <robin.murphy at arm.com>
> ---
> drivers/perf/arm-ni.c | 78 ++++++++++++++++++++++++++++---------------
> 1 file changed, 51 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c
> index 168750e78fc4..ca033c8785ff 100644
> --- a/drivers/perf/arm-ni.c
> +++ b/drivers/perf/arm-ni.c
> @@ -102,6 +102,7 @@ struct arm_ni_unit {
> struct arm_ni_cd {
> void __iomem *pmu_base;
> u16 id;
> + s8 irq_friend;
Why not just store the pointer to the cd as opposed to a relative offset?
> int num_units;
> int irq;
> struct pmu pmu;
> @@ -448,33 +449,37 @@ static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id)
> {
> struct arm_ni_cd *cd = dev_id;
> irqreturn_t ret = IRQ_NONE;
> - u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
>
> - if (reg & (1U << NI_CCNT_IDX)) {
> - ret = IRQ_HANDLED;
> - if (!(WARN_ON(!cd->ccnt))) {
> - arm_ni_event_read(cd->ccnt);
> - arm_ni_init_ccnt(cd);
> + for (;;) {
Could this be a do { ... } while (cd) loop instead?
> + u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
> +
> + if (reg & (1U << NI_CCNT_IDX)) {
> + ret = IRQ_HANDLED;
> + if (!(WARN_ON(!cd->ccnt))) {
> + arm_ni_event_read(cd->ccnt);
> + arm_ni_init_ccnt(cd);
> + }
> }
> - }
> - for (int i = 0; i < NI_NUM_COUNTERS; i++) {
> - if (!(reg & (1U << i)))
> - continue;
> - ret = IRQ_HANDLED;
> - if (!(WARN_ON(!cd->evcnt[i]))) {
> - arm_ni_event_read(cd->evcnt[i]);
> - arm_ni_init_evcnt(cd, i);
> + for (int i = 0; i < NI_NUM_COUNTERS; i++) {
> + if (!(reg & (1U << i)))
> + continue;
> + ret = IRQ_HANDLED;
> + if (!(WARN_ON(!cd->evcnt[i]))) {
> + arm_ni_event_read(cd->evcnt[i]);
> + arm_ni_init_evcnt(cd, i);
> + }
> }
> + writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
> + if (!cd->irq_friend)
> + return ret;
> + cd += cd->irq_friend;
> }
> - writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
> - return ret;
> }
>
> static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_start)
> {
> struct arm_ni_cd *cd = ni->cds + node->id;
> const char *name;
> - int err;
>
> cd->id = node->id;
> cd->num_units = node->num_components;
> @@ -534,20 +539,11 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s
> cd->pmu_base + NI_PMCR);
> writel_relaxed(U32_MAX, cd->pmu_base + NI_PMCNTENCLR);
> writel_relaxed(U32_MAX, cd->pmu_base + NI_PMOVSCLR);
> - writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET);
>
> cd->irq = platform_get_irq(to_platform_device(ni->dev), cd->id);
> if (cd->irq < 0)
> return cd->irq;
>
> - err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq,
> - IRQF_NOBALANCING | IRQF_NO_THREAD,
> - dev_name(ni->dev), cd);
> - if (err)
> - return err;
> -
> - irq_set_affinity(cd->irq, cpumask_of(ni->cpu));
> -
> cd->pmu = (struct pmu) {
> .module = THIS_MODULE,
> .parent = ni->dev,
> @@ -593,6 +589,30 @@ static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node)
> node->num_components = readl_relaxed(base + NI_CHILD_NODE_INFO);
> }
>
> +static int arm_ni_init_irqs(struct arm_ni *ni)
> +{
> + int err;
> +
> + ni_for_each_cd(ni, cd) {
> + for (struct arm_ni_cd *prev = cd; prev-- > ni->cds; ) {
> + if (prev->irq == cd->irq) {
> + prev->irq_friend = cd - prev;
Can't this race with the read of `irq_friend` in the interrupt handler?
Similarly, how do you handle the race between the interrupt firing on
one CD and the friend CD being torn down on e.g. the remove path?
> + goto enable_irq;
> + }
> + }
> + err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq,
> + IRQF_NOBALANCING | IRQF_NO_THREAD,
> + dev_name(ni->dev), cd);
What's the reason not to use IRQF_SHARED and register multiple handlers?
I'm sure there is one, but capturing that in a comment would help to
justify the manual multiplexing.
Will
More information about the linux-arm-kernel
mailing list