[PATCH 2/2] perf:arm-ni: support PMUs to share IRQs for different clock domains

Shouping Wang allen.wang at hj-micro.com
Tue Apr 22 20:42:38 PDT 2025


‌Can multiple CDs of an NI700 device only register single PMU (similar
to CMN PMU driver) by treating their PMUs as a unified entity to support
shared interrupts? or do you already have existing solutions that
support shared interrupt scenarios?

On 4/22/2025 10:35 PM, Robin Murphy wrote:
> On 21/04/2025 10:54 am, Shouping Wang wrote:
>> For the same NI700 device, different CDs share a PMU interrupt. The
>> cd->cpu is set to the first CPU in the NUMA node associated with the
>> device, meaning all CDs share the same cd->cpu value.
> 
> But what if CPUs are being onlined/offlined while arm_ni_probe() is
> running? We can't necessarily assume that multiple calls to
> cpumask_local_spread() at different points in time will give consistent
> results to begin with.
> 
>> During event_init,
>> event->cpu is assigned to cd->cpu, resulting in all CD PMUs sharing the
>> interrupt being bound to the same CPU context. if the CPU goes offline,
>> the PMU context on that CPU is migrated to a target CPU, and the
>> interrupt is rebound to the target CPU.
> 
> Except the CDs will *each* try to migrate the same IRQ one-by-one as if
> it's theirs uniquely. I suppose it's quite likely that the
> cpumask_any_*() calls will all happen to pick the same new target CPU,
> given that we should be under the global CPU hotplug lock at that point,
> but I'm still not sure that's strictly guaranteed. Furthermore it opens
> a race window where as soon as the first irq_set_affinity() call has
> taken effect, the IRQ handler for all the *other* CDs may then be called
> on the new CPU before their respective PMU contexts have been migrated.
> 
>> My understanding is that the CPU
>> affinity and hotplug operations can remain synchronized. Could you
>> confirm if there are any misunderstandings in this logic?
> 
> For that to happen *reliably*, essentially the IRQ affinity needs to be
> managed at an equivalent level to the initial IRQ request itself.
> 
> Thanks,
> Robin.
> 
>> On 4/17/2025 10:41 PM, Robin Murphy wrote:
>>> On 10/04/2025 12:42 pm, Shouping Wang wrote:
>>>> The ARM NI700 contains multiple clock domains, each with a PMU.
>>>> In some hardware implementations, these PMUs under the same device
>>>> share a common interrupt line. The current codes implementation
>>>> only supports requesting a separate IRQ for each clock domain's PMU.
>>>>
>>>> Here, a single interrupt handler is registered for shared interrupt.
>>>> Within this handler, the interrupt status of all PMUs sharing the
>>>> interrupt is checked.
>>>
>>> Unfortunately this isn't sufficient for sharing an IRQ between multiple
>>> PMUs - the CPU affinity and hotplug context migration must be kept in
>>> sync as well.
>>>
>>> I guess I really should get back to my old plan to factor out a common
>>> helper library for all this stuff - that was the main reason I left
>>> combined IRQ support out of the initial version here rather than do
>>> another copy-paste of the arm_dmc620 design again...
>>>
>>> Thanks,
>>> Robin.
>>>
>>>> Signed-off-by: Shouping Wang <allen.wang at hj-micro.com>
>>>> ---
>>>>    drivers/perf/arm-ni.c | 77 ++++++++++++++++++++++++++++
>>>> +--------------
>>>>    1 file changed, 53 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c
>>>> index 3f3d2e0f91fa..611085e89436 100644
>>>> --- a/drivers/perf/arm-ni.c
>>>> +++ b/drivers/perf/arm-ni.c
>>>> @@ -104,6 +104,7 @@ struct arm_ni_cd {
>>>>        u16 id;
>>>>        int num_units;
>>>>        int irq;
>>>> +    s8 irq_friend;
>>>>        int cpu;
>>>>        struct hlist_node cpuhp_node;
>>>>        struct pmu pmu;
>>>> @@ -446,26 +447,31 @@ static irqreturn_t arm_ni_handle_irq(int irq,
>>>> void *dev_id)
>>>>    {
>>>>        struct arm_ni_cd *cd = dev_id;
>>>>        irqreturn_t ret = IRQ_NONE;
>>>> -    u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
>>>> +    u32 reg;
>>>>    -    if (reg & (1U << NI_CCNT_IDX)) {
>>>> -        ret = IRQ_HANDLED;
>>>> -        if (!(WARN_ON(!cd->ccnt))) {
>>>> -            arm_ni_event_read(cd->ccnt);
>>>> -            arm_ni_init_ccnt(cd);
>>>> +    for (;;) {
>>>> +        reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
>>>> +        if (reg & (1U << NI_CCNT_IDX)) {
>>>> +            ret = IRQ_HANDLED;
>>>> +            if (!(WARN_ON(!cd->ccnt))) {
>>>> +                arm_ni_event_read(cd->ccnt);
>>>> +                arm_ni_init_ccnt(cd);
>>>> +            }
>>>>            }
>>>> -    }
>>>> -    for (int i = 0; i < NI_NUM_COUNTERS; i++) {
>>>> -        if (!(reg & (1U << i)))
>>>> -            continue;
>>>> -        ret = IRQ_HANDLED;
>>>> -        if (!(WARN_ON(!cd->evcnt[i]))) {
>>>> -            arm_ni_event_read(cd->evcnt[i]);
>>>> -            arm_ni_init_evcnt(cd, i);
>>>> +        for (int i = 0; i < NI_NUM_COUNTERS; i++) {
>>>> +            if (!(reg & (1U << i)))
>>>> +                continue;
>>>> +            ret = IRQ_HANDLED;
>>>> +            if (!(WARN_ON(!cd->evcnt[i]))) {
>>>> +                arm_ni_event_read(cd->evcnt[i]);
>>>> +                arm_ni_init_evcnt(cd, i);
>>>> +            }
>>>>            }
>>>> +        writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
>>>> +        if (!cd->irq_friend)
>>>> +            return ret;
>>>> +        cd += cd->irq_friend;
>>>>        }
>>>> -    writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
>>>> -    return ret;
>>>>    }
>>>>      static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node
>>>> *node, u64 res_start)
>>>> @@ -538,12 +544,6 @@ static int arm_ni_init_cd(struct arm_ni *ni,
>>>> struct arm_ni_node *node, u64 res_s
>>>>        if (cd->irq < 0)
>>>>            return cd->irq;
>>>>    -    err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq,
>>>> -                   IRQF_NOBALANCING | IRQF_NO_THREAD,
>>>> -                   dev_name(ni->dev), cd);
>>>> -    if (err)
>>>> -        return err;
>>>> -
>>>>        cd->cpu = cpumask_local_spread(0, dev_to_node(ni->dev));
>>>>        cd->pmu = (struct pmu) {
>>>>            .module = THIS_MODULE,
>>>> @@ -603,6 +603,30 @@ static void arm_ni_probe_domain(void __iomem
>>>> *base, struct arm_ni_node *node)
>>>>        node->num_components = readl_relaxed(base + NI_CHILD_NODE_INFO);
>>>>    }
>>>>    +static int arm_ni_irq_init(struct arm_ni *ni)
>>>> +{
>>>> +    int irq;
>>>> +    int err = 0;
>>>> +
>>>> +    for (int i = 0; i < ni->num_cds; i++) {
>>>> +        irq = ni->cds[i].irq;
>>>> +        for (int j = i; j--; ) {
>>>> +            if (ni->cds[j].irq == irq) {
>>>> +                ni->cds[j].irq_friend = i-j;
>>>> +                goto next;
>>>> +            }
>>>> +        }
>>>> +        err =  devm_request_irq(ni->dev, irq, arm_ni_handle_irq,
>>>> +                    IRQF_NOBALANCING | IRQF_NO_THREAD,
>>>> +                     dev_name(ni->dev), &ni->cds[i]);
>>>> +        if (err)
>>>> +            return err;
>>>> +next:
>>>> +        ;
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>>    static int arm_ni_probe(struct platform_device *pdev)
>>>>    {
>>>>        struct arm_ni_node cfg, vd, pd, cd;
>>>> @@ -611,6 +635,7 @@ static int arm_ni_probe(struct platform_device
>>>> *pdev)
>>>>        void __iomem *base;
>>>>        static atomic_t id;
>>>>        int num_cds;
>>>> +    int ret;
>>>>        u32 reg, part;
>>>>          /*
>>>> @@ -669,8 +694,6 @@ static int arm_ni_probe(struct platform_device
>>>> *pdev)
>>>>                reg = readl_relaxed(vd.base + NI_CHILD_PTR(p));
>>>>                arm_ni_probe_domain(base + reg, &pd);
>>>>                for (int c = 0; c < pd.num_components; c++) {
>>>> -                int ret;
>>>> -
>>>>                    reg = readl_relaxed(pd.base + NI_CHILD_PTR(c));
>>>>                    arm_ni_probe_domain(base + reg, &cd);
>>>>                    ret = arm_ni_init_cd(ni, &cd, res->start);
>>>> @@ -683,6 +706,12 @@ static int arm_ni_probe(struct platform_device
>>>> *pdev)
>>>>            }
>>>>        }
>>>>    +    ret = arm_ni_irq_init(ni);
>>>> +    if (ret) {
>>>> +        arm_ni_remove(pdev);
>>>> +        return ret;
>>>> +    }
>>>> +
>>>>        return 0;
>>>>    }
>>>>    
>>>
>>>
>>
> 
> 




More information about the linux-arm-kernel mailing list