[PATCH] coresight: etm4x: add CPU hotplug support for probing

Tamas Zsoldos tamas.zsoldos at arm.com
Mon Jul 4 08:45:27 PDT 2022


Hi Suzuki,

On 6/23/22 11:25, Suzuki K Poulose wrote:
> Hi Tamas
> 
> Thanks for the patch. I have tested this on my Juno board and
> it works fine. I have some comments inline.

Thank you for taking a look. I will send out a v2 based on your
suggestions.

> On 16/06/2022 16:18, Tamas Zsoldos wrote:
>> etm4x devices cannot be successfully probed when their CPU is offline.
>> This adds a mechanism to delay the probing in this case and try again
>> once the CPU is brought online.
> 
> It may be worth elaborating this a bit. e.g,
> 
>   "For e.g., if the system is booted with maxcpus=n, the ETM won't
>    be probed on CPUs n+1, even when the CPUs are brought up online
>    later"
> 
> , >
>> Signed-off-by: Tamas Zsoldos <tamas.zsoldos at arm.com>
>> ---
>>   .../coresight/coresight-etm4x-core.c          | 195 ++++++++++++++----
>>   1 file changed, 155 insertions(+), 40 deletions(-)
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
>> b/drivers/hwtracing/coresight/coresight-etm4x-core.c
>> index 7f416a12000e..f0bff08e9de9 100644
>> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
>> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
>> @@ -70,6 +70,14 @@ struct etm4_init_arg {
>>       struct csdev_access    *csa;
>>   };
>> +struct etm4_delayed_probe {
>> +    u32            etm_pid;
>> +    struct device        *dev;
>> +};
>> +
>> +static DEFINE_PER_CPU(struct etm4_delayed_probe *, delayed_probe);
>> +static enum cpuhp_state hp_probe;
> 
> I am wondering if we can reuse the existing hotplug notifier ? See
> more on this below.
> 
>> +
>>   /*
>>    * Check if TRCSSPCICRn(i) is implemented for a given instance.
>>    *
>> @@ -1938,48 +1946,20 @@ static void etm4_pm_clear(void)
>>       }
>>   }
>> -static int etm4_probe(struct device *dev, void __iomem *base, u32 
>> etm_pid)
>> +static int etm4_add_coresight_dev(struct device *dev,
>> +                  struct csdev_access *access)
> 
> Could this be :
>      etm4_add_coresight_dev(struct etm4_init_arg *init_arg)
> 
> That provides us with "access". Also if replace
> 
>      init_arg.drvdata field with init_arg.dev
> 
> we can use that for
>      etm4_init_arch_data()
> and
>      etm4_add_coresight_dev.
> 
> we can get to drvdata from the dev via, dev_get_drvdata(dev).
> 
>>   {
>>       int ret;
>>       struct coresight_platform_data *pdata = NULL;
>> -    struct etmv4_drvdata *drvdata;
>> +    struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
>>       struct coresight_desc desc = { 0 };
>> -    struct etm4_init_arg init_arg = { 0 };
>>       u8 major, minor;
>>       char *type_name;
>> -    drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
>>       if (!drvdata)
>> -        return -ENOMEM;
>> -
>> -    dev_set_drvdata(dev, drvdata);
>> -
>> -    if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE)
>> -        pm_save_enable = coresight_loses_context_with_cpu(dev) ?
>> -                   PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
>> -
>> -    if (pm_save_enable != PARAM_PM_SAVE_NEVER) {
>> -        drvdata->save_state = devm_kmalloc(dev,
>> -                sizeof(struct etmv4_save_state), GFP_KERNEL);
>> -        if (!drvdata->save_state)
>> -            return -ENOMEM;
>> -    }
>> -
>> -    drvdata->base = base;
>> -
>> -    spin_lock_init(&drvdata->spinlock);
>> -
>> -    drvdata->cpu = coresight_get_cpu(dev);
>> -    if (drvdata->cpu < 0)
>> -        return drvdata->cpu;
>> -
>> -    init_arg.drvdata = drvdata;
>> -    init_arg.csa = &desc.access;
>> -    init_arg.pid = etm_pid;
>> +        return -EINVAL;
>> -    if (smp_call_function_single(drvdata->cpu,
>> -                etm4_init_arch_data,  &init_arg, 1))
>> -        dev_err(dev, "ETM arch init failed\n");
>> +    desc.access = *access;
>>       if (!drvdata->arch)
>>           return -EINVAL;
>> @@ -2050,6 +2030,69 @@ static int etm4_probe(struct device *dev, void 
>> __iomem *base, u32 etm_pid)
>>       return 0;
>>   }
>> +static int etm4_probe(struct device *dev, void __iomem *base, u32 
>> etm_pid)
>> +{
> 
> 
> 
>> +    struct etmv4_drvdata *drvdata;
>> +    struct csdev_access access = { 0 };
>> +    struct etm4_init_arg init_arg = { 0 };
>> +    struct etm4_delayed_probe *delayed;
>> +
>> +    drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
>> +    if (!drvdata)
>> +        return -ENOMEM;
>> +
>> +    dev_set_drvdata(dev, drvdata);
>> +
>> +    if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE)
>> +        pm_save_enable = coresight_loses_context_with_cpu(dev) ?
>> +                   PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
>> +
>> +    if (pm_save_enable != PARAM_PM_SAVE_NEVER) {
>> +        drvdata->save_state = devm_kmalloc(dev,
>> +                sizeof(struct etmv4_save_state), GFP_KERNEL);
>> +        if (!drvdata->save_state)
>> +            return -ENOMEM;
>> +    }
>> +
>> +    drvdata->base = base;
>> +
>> +    spin_lock_init(&drvdata->spinlock);
>> +
>> +    drvdata->cpu = coresight_get_cpu(dev);
>> +    if (drvdata->cpu < 0)
>> +        return drvdata->cpu;
>> +
>> +    init_arg.drvdata = drvdata;
>> +    init_arg.csa = &access;
>> +    init_arg.pid = etm_pid;
>> +
>> +    /*
>> +     * Serialize against CPUHP callbacks to avoid race condition
>> +     * between the smp call and saving the delayed probe.
>> +     */
>> +    cpus_read_lock();
>> +    if (smp_call_function_single(drvdata->cpu,
>> +                etm4_init_arch_data,  &init_arg, 1)) {
>> +        /* The CPU was offline, try again once it comes online. */
>> +        delayed = devm_kmalloc(dev, sizeof(*delayed), GFP_KERNEL);
>> +        if (!delayed) {
>> +            cpus_read_unlock();
>> +            return -ENOMEM;
>> +        }
>> +
>> +        delayed->etm_pid = etm_pid;
>> +        delayed->dev = dev;
>> +
>> +        per_cpu(delayed_probe, drvdata->cpu) = delayed;
>> +
>> +        cpus_read_unlock();
>> +        return 0;
>> +    }
>> +    cpus_read_unlock();
>> +
>> +    return etm4_add_coresight_dev(dev, &access);
>> +}
>> +
>>   static int etm4_probe_amba(struct amba_device *adev, const struct 
>> amba_id *id)
>>   {
>>       void __iomem *base;
>> @@ -2088,6 +2131,64 @@ static int etm4_probe_platform_dev(struct 
>> platform_device *pdev)
>>       return ret;
>>   }
>> +static int etm4_probe_cpu(unsigned int cpu)
>> +{
>> +    int ret;
>> +    struct etm4_delayed_probe di;
>> +    struct csdev_access access = { 0 };
>> +    struct etm4_init_arg init_arg;
>> +    struct etm4_delayed_probe *dp = *this_cpu_ptr(&delayed_probe);
> 
> This could be called from the existing etm4_starting_cpu() ?

etm4_starting_cpu() is too early (it results in a "bad: scheduling
from the idle thread!" through the AMBA pm runtime), but
etm4_online_cpu() works.

> static int etm4_starting_cpu(unsigned int cpu)
> {
>      if (!etmdrvdata[cpu])
> -        return 0;
> +         return etm4_probe_cpu(cpu);
> 
> 
>          spin_lock(&etmdrvdata[cpu]->spinlock);
>          if (!etmdrvdata[cpu]->os_unlock)
>                  etm4_os_unlock(etmdrvdata[cpu]);
> 
>          if (local_read(&etmdrvdata[cpu]->mode))
>                  etm4_enable_hw(etmdrvdata[cpu]);
>          spin_unlock(&etmdrvdata[cpu]->spinlock);
> 
> }
> 
>> +
>> +    if (!dp)
>> +        return 0;
>> +
>> +    di = *dp;
>> +    devm_kfree(di.dev, dp);
>> +    *this_cpu_ptr(&delayed_probe) = NULL;
>> +
>> +    ret = pm_runtime_resume_and_get(di.dev);
>> +    if (ret < 0) {
>> +        dev_err(di.dev, "Failed to get PM runtime!\n");
>> +        return 0;
>> +    }
>> +
>> +    init_arg.drvdata =  dev_get_drvdata(di.dev);
>> +    if (!init_arg.drvdata)
>> +        return 0;
>> +
>> +    init_arg.csa = &access;
>> +    init_arg.pid = di.etm_pid;
>> +    etm4_init_arch_data(&init_arg);
>> +
>> +    etm4_add_coresight_dev(di.dev, &access);
>> +
>> +    pm_runtime_put(di.dev);
>> +    return 0;
>> +}
>> +
> 
> 
>> +static int etm4_setup_cpuhp_probe(void)
>> +{
>> +    int ret;
>> +
>> +    ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
>> +                    "arm/coresight4:delayed_probe",
>> +                    etm4_probe_cpu, NULL);
>> +
>> +    if (ret > 0) {
>> +        hp_probe = ret;
>> +        return 0;
>> +    }
>> +    return ret;
>> +}
>> +
>> +static void etm4_remove_cpuhp_probe(void)
>> +{
>> +    if (hp_probe)
>> +        cpuhp_remove_state_nocalls(hp_probe);
>> +
>> +    hp_probe = 0;
>> +}
>> +
> 
> And we could get rid of the above ^^
> 
>>   static struct amba_cs_uci_id uci_id_etm4[] = {
>>       {
>>           /*  ETMv4 UCI data */
>> @@ -2102,16 +2203,20 @@ static void clear_etmdrvdata(void *info)
>>       int cpu = *(int *)info;
>>       etmdrvdata[cpu] = NULL;
>> +    per_cpu(delayed_probe, cpu) = NULL;
>>   }
>>   static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata)
>>   {
>> -    etm_perf_symlink(drvdata->csdev, false);
>> +    bool had_dev;
>>       /*
>>        * Taking hotplug lock here to avoid racing between 
>> etm4_remove_dev()
>>        * and CPU hotplug call backs.
>>        */
>>       cpus_read_lock();
>> +
>> +    had_dev = etmdrvdata[drvdata->cpu];
>> +
> 
> Is this ever false ? Why do we need this change ? How does this patch
> affect the tear down ?

Yes, it will be false when a delayed probe is pending. Since the patch
introduces a new state the driver can be in (the delayed probe), it
needs to make sure it does not try to undo things that have not been
done yet. This is what this condition is meant to do: it basically
checks if etm4_add_coresight_dev() has already run for this device.

It would be probably easier to understand if it just checked if
there's a delayed probe, I will change it to that.

Thanks,
Tamás



More information about the linux-arm-kernel mailing list