[PATCH 2/3] coresight etm4x: Add 32-bit read/write option to split 64-bit words
Suzuki K Poulose
suzuki.poulose at arm.com
Mon Jan 23 09:58:48 PST 2023
On 22/01/2023 08:32, Steve Clevenger wrote:
>
> Hi Suzuki,
>
> Comments in-line.
>
> Steve
>
> On 1/20/2023 3:19 AM, Suzuki K Poulose wrote:
>> On 20/01/2023 00:51, Steve Clevenger wrote:
>>> Add 32-bit read/write access option for Ampere ETMv4.6 64-bit registers.
>>> Ampere Computing erratum AC03_DEBUG_10 describes a design decision where
>>> 64-bit read/write access is not supported for the ETMv4.6 implementation.
>>> These 64-bit registers must be accessed as 2 ea. 32-bit registers.
>>> AC03_DEBUG_10 is described in the AmpereOne Developer Errata:
>>> https://solutions.amperecomputing.com/customer-connect/products/AmpereOne-device-documentation
>>
>> As with the previous comment, please:
>> a) Clarify whether this is because of the system instruction access support
>> b) Document the erratum
>>
> I presume you're referring to your previous comment about adding these
> errata to "Documentation/arm64/silicon-errata.rst". Let me see if
> there's any heartburn with this internal to Ampere. I don't expect there
> to be.
>
>>>
>>> Fix the for() loop range bug in etm4_enable_hw: the loop bound
>>> drvdata->nr_addr_cmp should be drvdata->nr_addr_cmp * 2.
>>
>> Good catch ! Please separate this out and send it as a fix. I can queue
>> this.
> I'll submit it as a separate patch.
>
>>
>>>
>>> Signed-off-by: Steve Clevenger <scclevenger at os.amperecomputing.com>
>>> ---
>>> .../coresight/coresight-etm4x-core.c | 81 ++++++++++++++-----
>>> drivers/hwtracing/coresight/coresight-etm4x.h | 32 ++++++++
>>> 2 files changed, 93 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c
>>> b/drivers/hwtracing/coresight/coresight-etm4x-core.c
>>> index 533be1928a09..bf4daa649cdf 100644
>>> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
>>> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
>>> @@ -452,18 +452,31 @@ static int etm4_enable_hw(struct etmv4_drvdata
>>> *drvdata)
>>> if (etm4x_sspcicrn_present(drvdata, i))
>>> etm4x_relaxed_write32(csa, config->ss_pe_cmp[i],
>>> TRCSSPCICRn(i));
>>> }
>>> - for (i = 0; i < drvdata->nr_addr_cmp; i++) {
>>> - etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
>>> - etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
>>> + for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>>> + if (drvdata->no_quad_mmio) {
>>> + etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i));
>>> + etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i));
>>> + } else {
>>> + etm4x_relaxed_write64(csa, config->addr_val[i],
>>> TRCACVRn(i));
>>> + etm4x_relaxed_write64(csa, config->addr_acc[i],
>>> TRCACATRn(i));
>>> + }
>>> + }
>>
>> Something like this can leave some places out. I think we could hide
>> it under the generic helpers and handle it there. Maybe "struct
>> csdev_access" can cache this "no_quad_mmio" and do the right thing?
> I'm not sure what you're suggesting here. Please be more specific.
>
e.g.,
struct csdev_access {
bool no_64bit_access;
}
And then the csdev_* operations could do:
if (csa->no_64bit_access) {
split access
} else {
}
i.e., move the tracking of no_quad_mmio to "csa" from "drvdata"
Suzuki
>>
>>
>>> + for (i = 0; i < drvdata->numcidc; i++) {
>>> + if (drvdata->no_quad_mmio)
>>> + etm4x_split_write64(csa, config->ctxid_pid[i],
>>> TRCCIDCVRn(i));
>>> + else
>>> + etm4x_relaxed_write64(csa, config->ctxid_pid[i],
>>> TRCCIDCVRn(i));
>>> }
>>> - for (i = 0; i < drvdata->numcidc; i++)
>>> - etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
>>> etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
>>> if (drvdata->numcidc > 4)
>>> etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
>>> - for (i = 0; i < drvdata->numvmidc; i++)
>>> - etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
>>> + for (i = 0; i < drvdata->numvmidc; i++) {
>>> + if (drvdata->no_quad_mmio)
>>> + etm4x_split_write64(csa, config->vmid_val[i],
>>> TRCVMIDCVRn(i));
>>> + else
>>> + etm4x_relaxed_write64(csa, config->vmid_val[i],
>>> TRCVMIDCVRn(i));
>>> + }
>>> etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
>>> if (drvdata->numvmidc > 4)
>>> etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
>>> @@ -1670,8 +1683,13 @@ static int __etm4_cpu_save(struct etmv4_drvdata
>>> *drvdata)
>>> }
>>> for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>>> - state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
>>> - state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
>>> + if (drvdata->no_quad_mmio) {
>>> + state->trcacvr[i] = etm4x_split_read64(csa, TRCACVRn(i));
>>> + state->trcacatr[i] = etm4x_split_read64(csa, TRCACATRn(i));
>>> + } else {
>>> + state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
>>> + state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
>>> + }
>>> }
>>> /*
>>> @@ -1681,11 +1699,19 @@ static int __etm4_cpu_save(struct
>>> etmv4_drvdata *drvdata)
>>> * unit") of ARM IHI 0064D.
>>> */
>>> - for (i = 0; i < drvdata->numcidc; i++)
>>> - state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
>>> + for (i = 0; i < drvdata->numcidc; i++) {
>>> + if (drvdata->no_quad_mmio)
>>> + state->trccidcvr[i] = etm4x_split_read64(csa,
>>> TRCCIDCVRn(i));
>>> + else
>>> + state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
>>> + }
>>> - for (i = 0; i < drvdata->numvmidc; i++)
>>> - state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
>>> + for (i = 0; i < drvdata->numvmidc; i++) {
>>> + if (drvdata->no_quad_mmio)
>>> + state->trcvmidcvr[i] = etm4x_split_read64(csa,
>>> TRCVMIDCVRn(i));
>>> + else
>>> + state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
>>> + }
>>> state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
>>> if (drvdata->numcidc > 4)
>>> @@ -1799,15 +1825,28 @@ static void __etm4_cpu_restore(struct
>>> etmv4_drvdata *drvdata)
>>> }
>>> for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>>> - etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
>>> - etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
>>> + if (drvdata->no_quad_mmio) {
>>> + etm4x_split_write64(csa, state->trcacvr[i], TRCACVRn(i));
>>> + etm4x_split_write64(csa, state->trcacatr[i], TRCACATRn(i));
>>> + } else {
>>> + etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
>>> + etm4x_relaxed_write64(csa, state->trcacatr[i],
>>> TRCACATRn(i));
>>> + }
>>> }
>>> - for (i = 0; i < drvdata->numcidc; i++)
>>> - etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
>>> + for (i = 0; i < drvdata->numcidc; i++) {
>>> + if (drvdata->no_quad_mmio)
>>> + etm4x_split_write64(csa, state->trccidcvr[i],
>>> TRCCIDCVRn(i));
>>> + else
>>> + etm4x_relaxed_write64(csa, state->trccidcvr[i],
>>> TRCCIDCVRn(i));
>>> + }
>>> - for (i = 0; i < drvdata->numvmidc; i++)
>>> - etm4x_relaxed_write64(csa, state->trcvmidcvr[i],
>>> TRCVMIDCVRn(i));
>>> + for (i = 0; i < drvdata->numvmidc; i++) {
>>> + if (drvdata->no_quad_mmio)
>>> + etm4x_split_write64(csa, state->trcvmidcvr[i],
>>> TRCVMIDCVRn(i));
>>> + else
>>> + etm4x_relaxed_write64(csa, state->trcvmidcvr[i],
>>> TRCVMIDCVRn(i));
>>> + }
>>> etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
>>> if (drvdata->numcidc > 4)
>>> @@ -2047,8 +2086,10 @@ static int etm4_probe(struct device *dev, void
>>> __iomem *base, u32 etm_pid)
>>> * isolates the manufacturer JEP106 ID in the PID.
>>> * TRCPIDR2 (JEDC|DES_1) << 16 | TRCPIDR1 (DES_0) << 8)
>>> */
>>> - if ((init_arg.pid & 0x000FF000) == 0x00096000)
>>> + if ((init_arg.pid & 0x000FF000) == 0x00096000) {
>>> drvdata->mmio_external = true;
>>> + drvdata->no_quad_mmio = true;
>>> + }
>>> /*
>>> * Serialize against CPUHP callbacks to avoid race condition
>>> diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h
>>> b/drivers/hwtracing/coresight/coresight-etm4x.h
>>> index cf4f9f2e1807..0650bcdff410 100644
>>> --- a/drivers/hwtracing/coresight/coresight-etm4x.h
>>> +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
>>> @@ -1016,6 +1016,7 @@ struct etmv4_save_state {
>>> * the trace unit.
>>> * @arch_features: Bitmap of arch features of etmv4 devices.
>>> * @mmio_external: True if ETM considers MMIO an external access.
>>> + * @no_quad_mmio: True if ETM does not support 64-bit (quad) access.
>>> */
>>> struct etmv4_drvdata {
>>> void __iomem *base;
>>> @@ -1069,6 +1070,7 @@ struct etmv4_drvdata {
>>> bool skip_power_up;
>>> DECLARE_BITMAP(arch_features, ETM4_IMPDEF_FEATURE_MAX);
>>> bool mmio_external;
>>> + bool no_quad_mmio;
>>> };
>>> /* Address comparator access types */
>>> @@ -1093,6 +1095,36 @@ void etm4_config_trace_mode(struct etmv4_config
>>> *config);
>>> u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
>>> void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool
>>> _64bit);
>>> +/* 64-bit aligned to convert 64-bit access to 2 ea. 32-bit access */
>>> +#pragma pack(push, 8)
>>> +
>>> +struct etm_quad_split {
>>> + u32 lsw;
>>> + u32 msw;
>>> +};
>>> +
>>> +#pragma pack(pop)
>>> +
>>> +static inline u64 etm4x_split_read64(struct csdev_access *csa,
>>> unsigned int offset)
>>> +{
>>> + struct etm_quad_split container;
>>> +
>>> + container.lsw = etm4x_read32(csa, offset);
>>> + container.msw = etm4x_read32(csa, offset + sizeof(u32));
>>> +
>>> + return *(u64 *) &container;
>>
>> Wouldn't this break with the "endianness" flip ? (Not that we have BE
>> implementations). Could we not combine the two values to a 64bit value
>> and pass that instead ?
> The split implementation writes/reads 32-bit words to/from 2 consecutive
> 32-bit aligned memory addresses independent of endianness so it doesn't
> care. I'm not sure I understand what you're getting at by combining the
> 2 ea. 32-bit values into a 1 ea. 64-bit value. The etm4x_split_read64
> and etm4x_split_write64 calls both use 64-bit values in and out.
> Internal to this code, both read and write accesses must use 32-bit values.
>
>>
>> Similarly below.
>>
>> Suzuki
>>
>>> +}
>>> +
>>> +static inline void etm4x_split_write64(struct csdev_access *csa, u64
>>> quad, unsigned int offset)
>>> +{
>>> + struct etm_quad_split container;
>>> +
>>> + *(u64 *) &container = quad;
>>> +
>>> + etm4x_relaxed_write32(csa, container.lsw, offset);
>>> + etm4x_relaxed_write32(csa, container.msw, offset + sizeof(u32));
>>> +}
>>> +
>>> static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata)
>>> {
>>> return drvdata->arch >= ETM_ARCH_ETE;
>>
More information about the linux-arm-kernel
mailing list