[PATCH 2/3] coresight etm4x: Add 32-bit read/write option to split 64-bit words

Suzuki K Poulose suzuki.poulose at arm.com
Fri Jan 20 03:19:20 PST 2023


On 20/01/2023 00:51, Steve Clevenger wrote:
> Add 32-bit read/write access option for Ampere ETMv4.6 64-bit registers.
> Ampere Computing erratum AC03_DEBUG_10 describes a design decision where
> 64-bit read/write access is not supported for the ETMv4.6 implementation.
> These 64-bit registers must be accessed as 2 ea. 32-bit registers.
> AC03_DEBUG_10 is described in the AmpereOne Developer Errata:
> https://solutions.amperecomputing.com/customer-connect/products/AmpereOne-device-documentation

As with the previous comment, please:
   a) Clarify whether this is because of the system instruction access support
   b) Document the erratum

> 
> Fix drvdata->nr_addr_cmp for() loop range bug to drvdata->nr_addr_cmp * 2
> in etm_enable_hw.

Good catch! Please separate this out and send it as a fix. I can queue
this.

> 
> Signed-off-by: Steve Clevenger <scclevenger at os.amperecomputing.com>
> ---
>   .../coresight/coresight-etm4x-core.c          | 81 ++++++++++++++-----
>   drivers/hwtracing/coresight/coresight-etm4x.h | 32 ++++++++
>   2 files changed, 93 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> index 533be1928a09..bf4daa649cdf 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> @@ -452,18 +452,31 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
>   		if (etm4x_sspcicrn_present(drvdata, i))
>   			etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
>   	}
> -	for (i = 0; i < drvdata->nr_addr_cmp; i++) {
> -		etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
> -		etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
> +	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> +		if (drvdata->no_quad_mmio) {
> +			etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i));
> +			etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i));
> +		} else {
> +			etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
> +			etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
> +		}
> +	}

Something like this can easily leave some places out. I think we could hide
it under the generic helpers and handle it there. Maybe "struct
csdev_access" can cache this "no_quad_mmio" and do the right thing?


> +	for (i = 0; i < drvdata->numcidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			etm4x_split_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
> +		else
> +			etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
>   	}
> -	for (i = 0; i < drvdata->numcidc; i++)
> -		etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
>   	etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
>   	if (drvdata->numcidc > 4)
>   		etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
>   
> -	for (i = 0; i < drvdata->numvmidc; i++)
> -		etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
> +	for (i = 0; i < drvdata->numvmidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			etm4x_split_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
> +		else
> +			etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
> +	}
>   	etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
>   	if (drvdata->numvmidc > 4)
>   		etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
> @@ -1670,8 +1683,13 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
>   	}
>   
>   	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> -		state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
> -		state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
> +		if (drvdata->no_quad_mmio) {
> +			state->trcacvr[i] = etm4x_split_read64(csa, TRCACVRn(i));
> +			state->trcacatr[i] = etm4x_split_read64(csa, TRCACATRn(i));
> +		} else {
> +			state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
> +			state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
> +		}
>   	}
>   
>   	/*
> @@ -1681,11 +1699,19 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
>   	 * unit") of ARM IHI 0064D.
>   	 */
>   
> -	for (i = 0; i < drvdata->numcidc; i++)
> -		state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
> +	for (i = 0; i < drvdata->numcidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			state->trccidcvr[i] = etm4x_split_read64(csa, TRCCIDCVRn(i));
> +		else
> +			state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
> +	}
>   
> -	for (i = 0; i < drvdata->numvmidc; i++)
> -		state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
> +	for (i = 0; i < drvdata->numvmidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			state->trcvmidcvr[i] = etm4x_split_read64(csa, TRCVMIDCVRn(i));
> +		else
> +			state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
> +	}
>   
>   	state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
>   	if (drvdata->numcidc > 4)
> @@ -1799,15 +1825,28 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata)
>   	}
>   
>   	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> -		etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
> -		etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
> +		if (drvdata->no_quad_mmio) {
> +			etm4x_split_write64(csa, state->trcacvr[i], TRCACVRn(i));
> +			etm4x_split_write64(csa, state->trcacatr[i], TRCACATRn(i));
> +		} else {
> +			etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
> +			etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
> +		}
>   	}
>   
> -	for (i = 0; i < drvdata->numcidc; i++)
> -		etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
> +	for (i = 0; i < drvdata->numcidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			etm4x_split_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
> +		else
> +			etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
> +	}
>   
> -	for (i = 0; i < drvdata->numvmidc; i++)
> -		etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
> +	for (i = 0; i < drvdata->numvmidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			etm4x_split_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
> +		else
> +			etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
> +	}
>   
>   	etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
>   	if (drvdata->numcidc > 4)
> @@ -2047,8 +2086,10 @@ static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
>   	 * isolates the manufacturer JEP106 ID in the PID.
>   	 * TRCPIDR2 (JEDC|DES_1) << 16 | TRCPIDR1 (DES_0) << 8)
>   	 */
> -	if ((init_arg.pid & 0x000FF000) == 0x00096000)
> +	if ((init_arg.pid & 0x000FF000) == 0x00096000) {
>   		drvdata->mmio_external = true;
> +		drvdata->no_quad_mmio = true;
> +	}
>   
>   	/*
>   	 * Serialize against CPUHP callbacks to avoid race condition
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
> index cf4f9f2e1807..0650bcdff410 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x.h
> +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
> @@ -1016,6 +1016,7 @@ struct etmv4_save_state {
>    *		   the trace unit.
>    * @arch_features: Bitmap of arch features of etmv4 devices.
>    * @mmio_external: True if ETM considers MMIO an external access.
> + * @no_quad_mmio:  True if ETM does not support 64-bit (quad) access.
>    */
>   struct etmv4_drvdata {
>   	void __iomem			*base;
> @@ -1069,6 +1070,7 @@ struct etmv4_drvdata {
>   	bool				skip_power_up;
>   	DECLARE_BITMAP(arch_features, ETM4_IMPDEF_FEATURE_MAX);
>   	bool				mmio_external;
> +	bool				no_quad_mmio;
>   };
>   
>   /* Address comparator access types */
> @@ -1093,6 +1095,36 @@ void etm4_config_trace_mode(struct etmv4_config *config);
>   u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
>   void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit);
>   
> +/* 64-bit aligned to convert 64-bit access to 2 ea. 32-bit access */
> +#pragma pack(push, 8)
> +
> +struct etm_quad_split {
> +	u32 lsw;
> +	u32 msw;
> +};
> +
> +#pragma pack(pop)
> +
> +static inline u64 etm4x_split_read64(struct csdev_access *csa, unsigned int offset)
> +{
> +	struct etm_quad_split container;
> +
> +	container.lsw = etm4x_read32(csa, offset);
> +	container.msw = etm4x_read32(csa, offset + sizeof(u32));
> +
> +	return *(u64 *) &container;

Wouldn't this break with an "endianness" flip? (Not that we have BE
implementations.) Could we not combine the two values into a 64-bit value
and pass that instead?

Similarly below.

Suzuki

> +}
> +
> +static inline void etm4x_split_write64(struct csdev_access *csa, u64 quad, unsigned int offset)
> +{
> +	struct etm_quad_split container;
> +
> +	*(u64 *) &container = quad;
> +
> +	etm4x_relaxed_write32(csa, container.lsw, offset);
> +	etm4x_relaxed_write32(csa, container.msw, offset + sizeof(u32));
> +}
> +
>   static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata)
>   {
>   	return drvdata->arch >= ETM_ARCH_ETE;




More information about the linux-arm-kernel mailing list