[PATCH v6 13/37] arm64/sme: Basic enumeration support

Catalin Marinas catalin.marinas at arm.com
Thu Dec 9 10:41:26 PST 2021


On Mon, Nov 15, 2021 at 03:28:11PM +0000, Mark Brown wrote:
> diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
> index 7b23b16f21ce..6f8ca04b6566 100644
> --- a/arch/arm64/include/uapi/asm/hwcap.h
> +++ b/arch/arm64/include/uapi/asm/hwcap.h
> @@ -76,5 +76,13 @@
>  #define HWCAP2_BTI		(1 << 17)
>  #define HWCAP2_MTE		(1 << 18)
>  #define HWCAP2_ECV		(1 << 19)
> +#define HWCAP2_SME		(1 << 20)
> +#define HWCAP2_SME_I16I64	(1 << 21)
> +#define HWCAP2_SME_F64F64	(1 << 22)
> +#define HWCAP2_SME_I8I32	(1 << 23)
> +#define HWCAP2_SME_F16F32	(1 << 24)
> +#define HWCAP2_SME_B16F32	(1 << 25)
> +#define HWCAP2_SME_F32F32	(1 << 26)
> +#define HWCAP2_SME_FA64		(1 << 27)

At this pace we'll need HWCAP3 pretty soon (since we only allocated
32-bit in each). I wonder whether we could instead not bother at all and
just provide user-space emulation for ID_AA64SMFR0_EL1.

>  #endif /* _UAPI__ASM_HWCAP_H */
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 81824c7ea74f..3cf60819c354 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -246,6 +246,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
>  };
>  
>  static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
> +	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
> @@ -278,6 +279,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
>  	ARM64_FTR_END,
>  };
>  
> +static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0),
> +	ARM64_FTR_END,
> +};
> +
>  static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
>  	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
> @@ -628,6 +647,7 @@ static const struct __ftr_reg_entry {
>  	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
>  			       &id_aa64pfr1_override),
>  	ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
> +	ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
>  
>  	/* Op1 = 0, CRn = 0, CRm = 5 */
>  	ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
> @@ -939,6 +959,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
>  	init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
>  	init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
>  	init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
> +	init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
>  
>  	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
>  		init_32bit_cpu_features(&info->aarch32);
> @@ -2370,6 +2391,30 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.matches = has_cpuid_feature,
>  		.min_field_value = 1,
>  	},
> +#ifdef CONFIG_ARM64_SME
> +	{
> +		.desc = "Scalable Matrix Extension",
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.capability = ARM64_SME,
> +		.sys_reg = SYS_ID_AA64PFR1_EL1,
> +		.sign = FTR_UNSIGNED,
> +		.field_pos = ID_AA64PFR1_SME_SHIFT,
> +		.min_field_value = ID_AA64PFR1_SME,
> +		.matches = has_cpuid_feature,
> +		.cpu_enable = sme_kernel_enable,
> +	},
> +	{
> +		.desc = "FA64",
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.capability = ARM64_SME_FA64,
> +		.sys_reg = SYS_ID_AA64SMFR0_EL1,
> +		.sign = FTR_UNSIGNED,
> +		.field_pos = ID_AA64SMFR0_FA64_SHIFT,
> +		.min_field_value = ID_AA64SMFR0_FA64,
> +		.matches = has_feature_flag,
> +		.cpu_enable = fa64_kernel_enable,
> +	},

I'll comment here rather than the patch introducing has_feature_flag():
an alternative would be to add a .field_width option and in
feature_matches() use cpuid_feature_extract_field_width() directly. All
the arm64_ftr_bits entries already have a width, so just generalise it
for arm64_cpu_capabilities.

-- 
Catalin



More information about the linux-arm-kernel mailing list