[PATCH v10 09/39] arm64/sme: Basic enumeration support

Christophe Fergeau cfergeau at redhat.com
Tue Nov 8 08:30:01 PST 2022


Hey,

I'm running Fedora virtual machines on M1 MacBooks using Apple's Virtualization
framework [0]. I've been having issues with recent Linux kernels: when I try to
start my virtual machine, the framework errors out early during boot with
"Virtualization error" and no additional information.
I don't even get serial console output until much later in the boot process.

I've bisected this issue down to the patch I'm replying to, and if I comment
out this added line:

+	info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);

then I can start my VM again.

I've raised this issue with Apple, as there's most likely a bug to fix in their
hypervisor, but I was wondering if this could also be worked around kernel-side?
Before this change, I had no problems with my Linux VMs.
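
For what it's worth, one shape a kernel-side workaround could take would be to
gate the new read on ID_AA64PFR1_EL1.SME being non-zero, so a hypervisor that
traps the unfamiliar register is never asked to emulate it. This is only a rough
sketch (I don't know the cpufeature code well enough to say whether something
like this would be acceptable, and the helper name is made up):

	/*
	 * Hypothetical sketch, not the kernel's actual behaviour: only read
	 * ID_AA64SMFR0_EL1 when ID_AA64PFR1_EL1 advertises SME, otherwise
	 * report it as all-zeroes.
	 */
	static u64 __cpuinfo_read_id_aa64smfr0(u64 pfr1)
	{
		if (cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT))
			return read_cpuid(ID_AA64SMFR0_EL1);

		return 0;
	}

	/* in __cpuinfo_store_cpu(), instead of the unconditional read */
	info->reg_id_aa64smfr0 = __cpuinfo_read_id_aa64smfr0(info->reg_id_aa64pfr1);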

Thanks,

Christophe

[0] https://developer.apple.com/documentation/virtualization?language=objc

> This patch introduces basic cpufeature support for discovering the presence
> of the Scalable Matrix Extension.
> 
> Signed-off-by: Mark Brown <broonie at kernel.org>
> ---
>  Documentation/arm64/elf_hwcaps.rst  | 33 +++++++++++++++++
>  arch/arm64/include/asm/cpu.h        |  1 +
>  arch/arm64/include/asm/cpufeature.h | 12 ++++++
>  arch/arm64/include/asm/fpsimd.h     |  2 +
>  arch/arm64/include/asm/hwcap.h      |  8 ++++
>  arch/arm64/include/uapi/asm/hwcap.h |  8 ++++
>  arch/arm64/kernel/cpufeature.c      | 57 +++++++++++++++++++++++++++++
>  arch/arm64/kernel/cpuinfo.c         |  9 +++++
>  arch/arm64/kernel/fpsimd.c          | 30 +++++++++++++++
>  arch/arm64/tools/cpucaps            |  2 +
>  10 files changed, 162 insertions(+)
> 
> diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
> index b72ff17d600a..5626cf208000 100644
> --- a/Documentation/arm64/elf_hwcaps.rst
> +++ b/Documentation/arm64/elf_hwcaps.rst
> @@ -259,6 +259,39 @@ HWCAP2_RPRES
>  
>      Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
>  
> +HWCAP2_SME
> +
> +    Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
> +    by Documentation/arm64/sme.rst.
> +
> +HWCAP2_SME_I16I64
> +
> +    Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
> +
> +HWCAP2_SME_F64F64
> +
> +    Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
> +
> +HWCAP2_SME_I8I32
> +
> +    Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
> +
> +HWCAP2_SME_F16F32
> +
> +    Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
> +
> +HWCAP2_SME_B16F32
> +
> +    Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
> +
> +HWCAP2_SME_F32F32
> +
> +    Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
> +
> +HWCAP2_SME_FA64
> +
> +    Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
> +
>  4. Unused AT_HWCAP bits
>  -----------------------
>  
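
A side note while quoting the hwcap documentation: once these bits land,
userspace can probe them with getauxval(). A minimal sketch, assuming glibc's
<sys/auxv.h> and installed uapi headers; the fallback define just mirrors the
value this patch adds to the uapi header:

	#include <stdio.h>
	#include <sys/auxv.h>
	#include <asm/hwcap.h>

	#ifndef HWCAP2_SME
	#define HWCAP2_SME	(1 << 22)	/* mirrors this patch's uapi value */
	#endif

	int main(void)
	{
		unsigned long hwcap2 = getauxval(AT_HWCAP2);

		/* set when ID_AA64PFR1_EL1.SME >= 0b0001, per the text above */
		printf("SME %s\n",
		       (hwcap2 & HWCAP2_SME) ? "supported" : "not supported");
		return 0;
	}
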
> diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
> index a58e366f0b07..d08062bcb9c1 100644
> --- a/arch/arm64/include/asm/cpu.h
> +++ b/arch/arm64/include/asm/cpu.h
> @@ -58,6 +58,7 @@ struct cpuinfo_arm64 {
>  	u64		reg_id_aa64pfr0;
>  	u64		reg_id_aa64pfr1;
>  	u64		reg_id_aa64zfr0;
> +	u64		reg_id_aa64smfr0;
>  
>  	struct cpuinfo_32bit	aarch32;
>  
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 2728abd9cae4..f93b1442143f 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -728,6 +728,18 @@ static __always_inline bool system_supports_sve(void)
>  		cpus_have_const_cap(ARM64_SVE);
>  }
>  
> +static __always_inline bool system_supports_sme(void)
> +{
> +	return IS_ENABLED(CONFIG_ARM64_SME) &&
> +		cpus_have_const_cap(ARM64_SME);
> +}
> +
> +static __always_inline bool system_supports_fa64(void)
> +{
> +	return IS_ENABLED(CONFIG_ARM64_SME) &&
> +		cpus_have_const_cap(ARM64_SME_FA64);
> +}
> +
>  static __always_inline bool system_supports_cnp(void)
>  {
>  	return IS_ENABLED(CONFIG_ARM64_CNP) &&
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index c90f7f99a768..6b7eb6f2cecd 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -74,6 +74,8 @@ extern void sve_set_vq(unsigned long vq_minus_1);
>  
>  struct arm64_cpu_capabilities;
>  extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
> +extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
> +extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
>  
>  extern u64 read_zcr_features(void);
>  
> diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
> index f68fbb207473..76d9999527c5 100644
> --- a/arch/arm64/include/asm/hwcap.h
> +++ b/arch/arm64/include/asm/hwcap.h
> @@ -108,6 +108,14 @@
>  #define KERNEL_HWCAP_ECV		__khwcap2_feature(ECV)
>  #define KERNEL_HWCAP_AFP		__khwcap2_feature(AFP)
>  #define KERNEL_HWCAP_RPRES		__khwcap2_feature(RPRES)
> +#define KERNEL_HWCAP_SME		__khwcap2_feature(SME)
> +#define KERNEL_HWCAP_SME_I16I64		__khwcap2_feature(SME_I16I64)
> +#define KERNEL_HWCAP_SME_F64F64		__khwcap2_feature(SME_F64F64)
> +#define KERNEL_HWCAP_SME_I8I32		__khwcap2_feature(SME_I8I32)
> +#define KERNEL_HWCAP_SME_F16F32		__khwcap2_feature(SME_F16F32)
> +#define KERNEL_HWCAP_SME_B16F32		__khwcap2_feature(SME_B16F32)
> +#define KERNEL_HWCAP_SME_F32F32		__khwcap2_feature(SME_F32F32)
> +#define KERNEL_HWCAP_SME_FA64		__khwcap2_feature(SME_FA64)
>  
>  /*
>   * This yields a mask that user programs can use to figure out what
> diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
> index f03731847d9d..60de5626f8fb 100644
> --- a/arch/arm64/include/uapi/asm/hwcap.h
> +++ b/arch/arm64/include/uapi/asm/hwcap.h
> @@ -78,5 +78,13 @@
>  #define HWCAP2_ECV		(1 << 19)
>  #define HWCAP2_AFP		(1 << 20)
>  #define HWCAP2_RPRES		(1 << 21)
> +#define HWCAP2_SME		(1 << 22)
> +#define HWCAP2_SME_I16I64	(1 << 23)
> +#define HWCAP2_SME_F64F64	(1 << 24)
> +#define HWCAP2_SME_I8I32	(1 << 25)
> +#define HWCAP2_SME_F16F32	(1 << 26)
> +#define HWCAP2_SME_B16F32	(1 << 27)
> +#define HWCAP2_SME_F32F32	(1 << 28)
> +#define HWCAP2_SME_FA64		(1 << 29)
>  
>  #endif /* _UAPI__ASM_HWCAP_H */
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index d9f09e40aaf6..3cd7fe53cffa 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -251,6 +251,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
>  };
>  
>  static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
> +	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
> @@ -283,6 +284,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
>  	ARM64_FTR_END,
>  };
>  
> +static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0),
> +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0),
> +	ARM64_FTR_END,
> +};
> +
>  static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
>  	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
> @@ -634,6 +653,7 @@ static const struct __ftr_reg_entry {
>  	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
>  			       &id_aa64pfr1_override),
>  	ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
> +	ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
>  
>  	/* Op1 = 0, CRn = 0, CRm = 5 */
>  	ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
> @@ -947,6 +967,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
>  	init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
>  	init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
>  	init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
> +	init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
>  
>  	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
>  		init_32bit_cpu_features(&info->aarch32);
> @@ -2400,6 +2421,32 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.matches = has_cpuid_feature,
>  		.min_field_value = 1,
>  	},
> +#ifdef CONFIG_ARM64_SME
> +	{
> +		.desc = "Scalable Matrix Extension",
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.capability = ARM64_SME,
> +		.sys_reg = SYS_ID_AA64PFR1_EL1,
> +		.sign = FTR_UNSIGNED,
> +		.field_pos = ID_AA64PFR1_SME_SHIFT,
> +		.field_width = 4,
> +		.min_field_value = ID_AA64PFR1_SME,
> +		.matches = has_cpuid_feature,
> +		.cpu_enable = sme_kernel_enable,
> +	},
> +	{
> +		.desc = "FA64",
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.capability = ARM64_SME_FA64,
> +		.sys_reg = SYS_ID_AA64SMFR0_EL1,
> +		.sign = FTR_UNSIGNED,
> +		.field_pos = ID_AA64SMFR0_FA64_SHIFT,
> +		.field_width = 1,
> +		.min_field_value = ID_AA64SMFR0_FA64,
> +		.matches = has_cpuid_feature,
> +		.cpu_enable = fa64_kernel_enable,
> +	},
> +#endif /* CONFIG_ARM64_SME */
>  	{},
>  };
>  
> @@ -2524,6 +2571,16 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
>  	HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
>  	HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
>  	HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
> +#ifdef CONFIG_ARM64_SME
> +	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
> +	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
> +	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
> +	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
> +	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
> +	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
> +	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
> +	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
> +#endif /* CONFIG_ARM64_SME */
>  	{},
>  };
>  
> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
> index 591c18a889a5..33ec182e872e 100644
> --- a/arch/arm64/kernel/cpuinfo.c
> +++ b/arch/arm64/kernel/cpuinfo.c
> @@ -97,6 +97,14 @@ static const char *const hwcap_str[] = {
>  	[KERNEL_HWCAP_ECV]		= "ecv",
>  	[KERNEL_HWCAP_AFP]		= "afp",
>  	[KERNEL_HWCAP_RPRES]		= "rpres",
> +	[KERNEL_HWCAP_SME]		= "sme",
> +	[KERNEL_HWCAP_SME_I16I64]	= "smei16i64",
> +	[KERNEL_HWCAP_SME_F64F64]	= "smef64f64",
> +	[KERNEL_HWCAP_SME_I8I32]	= "smei8i32",
> +	[KERNEL_HWCAP_SME_F16F32]	= "smef16f32",
> +	[KERNEL_HWCAP_SME_B16F32]	= "smeb16f32",
> +	[KERNEL_HWCAP_SME_F32F32]	= "smef32f32",
> +	[KERNEL_HWCAP_SME_FA64]		= "smefa64",
>  };
>  
>  #ifdef CONFIG_COMPAT
> @@ -400,6 +408,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
>  	info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
>  	info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
>  	info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
> +	info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
>  
>  	if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
>  		info->reg_gmid = read_cpuid(GMID_EL1);
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 5280e098cfb5..576490be3c2b 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -987,6 +987,32 @@ void fpsimd_release_task(struct task_struct *dead_task)
>  
>  #endif /* CONFIG_ARM64_SVE */
>  
> +#ifdef CONFIG_ARM64_SME
> +
> +void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
> +{
> +	/* Set priority for all PEs to architecturally defined minimum */
> +	write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
> +		       SYS_SMPRI_EL1);
> +
> +	/* Allow SME in kernel */
> +	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
> +	isb();
> +}
> +
> +/*
> + * This must be called after sme_kernel_enable(), we rely on the
> + * feature table being sorted to ensure this.
> + */
> +void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
> +{
> +	/* Allow use of FA64 */
> +	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
> +		       SYS_SMCR_EL1);
> +}
> +
> +#endif /* CONFIG_ARM64_SME */
> +
>  /*
>   * Trapped SVE access
>   *
> @@ -1532,6 +1558,10 @@ static int __init fpsimd_init(void)
>  	if (!cpu_have_named_feature(ASIMD))
>  		pr_notice("Advanced SIMD is not implemented\n");
>  
> +
> +	if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE))
> +		pr_notice("SME is implemented but not SVE\n");
> +
>  	return sve_sysctl_init();
>  }
>  core_initcall(fpsimd_init);
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index 870c39537dd0..58dfc8547c64 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -41,6 +41,8 @@ KVM_PROTECTED_MODE
>  MISMATCHED_CACHE_TYPE
>  MTE
>  MTE_ASYMM
> +SME
> +SME_FA64
>  SPECTRE_V2
>  SPECTRE_V3A
>  SPECTRE_V4
> -- 
> 2.30.2
> 
