[PATCH v3 13/38] arm64: Use a positive cpucap for FP/SIMD

Mark Rutland mark.rutland at arm.com
Tue Oct 10 03:31:14 PDT 2023


Currently we have a negative cpucap which describes the *absence* of
FP/SIMD rather than *presence* of FP/SIMD. This largely works, but is
somewhat awkward relative to other cpucaps that describe the presence of
a feature, and it would be nicer to have a cpucap which describes the
presence of FP/SIMD:

* This will allow the cpucap to be treated as a standard
  ARM64_CPUCAP_SYSTEM_FEATURE, which can be detected with the standard
  has_cpuid_feature() function and ARM64_CPUID_FIELDS() description.

* This ensures that the cpucap will only transition from not-present to
  present, reducing the risk of unintentional and/or unsafe usage of
  FP/SIMD before cpucaps are finalized.

* This will allow using arm64_cpu_capabilities::cpu_enable() to enable
  the use of FP/SIMD later, with FP/SIMD being disabled at boot time
  otherwise. This will ensure that any unintentional and/or unsafe usage
  of FP/SIMD prior to this is trapped, and will ensure that FP/SIMD is
  never unintentionally enabled for userspace in mismatched big.LITTLE
  systems.

This patch replaces the negative ARM64_HAS_NO_FPSIMD cpucap with a
positive ARM64_HAS_FPSIMD cpucap, making changes as described above.
Note that as FP/SIMD will now be trapped when not supported system-wide,
do_fpsimd_acc() must handle these traps in the same way as for SVE and
SME. The commentary in fpsimd_restore_current_state() is updated to
describe the new scheme.

No users of system_supports_fpsimd() need to know that FP/SIMD is
available prior to alternatives being patched, so this is updated to
use alternative_has_cap_likely() to check for the ARM64_HAS_FPSIMD
cpucap, without generating code to test the system_cpucaps bitmap.

Signed-off-by: Mark Rutland <mark.rutland at arm.com>
Reviewed-by: Mark Brown <broonie at kernel.org>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Suzuki K Poulose <suzuki.poulose at arm.com>
Cc: Will Deacon <will at kernel.org>
---
 arch/arm64/include/asm/cpufeature.h |  2 +-
 arch/arm64/include/asm/fpsimd.h     |  1 +
 arch/arm64/kernel/cpufeature.c      | 18 ++++--------
 arch/arm64/kernel/fpsimd.c          | 44 +++++++++++++++++++++++------
 arch/arm64/mm/proc.S                |  3 +-
 arch/arm64/tools/cpucaps            |  2 +-
 6 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1233a8ff96a88..a1698945916ed 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -760,7 +760,7 @@ static inline bool system_supports_mixed_endian(void)
 
 static __always_inline bool system_supports_fpsimd(void)
 {
-	return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
+	return alternative_has_cap_likely(ARM64_HAS_FPSIMD);
 }
 
 static inline bool system_uses_hw_pan(void)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 0cbaa06b394a0..c43ae9c013ec4 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -149,6 +149,7 @@ extern void sme_save_state(void *state, int zt);
 extern void sme_load_state(void const *state, int zt);
 
 struct arm64_cpu_capabilities;
+extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
 extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
 extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
 extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index fb828f8c49e31..c493fa582a190 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1580,14 +1580,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _
 		MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
 }
 
-static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
-{
-	u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
-
-	return cpuid_feature_extract_signed_field(pfr0,
-					ID_AA64PFR0_EL1_FP_SHIFT) < 0;
-}
-
 static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
 			  int scope)
 {
@@ -2398,11 +2390,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, CSV3, IMP)
 	},
 	{
-		/* FP/SIMD is not implemented */
-		.capability = ARM64_HAS_NO_FPSIMD,
-		.type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE,
-		.min_field_value = 0,
-		.matches = has_no_fpsimd,
+		.capability = ARM64_HAS_FPSIMD,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.cpu_enable = cpu_enable_fpsimd,
+		ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, FP, IMP)
 	},
 #ifdef CONFIG_ARM64_PMEM
 	{
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 45ea9cabbaa41..f1c9eadea200d 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1160,6 +1160,13 @@ static void __init sve_efi_setup(void)
 	panic("Cannot allocate percpu memory for EFI SVE save/restore");
 }
 
+void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
+{
+	unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;
+	write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);
+	isb();
+}
+
 void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
 {
 	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
@@ -1520,8 +1527,17 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
  */
 void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
 {
-	/* TODO: implement lazy context saving/restoring */
-	WARN_ON(1);
+	/* Even if we chose not to use FPSIMD, the hardware could still trap: */
+	if (!system_supports_fpsimd()) {
+		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+		return;
+	}
+
+	/*
+	 * When FPSIMD is enabled, we should never take a trap unless something
+	 * has gone very wrong.
+	 */
+	BUG();
 }
 
 /*
@@ -1762,13 +1778,23 @@ void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
 void fpsimd_restore_current_state(void)
 {
 	/*
-	 * For the tasks that were created before we detected the absence of
-	 * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
-	 * e.g, init. This could be then inherited by the children processes.
-	 * If we later detect that the system doesn't support FP/SIMD,
-	 * we must clear the flag for  all the tasks to indicate that the
-	 * FPSTATE is clean (as we can't have one) to avoid looping for ever in
-	 * do_notify_resume().
+	 * TIF_FOREIGN_FPSTATE is set on the init task and copied by
+	 * arch_dup_task_struct() regardless of whether FP/SIMD is detected.
+	 * Thus user threads can have this set even when FP/SIMD hasn't been
+	 * detected.
+	 *
+	 * When FP/SIMD is detected, begin_new_exec() will set
+	 * TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(),
+	 * and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when
+	 * switching tasks. We detect FP/SIMD before we exec the first user
+	 * process, ensuring this has TIF_FOREIGN_FPSTATE set and
+	 * do_notify_resume() will call fpsimd_restore_current_state() to
+	 * install the user FP/SIMD context.
+	 *
+	 * When FP/SIMD is not detected, nothing else will clear or set
+	 * TIF_FOREIGN_FPSTATE prior to the first return to userspace, and
+	 * we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume()
+	 * looping forever calling fpsimd_restore_current_state().
 	 */
 	if (!system_supports_fpsimd()) {
 		clear_thread_flag(TIF_FOREIGN_FPSTATE);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 14fdf645edc88..f66c37a1610e1 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -405,8 +405,7 @@ SYM_FUNC_START(__cpu_setup)
 	tlbi	vmalle1				// Invalidate local TLB
 	dsb	nsh
 
-	mov	x1, #3 << 20
-	msr	cpacr_el1, x1			// Enable FP/ASIMD
+	msr	cpacr_el1, xzr			// Reset cpacr_el1
 	mov	x1, #1 << 12			// Reset mdscr_el1 and disable
 	msr	mdscr_el1, x1			// access to the DCC from EL0
 	isb					// Unmask debug exceptions now,
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index c3f06fdef6099..1adf562c914d9 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -27,6 +27,7 @@ HAS_ECV_CNTPOFF
 HAS_EPAN
 HAS_EVT
 HAS_FGT
+HAS_FPSIMD
 HAS_GENERIC_AUTH
 HAS_GENERIC_AUTH_ARCH_QARMA3
 HAS_GENERIC_AUTH_ARCH_QARMA5
@@ -39,7 +40,6 @@ HAS_LDAPR
 HAS_LSE_ATOMICS
 HAS_MOPS
 HAS_NESTED_VIRT
-HAS_NO_FPSIMD
 HAS_NO_HW_PREFETCH
 HAS_PAN
 HAS_S1PIE
-- 
2.30.2




More information about the linux-arm-kernel mailing list