[PATCH v3 13/38] arm64: Use a positive cpucap for FP/SIMD
Mark Rutland
mark.rutland at arm.com
Tue Oct 10 03:31:14 PDT 2023
Currently we have a negative cpucap which describes the *absence* of
FP/SIMD rather than *presence* of FP/SIMD. This largely works, but is
somewhat awkward relative to other cpucaps that describe the presence of
a feature, and it would be nicer to have a cpucap which describes the
presence of FP/SIMD:
* This will allow the cpucap to be treated as a standard
ARM64_CPUCAP_SYSTEM_FEATURE, which can be detected with the standard
has_cpuid_feature() function and ARM64_CPUID_FIELDS() description.
* This ensures that the cpucap will only transition from not-present to
present, reducing the risk of unintentional and/or unsafe usage of
FP/SIMD before cpucaps are finalized.
* This will allow using arm64_cpu_capabilities::cpu_enable() to enable
the use of FP/SIMD later, with FP/SIMD being disabled at boot time
otherwise. This will ensure that any unintentional and/or unsafe usage
of FP/SIMD prior to this is trapped, and will ensure that FP/SIMD is
never unintentionally enabled for userspace in mismatched big.LITTLE
systems.
This patch replaces the negative ARM64_HAS_NO_FPSIMD cpucap with a
positive ARM64_HAS_FPSIMD cpucap, making changes as described above.
Note that as FP/SIMD will now be trapped when not supported system-wide,
do_fpsimd_acc() must handle these traps in the same way as for SVE and
SME. The commentary in fpsimd_restore_current_state() is updated to
describe the new scheme.
No users of system_supports_fpsimd() need to know that FP/SIMD is
available prior to alternatives being patched, so this is updated to
use alternative_has_cap_likely() to check for the ARM64_HAS_FPSIMD
cpucap, without generating code to test the system_cpucaps bitmap.
Signed-off-by: Mark Rutland <mark.rutland at arm.com>
Reviewed-by: Mark Brown <broonie at kernel.org>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Suzuki K Poulose <suzuki.poulose at arm.com>
Cc: Will Deacon <will at kernel.org>
---
arch/arm64/include/asm/cpufeature.h | 2 +-
arch/arm64/include/asm/fpsimd.h | 1 +
arch/arm64/kernel/cpufeature.c | 18 ++++--------
arch/arm64/kernel/fpsimd.c | 44 +++++++++++++++++++++++------
arch/arm64/mm/proc.S | 3 +-
arch/arm64/tools/cpucaps | 2 +-
6 files changed, 44 insertions(+), 26 deletions(-)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1233a8ff96a88..a1698945916ed 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -760,7 +760,7 @@ static inline bool system_supports_mixed_endian(void)
static __always_inline bool system_supports_fpsimd(void)
{
- return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
+ return alternative_has_cap_likely(ARM64_HAS_FPSIMD);
}
static inline bool system_uses_hw_pan(void)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 0cbaa06b394a0..c43ae9c013ec4 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -149,6 +149,7 @@ extern void sme_save_state(void *state, int zt);
extern void sme_load_state(void const *state, int zt);
struct arm64_cpu_capabilities;
+extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index fb828f8c49e31..c493fa582a190 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1580,14 +1580,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _
MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
}
-static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
-{
- u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
-
- return cpuid_feature_extract_signed_field(pfr0,
- ID_AA64PFR0_EL1_FP_SHIFT) < 0;
-}
-
static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -2398,11 +2390,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, CSV3, IMP)
},
{
- /* FP/SIMD is not implemented */
- .capability = ARM64_HAS_NO_FPSIMD,
- .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE,
- .min_field_value = 0,
- .matches = has_no_fpsimd,
+ .capability = ARM64_HAS_FPSIMD,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_fpsimd,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, FP, IMP)
},
#ifdef CONFIG_ARM64_PMEM
{
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 45ea9cabbaa41..f1c9eadea200d 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1160,6 +1160,13 @@ static void __init sve_efi_setup(void)
panic("Cannot allocate percpu memory for EFI SVE save/restore");
}
+void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;
+ write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);
+ isb();
+}
+
void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
@@ -1520,8 +1527,17 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
*/
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
- /* TODO: implement lazy context saving/restoring */
- WARN_ON(1);
+ /* Even if we chose not to use FPSIMD, the hardware could still trap: */
+ if (!system_supports_fpsimd()) {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+ return;
+ }
+
+ /*
+ * When FPSIMD is enabled, we should never take a trap unless something
+ * has gone very wrong.
+ */
+ BUG();
}
/*
@@ -1762,13 +1778,23 @@ void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
void fpsimd_restore_current_state(void)
{
/*
- * For the tasks that were created before we detected the absence of
- * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
- * e.g, init. This could be then inherited by the children processes.
- * If we later detect that the system doesn't support FP/SIMD,
- * we must clear the flag for all the tasks to indicate that the
- * FPSTATE is clean (as we can't have one) to avoid looping for ever in
- * do_notify_resume().
+ * TIF_FOREIGN_FPSTATE is set on the init task and copied by
+ * arch_dup_task_struct() regardless of whether FP/SIMD is detected.
+ * Thus user threads can have this set even when FP/SIMD hasn't been
+ * detected.
+ *
+ * When FP/SIMD is detected, begin_new_exec() will set
+ * TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(),
+ * and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when
+ * switching tasks. We detect FP/SIMD before we exec the first user
+ * process, ensuring this has TIF_FOREIGN_FPSTATE set and
+ * do_notify_resume() will call fpsimd_restore_current_state() to
+ * install the user FP/SIMD context.
+ *
+ * When FP/SIMD is not detected, nothing else will clear or set
+ * TIF_FOREIGN_FPSTATE prior to the first return to userspace, and
+ * we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume()
+ * looping forever calling fpsimd_restore_current_state().
*/
if (!system_supports_fpsimd()) {
clear_thread_flag(TIF_FOREIGN_FPSTATE);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 14fdf645edc88..f66c37a1610e1 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -405,8 +405,7 @@ SYM_FUNC_START(__cpu_setup)
tlbi vmalle1 // Invalidate local TLB
dsb nsh
- mov x1, #3 << 20
- msr cpacr_el1, x1 // Enable FP/ASIMD
+ msr cpacr_el1, xzr // Reset cpacr_el1
mov x1, #1 << 12 // Reset mdscr_el1 and disable
msr mdscr_el1, x1 // access to the DCC from EL0
isb // Unmask debug exceptions now,
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index c3f06fdef6099..1adf562c914d9 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -27,6 +27,7 @@ HAS_ECV_CNTPOFF
HAS_EPAN
HAS_EVT
HAS_FGT
+HAS_FPSIMD
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
@@ -39,7 +40,6 @@ HAS_LDAPR
HAS_LSE_ATOMICS
HAS_MOPS
HAS_NESTED_VIRT
-HAS_NO_FPSIMD
HAS_NO_HW_PREFETCH
HAS_PAN
HAS_S1PIE
--
2.30.2
More information about the linux-arm-kernel
mailing list