[PATCH 16/18] arm64: fpsimd: Move sve_flush_live() inline
Vladimir Murzin
vladimir.murzin at arm.com
Thu May 28 03:49:25 PDT 2026
On 5/21/26 14:25, Mark Rutland wrote:
> Currently sve_flush_live() is written in out-of-line assembly. It would
> be nice if we could move it inline such that control flow can be written
> more clearly in C, and to permit the removal of otherwise unused
> assembly macros.
>
> The 'flush_ffr' argument is redundant as sve_flush_live() is always
> called from non-streaming mode, and all callers pass 'true'. Remove the
> argument and make it a requirement that the function is called from
> non-streaming mode.
>
> The 'vq_minus_1' argument is unnecessary, as sve_flush_live() can read
> the live VL directly using the RDVL instruction (wrapped by the
> sve_get_vl() helper function).
>
> Move the function to C, with the simplifications above.
>
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Fuad Tabba <tabba at google.com>
> Cc: James Morse <james.morse at arm.com>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Mark Brown <broonie at kernel.org>
> Cc: Oliver Upton <oupton at kernel.org>
> Cc: Will Deacon <will at kernel.org>
> ---
> arch/arm64/include/asm/fpsimd.h | 26 +++++++++++++++++++++++-
> arch/arm64/include/asm/fpsimdmacros.h | 29 ---------------------------
> arch/arm64/kernel/entry-common.c | 8 ++------
> arch/arm64/kernel/entry-fpsimd.S | 22 --------------------
> arch/arm64/kernel/fpsimd.c | 2 +-
> 5 files changed, 28 insertions(+), 59 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index d005324bbcf3e..550987b36206a 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -332,7 +332,31 @@ static inline void sve_load_state(const struct sve_state *state, bool ffr)
> __sve_load_p(state, vl, ffr);
> }
>
> -extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
> +
> +/*
> + * Zero all SVE registers except for the first 128 bits of each vector.
> + *
> + * The caller must ensure that the VL has been configured and that the CPU is
> + * in non-streaming mode.
> + */
> +static inline void sve_flush_live(void)
> +{
> + unsigned long vl = sve_get_vl();
> +
> + if (vl > sizeof(__uint128_t)) {
> + asm volatile(
> + __FPSIMD_PREAMBLE
> + FOR_EACH_Z_REG("n", "mov v\\n\\().16b, v\\n\\().16b")
> + );
> + }
> +
> + asm volatile(
> + __SVE_PREAMBLE
> + FOR_EACH_P_REG("n", "pfalse p\\n\\().b")
> + " wrffr p0.b\n"
> + );
> +}
> +
> extern void sme_save_state(struct sme_state *state, int zt);
> extern void sme_load_state(const struct sme_state *state, int zt);
>
> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
> index ebf8b47313e90..9e352b5c6b764 100644
> --- a/arch/arm64/include/asm/fpsimdmacros.h
> +++ b/arch/arm64/include/asm/fpsimdmacros.h
> @@ -40,19 +40,6 @@
> .endif
> .endm
>
> -/* Deprecated macros for SVE instructions */
> -
> -/* WRFFR P\np.B */
> -.macro _sve_wrffr np
> - wrffr p\np\().b
> -.endm
> -
> -/* PFALSE P\np.B */
> -.macro _sve_pfalse np
> - .arch_extension sve
> - pfalse p\np\().b
> -.endm
> -
> /* Deprecated macros for SME instructions */
>
> /* RDSVL X\nx, #\imm */
> @@ -130,22 +117,6 @@
> .purgem _for__body
> .endm
>
> -/* Preserve the first 128-bits of Znz and zero the rest. */
> -.macro _sve_flush_z nz
> - _sve_check_zreg \nz
> - mov v\nz\().16b, v\nz\().16b
> -.endm
> -
> -.macro sve_flush_z
> - _for n, 0, 31, _sve_flush_z \n
> -.endm
> -.macro sve_flush_p
> - _for n, 0, 15, _sve_pfalse \n
> -.endm
> -.macro sve_flush_ffr
> - _sve_wrffr 0
> -.endm
> -
> .macro sme_save_za nxbase, xvl, nw
> mov w\nw, #0
>
> diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
> index cb54335465f66..2352297330e12 100644
> --- a/arch/arm64/kernel/entry-common.c
> +++ b/arch/arm64/kernel/entry-common.c
> @@ -237,12 +237,8 @@ static inline void fpsimd_syscall_enter(void)
> if (!system_supports_sve())
> return;
>
> - if (test_thread_flag(TIF_SVE)) {
> - unsigned int sve_vq_minus_one;
> -
> - sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
> - sve_flush_live(true, sve_vq_minus_one);
> - }
> + if (test_thread_flag(TIF_SVE))
> + sve_flush_live();
>
> /*
> * Any live non-FPSIMD SVE state has been zeroed. Allow
> diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
> index 0575d90e6dffb..bff941eea9566 100644
> --- a/arch/arm64/kernel/entry-fpsimd.S
> +++ b/arch/arm64/kernel/entry-fpsimd.S
> @@ -11,28 +11,6 @@
> #include <asm/assembler.h>
> #include <asm/fpsimdmacros.h>
>
> -#ifdef CONFIG_ARM64_SVE
> -
> -/*
> - * Zero all SVE registers but the first 128-bits of each vector
> - *
> - * VQ must already be configured by caller, any further updates of VQ
> - * will need to ensure that the register state remains valid.
> - *
> - * x0 = include FFR?
> - * x1 = VQ - 1
> - */
> -SYM_FUNC_START(sve_flush_live)
> - cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state
> - sve_flush_z
> -1: sve_flush_p
> - tbz x0, #0, 2f
> - sve_flush_ffr
> -2: ret
> -SYM_FUNC_END(sve_flush_live)
> -
> -#endif /* CONFIG_ARM64_SVE */
> -
> #ifdef CONFIG_ARM64_SME
>
> /*
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index f9b3eeacf130d..42177b439b3c7 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -1338,7 +1338,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
> if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
> unsigned long vq = sve_vq_from_vl(task_get_sve_vl(current));
> sysreg_clear_set_s(SYS_ZCR_EL1, ZCR_ELx_LEN, vq - 1);
> - sve_flush_live(true, vq - 1);
> + sve_flush_live();
> fpsimd_bind_task_to_cpu();
> } else {
> fpsimd_to_sve(current);
> -- 2.30.2
>
FWIW,
Reviewed-by: Vladimir Murzin <vladimir.murzin at arm.com>
More information about the linux-arm-kernel mailing list