[PATCH 16/18] arm64: fpsimd: Move sve_flush_live() inline

Vladimir Murzin vladimir.murzin at arm.com
Thu May 28 03:49:25 PDT 2026


On 5/21/26 14:25, Mark Rutland wrote:
> Currently sve_flush_live() is written in out-of-line assembly. It would
> be nice if we could move it inline such that control flow can be written
> more clearly in C, and to permit the removal of otherwise unused
> assembly macros.
> 
> The 'flush_ffr' argument is redundant as sve_flush_live() is always
> called from non-streaming mode, and all callers pass 'true'. Remove the
> argument and make it a requirement that the function is called from
> non-streaming mode.
> 
> The 'vq_minus_1' argument is unnecessary, as sve_flush_live() can read
> the live VL directly using the RDVL instruction (wrapped by the
> sve_get_vl() helper function).
> 
> Move the function to C, with the simplifications above.
> 
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Fuad Tabba <tabba at google.com>
> Cc: James Morse <james.morse at arm.com>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Mark Brown <broonie at kernel.org>
> Cc: Oliver Upton <oupton at kernel.org>
> Cc: Will Deacon <will at kernel.org>
> ---
>  arch/arm64/include/asm/fpsimd.h       | 26 +++++++++++++++++++++++-
>  arch/arm64/include/asm/fpsimdmacros.h | 29 ---------------------------
>  arch/arm64/kernel/entry-common.c      |  8 ++------
>  arch/arm64/kernel/entry-fpsimd.S      | 22 --------------------
>  arch/arm64/kernel/fpsimd.c            |  2 +-
>  5 files changed, 28 insertions(+), 59 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index d005324bbcf3e..550987b36206a 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -332,7 +332,31 @@ static inline void sve_load_state(const struct sve_state *state, bool ffr)
>  	__sve_load_p(state, vl, ffr);
>  }
>  
> -extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
> +
> +/*
> + * Zero all SVE registers except for the first 128 bits of each vector.
> + *
> + * The caller must ensure that the VL has been configured and that the CPU is
> + * in non-streaming mode.
> + */
> +static inline void sve_flush_live(void)
> +{
> +	unsigned long vl = sve_get_vl();
> +
> +	if (vl > sizeof(__uint128_t)) {
> +		asm volatile(
> +		__FPSIMD_PREAMBLE
> +		FOR_EACH_Z_REG("n", "mov	v\\n\\().16b, v\\n\\().16b")
> +		);
> +	}
> +
> +	asm volatile(
> +	__SVE_PREAMBLE
> +	FOR_EACH_P_REG("n", "pfalse	p\\n\\().b")
> +	"	wrffr	p0.b\n"
> +	);
> +}
> +
>  extern void sme_save_state(struct sme_state *state, int zt);
>  extern void sme_load_state(const struct sme_state *state, int zt);
>  
> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
> index ebf8b47313e90..9e352b5c6b764 100644
> --- a/arch/arm64/include/asm/fpsimdmacros.h
> +++ b/arch/arm64/include/asm/fpsimdmacros.h
> @@ -40,19 +40,6 @@
>  	.endif
>  .endm
>  
> -/* Deprecated macros for SVE instructions */
> -
> -/* WRFFR P\np.B */
> -.macro _sve_wrffr np
> -	wrffr p\np\().b
> -.endm
> -
> -/* PFALSE P\np.B */
> -.macro _sve_pfalse np
> -	.arch_extension sve
> -	pfalse	p\np\().b
> -.endm
> -
>  /* Deprecated macros for SME instructions */
>  
>  /* RDSVL X\nx, #\imm */
> @@ -130,22 +117,6 @@
>  	.purgem _for__body
>  .endm
>  
> -/* Preserve the first 128-bits of Znz and zero the rest. */
> -.macro _sve_flush_z nz
> -	_sve_check_zreg \nz
> -	mov	v\nz\().16b, v\nz\().16b
> -.endm
> -
> -.macro sve_flush_z
> - _for n, 0, 31, _sve_flush_z	\n
> -.endm
> -.macro sve_flush_p
> - _for n, 0, 15, _sve_pfalse	\n
> -.endm
> -.macro sve_flush_ffr
> -		_sve_wrffr	0
> -.endm
> -
>  .macro sme_save_za nxbase, xvl, nw
>  	mov	w\nw, #0
>  
> diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
> index cb54335465f66..2352297330e12 100644
> --- a/arch/arm64/kernel/entry-common.c
> +++ b/arch/arm64/kernel/entry-common.c
> @@ -237,12 +237,8 @@ static inline void fpsimd_syscall_enter(void)
>  	if (!system_supports_sve())
>  		return;
>  
> -	if (test_thread_flag(TIF_SVE)) {
> -		unsigned int sve_vq_minus_one;
> -
> -		sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
> -		sve_flush_live(true, sve_vq_minus_one);
> -	}
> +	if (test_thread_flag(TIF_SVE))
> +		sve_flush_live();
>  
>  	/*
>  	 * Any live non-FPSIMD SVE state has been zeroed. Allow
> diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
> index 0575d90e6dffb..bff941eea9566 100644
> --- a/arch/arm64/kernel/entry-fpsimd.S
> +++ b/arch/arm64/kernel/entry-fpsimd.S
> @@ -11,28 +11,6 @@
>  #include <asm/assembler.h>
>  #include <asm/fpsimdmacros.h>
>  
> -#ifdef CONFIG_ARM64_SVE
> -
> -/*
> - * Zero all SVE registers but the first 128-bits of each vector
> - *
> - * VQ must already be configured by caller, any further updates of VQ
> - * will need to ensure that the register state remains valid.
> - *
> - * x0 = include FFR?
> - * x1 = VQ - 1
> - */
> -SYM_FUNC_START(sve_flush_live)
> -	cbz		x1, 1f	// A VQ-1 of 0 is 128 bits so no extra Z state
> -	sve_flush_z
> -1:	sve_flush_p
> -	tbz		x0, #0, 2f
> -	sve_flush_ffr
> -2:	ret
> -SYM_FUNC_END(sve_flush_live)
> -
> -#endif /* CONFIG_ARM64_SVE */
> -
>  #ifdef CONFIG_ARM64_SME
>  
>  /*
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index f9b3eeacf130d..42177b439b3c7 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -1338,7 +1338,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
>  	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
>  		unsigned long vq = sve_vq_from_vl(task_get_sve_vl(current));
>  		sysreg_clear_set_s(SYS_ZCR_EL1, ZCR_ELx_LEN, vq - 1);
> -		sve_flush_live(true, vq - 1);
> +		sve_flush_live();
>  		fpsimd_bind_task_to_cpu();
>  	} else {
>  		fpsimd_to_sve(current);
> -- 2.30.2
> 

FWIW,

Reviewed-by: Vladimir Murzin <vladimir.murzin at arm.com>




More information about the linux-arm-kernel mailing list