[PATCH 15/18] arm64: fpsimd: Move SVE save/restore inline

Vladimir Murzin vladimir.murzin at arm.com
Thu May 28 03:39:32 PDT 2026


On 5/21/26 14:25, Mark Rutland wrote:
> Currently the SVE register save/restore sequences are written in
> out-of-line assembly routines. While this works, it's somewhat painful:
> 
> * As KVM needs to be able to use the sequences in hyp code, separate
>   assembly files are used for the regular kernel and KVM code. While the
>   common logic is shared in assembly macros, this still requires some
>   duplication, and has lead to some trivial divergence.
> 
> * As the SVE LDR/STR instrucitons have limited addressing modes, the
>   assembly macros use an awkward pattern requiring negative offsets.
>   This could be written more clearly with addresses being generated in C
>   code.
> 
> * As the FFR does not always exist in streaming mode, some awkward
>   conditional branching has been written in assembly which could be
>   clearer in C (and would permit the compiler to optimize out
>   unnecessary branches in some cases).
> 
> * For historical reasons, the assembly macros take some register
>   arguments as numerical indices (e.g. "sve_save 0, x1" uses x0 and x1),
>   which is simply confusing.
> 
> * For historical reasons, the SVE save/restore code and FPSIMD
>   save/restore code have a distinct sequences for FPSR and FPCR. Ideally
>   this logic would be shared.
> 
> * The assembly sequences can't be instrumented, and so it's harder than
>   necessary to catch memory safety issues.
> 
> To handle the above, move the SVE register save/restore sequences
> to inline assembly.
> 
> Neither GCC nor LLVM instrument memory arguments to inline assembly, so
> explicit instrumentation is added in the same manner as other assembly
> routines. This instrumentation is implicitly disabled by Kbuild for nVHE
> hyp code.
> 
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Fuad Tabba <tabba at google.com>
> Cc: James Morse <james.morse at arm.com>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Mark Brown <broonie at kernel.org>
> Cc: Oliver Upton <oupton at kernel.org>
> Cc: Will Deacon <will at kernel.org>
> ---
>  arch/arm64/include/asm/fpsimd.h         | 119 +++++++++++++++++++++++-
>  arch/arm64/include/asm/fpsimdmacros.h   |  61 ------------
>  arch/arm64/include/asm/kvm_hyp.h        |   3 -
>  arch/arm64/kernel/entry-fpsimd.S        |  22 -----
>  arch/arm64/kvm/hyp/fpsimd.S             |  21 -----
>  arch/arm64/kvm/hyp/include/hyp/switch.h |   4 +-
>  arch/arm64/kvm/hyp/nvhe/Makefile        |   2 +-
>  arch/arm64/kvm/hyp/nvhe/hyp-main.c      |   4 +-
>  arch/arm64/kvm/hyp/vhe/Makefile         |   2 +-
>  9 files changed, 123 insertions(+), 115 deletions(-)
>  delete mode 100644 arch/arm64/kvm/hyp/fpsimd.S
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 560814acc60c0..d005324bbcf3e 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -215,8 +215,123 @@ static inline unsigned int sve_get_vl(void)
>  	return vl;
>  }
>  
> -extern void sve_save_state(struct sve_state *state, int save_ffr);
> -extern void sve_load_state(const struct sve_state *state, int restore_ffr);
> +#define FOR_EACH_Z_REG(idx_str, asm_str)											\
> +	"	.irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n"	\
> +	asm_str	"\n"														\
> +	"	.endr\n"
> +
> +#define FOR_EACH_P_REG(idx_str, asm_str)											\
> +	"	.irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n"	\
> +	asm_str	"\n"								\
> +	"	.endr\n"
> +
> +static inline void __sve_save_z(struct sve_state *state, unsigned long vl)
> +{
> +	instrument_write(state, SVE_NUM_ZREGS * vl);
> +	asm volatile(
> +	__SVE_PREAMBLE
> +	FOR_EACH_Z_REG("n", "str	z\\n, [%[zregs], #\\n, MUL VL]")
> +	:
> +	: [zregs] "r" (state)
> +	: "memory"
> +	);
> +}
> +
> +static inline void __sve_load_z(const struct sve_state *state, unsigned long vl)
> +{
> +	instrument_read(state, SVE_NUM_ZREGS * vl);
> +	asm volatile(
> +	__SVE_PREAMBLE
> +	FOR_EACH_Z_REG("n", "ldr	z\\n, [%[zregs], #\\n, MUL VL]")
> +	:
> +	: [zregs] "r" (state)
> +	: "memory"
> +	);
> +}
> +
> +static inline void __sve_save_p(struct sve_state *state, unsigned long vl, bool ffr)
> +{
> +	void *pregs = (void *)state + SVE_NUM_ZREGS * vl;
> +	unsigned long pl = vl / 8;
> +	void *pffr = pregs + SVE_NUM_PREGS * pl;
> +
> +	instrument_write(pregs, SVE_NUM_PREGS * pl);
> +	asm volatile(
> +	__SVE_PREAMBLE
> +	FOR_EACH_P_REG("n", "str	p\\n, [%[pregs], #\\n, MUL VL]\n")
> +	:
> +	: [pregs] "r" (pregs)
> +	: "memory"
> +	);
> +
> +	instrument_write(pffr, pl);
> +	if (ffr) {
> +		asm volatile(
> +		__SVE_PREAMBLE
> +		"	rdffr	p0.b\n"
> +		"	str	p0, [%[pffr]]\n"
> +		"	ldr	p0, [%[pregs]]\n"
> +		:
> +		: [pregs] "r" (pregs),
> +		  [pffr] "r" (pffr)
> +		: "memory"
> +		);
> +	} else {
> +		asm volatile(
> +		__SVE_PREAMBLE
> +		"	pfalse	p0.b\n"
> +		"	str	p0, [%[pffr]]\n"
> +		"	ldr	p0, [%[pregs]]\n"
> +		:
> +		: [pregs] "r" (pregs),
> +		  [pffr] "r" (pffr)
> +		: "memory"
> +		);
> +	}
> +}
> +
> +static inline void __sve_load_p(const struct sve_state *state, unsigned long vl, bool ffr)
> +{
> +	const void *pregs = (const void *)state + SVE_NUM_ZREGS * vl;
> +	unsigned long pl = vl / 8;
> +	const void *pffr = pregs + SVE_NUM_PREGS * pl;
> +
> +	if (ffr) {
> +		instrument_read(pffr, pl);
> +		asm volatile(
> +		__SVE_PREAMBLE
> +		"	ldr	p0, [%[pffr]]\n"
> +		"	wrffr	p0.b\n"
> +		:
> +		: [pffr] "r" (pffr)
> +		: "memory"
> +		);
> +	}
> +
> +	instrument_read(pregs, SVE_NUM_PREGS * pl);
> +	asm volatile(
> +	__SVE_PREAMBLE
> +	FOR_EACH_P_REG("n", "ldr	p\\n, [%[pregs], #\\n, MUL VL]\n")
> +	:
> +	: [pregs] "r" (pregs)
> +	: "memory"
> +	);
> +}
> +
> +static inline void sve_save_state(struct sve_state *state, bool ffr)
> +{
> +	unsigned long vl = sve_get_vl();
> +	__sve_save_z(state, vl);
> +	__sve_save_p(state, vl, ffr);
> +}
> +
> +static inline void sve_load_state(const struct sve_state *state, bool ffr)
> +{
> +	unsigned long vl = sve_get_vl();
> +	__sve_load_z(state, vl);
> +	__sve_load_p(state, vl, ffr);
> +}
> +
>  extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
>  extern void sme_save_state(struct sme_state *state, int zt);
>  extern void sme_load_state(const struct sme_state *state, int zt);
> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
> index 08f4863e67715..ebf8b47313e90 100644
> --- a/arch/arm64/include/asm/fpsimdmacros.h
> +++ b/arch/arm64/include/asm/fpsimdmacros.h
> @@ -42,36 +42,6 @@
>  
>  /* Deprecated macros for SVE instructions */
>  
> -/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
> -.macro _sve_str_v nz, nxbase, offset=0
> -	.arch_extension sve
> -	str	z\nz, [X\nxbase, #\offset, MUL VL]
> -.endm
> -
> -/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
> -.macro _sve_ldr_v nz, nxbase, offset=0
> -	.arch_extension sve
> -	ldr	z\nz, [X\nxbase, #\offset, MUL VL]
> -.endm
> -
> -/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
> -.macro _sve_str_p np, nxbase, offset=0
> -	.arch_extension sve
> -	str	p\np, [X\nxbase, #\offset, MUL VL]
> -.endm
> -
> -/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
> -.macro _sve_ldr_p np, nxbase, offset=0
> -	.arch_extension sve
> -	ldr p\np, [x\nxbase, #\offset, MUL VL]
> -.endm
> -
> -/* RDFFR (unpredicated): RDFFR P\np.B */
> -.macro _sve_rdffr np
> -	.arch_extension sve
> -	rdffr p\np\().b
> -.endm
> -
>  /* WRFFR P\np.B */
>  .macro _sve_wrffr np
>  	wrffr p\np\().b
> @@ -176,37 +146,6 @@
>  		_sve_wrffr	0
>  .endm
>  
> -.macro _sve_pffr ptr
> -	.arch_extension sve
> -	addvl	\ptr, \ptr, #16
> -	addvl	\ptr, \ptr, #16
> -	addpl	\ptr, \ptr, #16
> -.endm
> -
> -.macro sve_save nxbase, save_ffr
> -		_sve_pffr	x\nxbase
> - _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
> - _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
> -		cbz		\save_ffr, 921f
> -		_sve_rdffr	0
> -		b		922f
> -921:
> -		_sve_pfalse	0			// Zero out FFR
> -922:
> -		_sve_str_p	0, \nxbase
> -		_sve_ldr_p	0, \nxbase, -16
> -.endm
> -
> -.macro sve_load nxbase, restore_ffr
> -		_sve_pffr	x\nxbase
> - _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
> -		cbz		\restore_ffr, 921f
> -		_sve_ldr_p	0, \nxbase
> -		_sve_wrffr	0
> -921:
> - _for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16
> -.endm
> -
>  .macro sme_save_za nxbase, xvl, nw
>  	mov	w\nw, #0
>  
> diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> index 38356eee592ad..ad19de1d0654f 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -121,9 +121,6 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
>  void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
>  #endif
>  
> -void __sve_save_state(struct sve_state *sve, int save_ffr);
> -void __sve_restore_state(struct sve_state *sve, int restore_ffr);
> -
>  u64 __guest_enter(struct kvm_vcpu *vcpu);
>  
>  bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
> diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
> index 4fa00c94f28b7..0575d90e6dffb 100644
> --- a/arch/arm64/kernel/entry-fpsimd.S
> +++ b/arch/arm64/kernel/entry-fpsimd.S
> @@ -13,28 +13,6 @@
>  
>  #ifdef CONFIG_ARM64_SVE
>  
> -/*
> - * Save the SVE state
> - *
> - * x0 - pointer to buffer for state
> - * x1 - Save FFR if non-zero
> - */
> -SYM_FUNC_START(sve_save_state)
> -	sve_save 0, x1
> -	ret
> -SYM_FUNC_END(sve_save_state)
> -
> -/*
> - * Load the SVE state
> - *
> - * x0 - pointer to buffer for state
> - * x1 - Restore FFR if non-zero
> - */
> -SYM_FUNC_START(sve_load_state)
> -	sve_load 0, x1
> -	ret
> -SYM_FUNC_END(sve_load_state)
> -
>  /*
>   * Zero all SVE registers but the first 128-bits of each vector
>   *
> diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
> deleted file mode 100644
> index beacec33b2541..0000000000000
> --- a/arch/arm64/kvm/hyp/fpsimd.S
> +++ /dev/null
> @@ -1,21 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0-only */
> -/*
> - * Copyright (C) 2015 - ARM Ltd
> - * Author: Marc Zyngier <marc.zyngier at arm.com>
> - */
> -
> -#include <linux/linkage.h>
> -
> -#include <asm/fpsimdmacros.h>
> -
> -	.text
> -
> -SYM_FUNC_START(__sve_restore_state)
> -	sve_load 0, x1
> -	ret
> -SYM_FUNC_END(__sve_restore_state)
> -
> -SYM_FUNC_START(__sve_save_state)
> -	sve_save 0, x1
> -	ret
> -SYM_FUNC_END(__sve_save_state)
> diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> index 72e658255cda7..41c60c9eea423 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> @@ -467,7 +467,7 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
>  	 * vCPU. Start off with the max VL so we can load the SVE state.
>  	 */
>  	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
> -	__sve_restore_state(kern_hyp_va(vcpu->arch.sve_state), true);
> +	sve_load_state(kern_hyp_va(vcpu->arch.sve_state), true);
>  	fpsimd_load_common(&vcpu->arch.ctxt.fp_regs);
>  
>  	/*
> @@ -488,7 +488,7 @@ static inline void __hyp_sve_save_host(void)
>  
>  	ctxt_sys_reg(hctxt, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
>  	write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
> -	__sve_save_state(sve_regs, true);
> +	sve_save_state(sve_regs, true);
>  	fpsimd_save_common(&hctxt->fp_regs);
>  }
>  
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> index 62cdfbff75625..f57450ebcb498 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -26,7 +26,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
>  	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
>  	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
>  hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
> -	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
> +	 ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
>  hyp-obj-y += ../../../kernel/smccc-call.o
>  hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
>  hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o trace.o events.o
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index 72d025b2178a7..5c43943f24380 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -35,7 +35,7 @@ static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
>  	 * on the VL, so use a consistent (i.e., the maximum) guest VL.
>  	 */
>  	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
> -	__sve_save_state(kern_hyp_va(vcpu->arch.sve_state), true);
> +	sve_save_state(kern_hyp_va(vcpu->arch.sve_state), true);
>  	fpsimd_save_common(&vcpu->arch.ctxt.fp_regs);
>  	write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
>  }
> @@ -55,7 +55,7 @@ static void __hyp_sve_restore_host(void)
>  	 * need to be revisited.
>  	 */
>  	write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
> -	__sve_restore_state(sve_regs, true);
> +	sve_load_state(sve_regs, true);
>  	fpsimd_load_common(&hctxt->fp_regs);
>  	write_sysreg_el1(ctxt_sys_reg(hctxt, ZCR_EL1), SYS_ZCR);
>  }
> diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
> index 9695328bbd96e..d6b3475145c0e 100644
> --- a/arch/arm64/kvm/hyp/vhe/Makefile
> +++ b/arch/arm64/kvm/hyp/vhe/Makefile
> @@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init
>  
>  obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
>  obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
> -	 ../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5-sr.o
> +	 ../hyp-entry.o ../exception.o ../vgic-v5-sr.o
> -- 2.30.2
> 

It seems we have to remember that the memory layout and pointer
arithmetic must stay in sync with asm/sigcontext.h. Hopefully,
these do not change often.

FWIW,

Reviewed-by: Vladimir Murzin <vladimir.murzin at arm.com>




More information about the linux-arm-kernel mailing list