[PATCH 15/18] arm64: fpsimd: Move SVE save/restore inline

Mark Rutland mark.rutland at arm.com
Thu May 21 06:25:53 PDT 2026


Currently the SVE register save/restore sequences are written in
out-of-line assembly routines. While this works, it's somewhat painful:

* As KVM needs to be able to use the sequences in hyp code, separate
  assembly files are used for the regular kernel and KVM code. While the
  common logic is shared in assembly macros, this still requires some
  duplication, and has led to some trivial divergence.

* As the SVE LDR/STR instructions have limited addressing modes, the
  assembly macros use an awkward pattern requiring negative offsets.
  This could be written more clearly with addresses being generated in C
  code.

* As the FFR does not always exist in streaming mode, some awkward
  conditional branching has been written in assembly which could be
  clearer in C (and would permit the compiler to optimize out
  unnecessary branches in some cases).

* For historical reasons, the assembly macros take some register
  arguments as numerical indices (e.g. "sve_save 0, x1" uses x0 and x1),
  which is simply confusing.

* For historical reasons, the SVE save/restore code and FPSIMD
  save/restore code have distinct sequences for FPSR and FPCR. Ideally
  this logic would be shared.

* The assembly sequences can't be instrumented, and so it's harder than
  necessary to catch memory safety issues.

To handle the above, move the SVE register save/restore sequences
to inline assembly.

Neither GCC nor LLVM instrument memory arguments to inline assembly, so
explicit instrumentation is added in the same manner as other assembly
routines. This instrumentation is implicitly disabled by Kbuild for nVHE
hyp code.

Signed-off-by: Mark Rutland <mark.rutland at arm.com>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Fuad Tabba <tabba at google.com>
Cc: James Morse <james.morse at arm.com>
Cc: Marc Zyngier <maz at kernel.org>
Cc: Mark Brown <broonie at kernel.org>
Cc: Oliver Upton <oupton at kernel.org>
Cc: Will Deacon <will at kernel.org>
---
 arch/arm64/include/asm/fpsimd.h         | 119 +++++++++++++++++++++++-
 arch/arm64/include/asm/fpsimdmacros.h   |  61 ------------
 arch/arm64/include/asm/kvm_hyp.h        |   3 -
 arch/arm64/kernel/entry-fpsimd.S        |  22 -----
 arch/arm64/kvm/hyp/fpsimd.S             |  21 -----
 arch/arm64/kvm/hyp/include/hyp/switch.h |   4 +-
 arch/arm64/kvm/hyp/nvhe/Makefile        |   2 +-
 arch/arm64/kvm/hyp/nvhe/hyp-main.c      |   4 +-
 arch/arm64/kvm/hyp/vhe/Makefile         |   2 +-
 9 files changed, 123 insertions(+), 115 deletions(-)
 delete mode 100644 arch/arm64/kvm/hyp/fpsimd.S

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 560814acc60c0..d005324bbcf3e 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -215,8 +215,123 @@ static inline unsigned int sve_get_vl(void)
 	return vl;
 }
 
-extern void sve_save_state(struct sve_state *state, int save_ffr);
-extern void sve_load_state(const struct sve_state *state, int restore_ffr);
+#define FOR_EACH_Z_REG(idx_str, asm_str)	/* asm text: repeat asm_str for indices 0-31, with idx_str as the .irp symbol */	\
+	"	.irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n"	\
+	asm_str	"\n"														\
+	"	.endr\n"
+
+#define FOR_EACH_P_REG(idx_str, asm_str)	/* asm text: repeat asm_str for indices 0-15, with idx_str as the .irp symbol */	\
+	"	.irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n"	\
+	asm_str	"\n"								\
+	"	.endr\n"
+
+static inline void __sve_save_z(struct sve_state *state, unsigned long vl)	/* store z0-z31 to the start of @state */
+{
+	instrument_write(state, SVE_NUM_ZREGS * vl);	/* explicit: the compiler does not instrument asm memory accesses */
+	asm volatile(
+	__SVE_PREAMBLE
+	FOR_EACH_Z_REG("n", "str	z\\n, [%[zregs], #\\n, MUL VL]")	/* z<n> goes to slot n; the offset is scaled by VL */
+	:
+	: [zregs] "r" (state)
+	: "memory"	/* the buffer is written without an output operand */
+	);
+}
+
+static inline void __sve_load_z(const struct sve_state *state, unsigned long vl)	/* load z0-z31 from the start of @state */
+{
+	instrument_read(state, SVE_NUM_ZREGS * vl);	/* explicit: the compiler does not instrument asm memory accesses */
+	asm volatile(
+	__SVE_PREAMBLE
+	FOR_EACH_Z_REG("n", "ldr	z\\n, [%[zregs], #\\n, MUL VL]")	/* z<n> comes from slot n; the offset is scaled by VL */
+	:
+	: [zregs] "r" (state)
+	: "memory"	/* the buffer is read without a memory input operand */
+	);
+}
+
+static inline void __sve_save_p(struct sve_state *state, unsigned long vl, bool ffr)	/* store p0-p15, then fill the FFR slot */
+{
+	void *pregs = (void *)state + SVE_NUM_ZREGS * vl;	/* predicate slots follow the Z register area */
+	unsigned long pl = vl / 8;	/* size of one predicate slot */
+	void *pffr = pregs + SVE_NUM_PREGS * pl;	/* the FFR slot follows the predicate slots */
+
+	instrument_write(pregs, SVE_NUM_PREGS * pl);	/* explicit: the compiler does not instrument asm memory accesses */
+	asm volatile(
+	__SVE_PREAMBLE
+	FOR_EACH_P_REG("n", "str	p\\n, [%[pregs], #\\n, MUL VL]")	/* no trailing newline: the macro appends one */
+	:
+	: [pregs] "r" (pregs)
+	: "memory"
+	);
+
+	instrument_write(pffr, pl);	/* both branches below store to the FFR slot */
+	if (ffr) {
+		asm volatile(
+		__SVE_PREAMBLE
+		"	rdffr	p0.b\n"	/* FFR is read into p0, which was already saved above */
+		"	str	p0, [%[pffr]]\n"
+		"	ldr	p0, [%[pregs]]\n"	/* reload p0 from its saved slot (offset 0 of pregs) */
+		:
+		: [pregs] "r" (pregs),
+		  [pffr] "r" (pffr)
+		: "memory"
+		);
+	} else {
+		asm volatile(
+		__SVE_PREAMBLE
+		"	pfalse	p0.b\n"	/* FFR is not saved: store an all-false predicate in its slot */
+		"	str	p0, [%[pffr]]\n"
+		"	ldr	p0, [%[pregs]]\n"	/* reload p0 from its saved slot (offset 0 of pregs) */
+		:
+		: [pregs] "r" (pregs),
+		  [pffr] "r" (pffr)
+		: "memory"
+		);
+	}
+}
+
+static inline void __sve_load_p(const struct sve_state *state, unsigned long vl, bool ffr)	/* optionally load FFR, then p0-p15 */
+{
+	const void *pregs = (const void *)state + SVE_NUM_ZREGS * vl;	/* predicate slots follow the Z register area */
+	unsigned long pl = vl / 8;	/* size of one predicate slot */
+	const void *pffr = pregs + SVE_NUM_PREGS * pl;	/* the FFR slot follows the predicate slots */
+
+	if (ffr) {	/* FFR goes first: it is written via p0, which the loads below overwrite */
+		instrument_read(pffr, pl);
+		asm volatile(
+		__SVE_PREAMBLE
+		"	ldr	p0, [%[pffr]]\n"
+		"	wrffr	p0.b\n"
+		:
+		: [pffr] "r" (pffr)
+		: "memory"
+		);
+	}
+
+	instrument_read(pregs, SVE_NUM_PREGS * pl);	/* explicit: the compiler does not instrument asm memory accesses */
+	asm volatile(
+	__SVE_PREAMBLE
+	FOR_EACH_P_REG("n", "ldr	p\\n, [%[pregs], #\\n, MUL VL]")	/* no trailing newline: the macro appends one */
+	:
+	: [pregs] "r" (pregs)
+	: "memory"
+	);
+}
+
+static inline void sve_save_state(struct sve_state *state, bool ffr)	/* save Z regs, P regs and the FFR slot to @state */
+{
+	unsigned long vl = sve_get_vl();	/* the current vector length fixes the buffer layout */
+	__sve_save_z(state, vl);	/* Z registers occupy the start of the buffer */
+	__sve_save_p(state, vl, ffr);	/* predicates follow; @ffr false stores an all-false FFR slot */
+}
+
+static inline void sve_load_state(const struct sve_state *state, bool ffr)	/* load Z regs, P regs and optionally FFR from @state */
+{
+	unsigned long vl = sve_get_vl();	/* the current vector length fixes the buffer layout */
+	__sve_load_z(state, vl);	/* Z registers occupy the start of the buffer */
+	__sve_load_p(state, vl, ffr);	/* predicates follow; FFR is only written when @ffr is true */
+}
+
 extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
 extern void sme_save_state(struct sme_state *state, int zt);
 extern void sme_load_state(const struct sme_state *state, int zt);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 08f4863e67715..ebf8b47313e90 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -42,36 +42,6 @@
 
 /* Deprecated macros for SVE instructions */
 
-/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_str_v nz, nxbase, offset=0
-	.arch_extension sve
-	str	z\nz, [X\nxbase, #\offset, MUL VL]
-.endm
-
-/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_ldr_v nz, nxbase, offset=0
-	.arch_extension sve
-	ldr	z\nz, [X\nxbase, #\offset, MUL VL]
-.endm
-
-/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_str_p np, nxbase, offset=0
-	.arch_extension sve
-	str	p\np, [X\nxbase, #\offset, MUL VL]
-.endm
-
-/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_ldr_p np, nxbase, offset=0
-	.arch_extension sve
-	ldr p\np, [x\nxbase, #\offset, MUL VL]
-.endm
-
-/* RDFFR (unpredicated): RDFFR P\np.B */
-.macro _sve_rdffr np
-	.arch_extension sve
-	rdffr p\np\().b
-.endm
-
 /* WRFFR P\np.B */
 .macro _sve_wrffr np
 	wrffr p\np\().b
@@ -176,37 +146,6 @@
 		_sve_wrffr	0
 .endm
 
-.macro _sve_pffr ptr
-	.arch_extension sve
-	addvl	\ptr, \ptr, #16
-	addvl	\ptr, \ptr, #16
-	addpl	\ptr, \ptr, #16
-.endm
-
-.macro sve_save nxbase, save_ffr
-		_sve_pffr	x\nxbase
- _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
- _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
-		cbz		\save_ffr, 921f
-		_sve_rdffr	0
-		b		922f
-921:
-		_sve_pfalse	0			// Zero out FFR
-922:
-		_sve_str_p	0, \nxbase
-		_sve_ldr_p	0, \nxbase, -16
-.endm
-
-.macro sve_load nxbase, restore_ffr
-		_sve_pffr	x\nxbase
- _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
-		cbz		\restore_ffr, 921f
-		_sve_ldr_p	0, \nxbase
-		_sve_wrffr	0
-921:
- _for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16
-.endm
-
 .macro sme_save_za nxbase, xvl, nw
 	mov	w\nw, #0
 
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 38356eee592ad..ad19de1d0654f 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -121,9 +121,6 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
 void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
 #endif
 
-void __sve_save_state(struct sve_state *sve, int save_ffr);
-void __sve_restore_state(struct sve_state *sve, int restore_ffr);
-
 u64 __guest_enter(struct kvm_vcpu *vcpu);
 
 bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 4fa00c94f28b7..0575d90e6dffb 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -13,28 +13,6 @@
 
 #ifdef CONFIG_ARM64_SVE
 
-/*
- * Save the SVE state
- *
- * x0 - pointer to buffer for state
- * x1 - Save FFR if non-zero
- */
-SYM_FUNC_START(sve_save_state)
-	sve_save 0, x1
-	ret
-SYM_FUNC_END(sve_save_state)
-
-/*
- * Load the SVE state
- *
- * x0 - pointer to buffer for state
- * x1 - Restore FFR if non-zero
- */
-SYM_FUNC_START(sve_load_state)
-	sve_load 0, x1
-	ret
-SYM_FUNC_END(sve_load_state)
-
 /*
  * Zero all SVE registers but the first 128-bits of each vector
  *
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
deleted file mode 100644
index beacec33b2541..0000000000000
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier at arm.com>
- */
-
-#include <linux/linkage.h>
-
-#include <asm/fpsimdmacros.h>
-
-	.text
-
-SYM_FUNC_START(__sve_restore_state)
-	sve_load 0, x1
-	ret
-SYM_FUNC_END(__sve_restore_state)
-
-SYM_FUNC_START(__sve_save_state)
-	sve_save 0, x1
-	ret
-SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 72e658255cda7..41c60c9eea423 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -467,7 +467,7 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 	 * vCPU. Start off with the max VL so we can load the SVE state.
 	 */
 	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
-	__sve_restore_state(kern_hyp_va(vcpu->arch.sve_state), true);
+	sve_load_state(kern_hyp_va(vcpu->arch.sve_state), true);
 	fpsimd_load_common(&vcpu->arch.ctxt.fp_regs);
 
 	/*
@@ -488,7 +488,7 @@ static inline void __hyp_sve_save_host(void)
 
 	ctxt_sys_reg(hctxt, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
 	write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
-	__sve_save_state(sve_regs, true);
+	sve_save_state(sve_regs, true);
 	fpsimd_save_common(&hctxt->fp_regs);
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 62cdfbff75625..f57450ebcb498 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -26,7 +26,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
 	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
 	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
-	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
+	 ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
 hyp-obj-y += ../../../kernel/smccc-call.o
 hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
 hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o trace.o events.o
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 72d025b2178a7..5c43943f24380 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -35,7 +35,7 @@ static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
 	 * on the VL, so use a consistent (i.e., the maximum) guest VL.
 	 */
 	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
-	__sve_save_state(kern_hyp_va(vcpu->arch.sve_state), true);
+	sve_save_state(kern_hyp_va(vcpu->arch.sve_state), true);
 	fpsimd_save_common(&vcpu->arch.ctxt.fp_regs);
 	write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
 }
@@ -55,7 +55,7 @@ static void __hyp_sve_restore_host(void)
 	 * need to be revisited.
 	 */
 	write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
-	__sve_restore_state(sve_regs, true);
+	sve_load_state(sve_regs, true);
 	fpsimd_load_common(&hctxt->fp_regs);
 	write_sysreg_el1(ctxt_sys_reg(hctxt, ZCR_EL1), SYS_ZCR);
 }
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
index 9695328bbd96e..d6b3475145c0e 100644
--- a/arch/arm64/kvm/hyp/vhe/Makefile
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init
 
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
 obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
-	 ../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5-sr.o
+	 ../hyp-entry.o ../exception.o ../vgic-v5-sr.o
-- 
2.30.2




More information about the linux-arm-kernel mailing list