[PATCH v2 17/18] arm64: fpsimd: Move SME save/restore inline
Vladimir Murzin
vladimir.murzin at arm.com
Fri May 29 01:51:06 PDT 2026
On 5/28/26 17:54, Mark Rutland wrote:
> Currently the SME register save/restore sequences are written in
> out-of-line assembly routines. While this works, it's somewhat painful:
>
> * For KVM to use the sequences, portions of the logic will need to be
> duplicated in KVM hyp code. While the common logic can be shared in
> assembly macros, this is very likely to lead to unnecessary divergence
> and be a maintenance burden.
>
> * For historical reasons, the assembly macros take some register
> arguments as numerical indices (e.g. "sme_save_za 0, x2, 12" uses x0, x2, and
> x12), which is simply confusing.
>
> * Address generation and control flow are far clearer in C than in
> assembly.
>
> * The assembly sequences can't be instrumented, and so it's harder than
> necessary to catch memory safety issues.
>
> To handle the above, move the SME register save/restore sequences
> to inline assembly.
>
> Neither GCC nor LLVM instrument memory arguments to inline assembly, so
> explicit instrumentation is added in the same manner as other assembly
> routines. This instrumentation is implicitly disabled by Kbuild for nVHE
> hyp code.
>
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Fuad Tabba <tabba at google.com>
> Cc: James Morse <james.morse at arm.com>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Mark Brown <broonie at kernel.org>
> Cc: Oliver Upton <oupton at kernel.org>
> Cc: Will Deacon <will at kernel.org>
> ---
> arch/arm64/include/asm/fpsimd.h | 106 +++++++++++++++++++++++++-
> arch/arm64/include/asm/fpsimdmacros.h | 76 ------------------
> arch/arm64/kernel/Makefile | 2 +-
> arch/arm64/kernel/entry-fpsimd.S | 48 ------------
> 4 files changed, 104 insertions(+), 128 deletions(-)
> delete mode 100644 arch/arm64/kernel/entry-fpsimd.S
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 550987b36206a..3ab271320d1de 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -357,9 +357,6 @@ static inline void sve_flush_live(void)
> );
> }
>
> -extern void sme_save_state(struct sme_state *state, int zt);
> -extern void sme_load_state(const struct sme_state *state, int zt);
> -
> struct arm64_cpu_capabilities;
> extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
> extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
> @@ -639,6 +636,106 @@ static inline size_t __sme_state_size(unsigned int sme_vl)
> return size;
> }
>
> +static inline void __sme_save_za(struct sme_state *state, unsigned long svl)
> +{
> + /*
> + * The <Wv> argument to LDR/STR (array vector) can only encode W12-W15.
> + * The "Ucj" constraint exists for this, but is only supported by GCC
> + * 14.1.0+ and LLVM 18.1.0+.
> + */
> + register unsigned int v asm ("w12");
> +
> + instrument_write(state, svl * svl);
> + for (v = 0; v < svl; v++) {
> + void *pav = (void *)state + v * svl;
> +
> + asm volatile(
> + __SME_PREAMBLE
> + " str za[%w[v], #0], [%[pav]]\n"
> + :
> + : [v] "r" (v),
> + [pav] "r" (pav)
> + : "memory"
> + );
> + }
> +}
> +
> +static inline void __sme_load_za(const struct sme_state *state, unsigned long svl)
> +{
> + /* See comment in __sme_save_za */
> + register unsigned int v asm ("w12");
> +
> + instrument_read(state, svl * svl);
> + for (v = 0; v < svl; v++) {
> + void *pav = (void *)state + v * svl;
> +
> + asm volatile(
> + __SME_PREAMBLE
> + " ldr za[%w[v], #0], [%[pav]]\n"
> + :
> + : [v] "r" (v),
> + [pav] "r" (pav)
> + : "memory"
> + );
> + }
> +}
> +
> +static inline void __sme_save_zt(struct sme_state *state, unsigned long svl)
> +{
> + void *pzt = (void *)state + svl * svl;
> +
> + instrument_write(pzt, svl);
> + asm volatile(
> + __DEFINE_ASM_GPR_NUMS
> + /*
> + * STR ZT0, [<Xn|SP>]
> + * Supported by binutils 2.41+.
> + * Supported by LLVM 16+
> + */
> + " .inst 0xe13f8000 | ((.L__gpr_num_%[pzt]) << 5)\n"
> + :
> + : [pzt] "r" (pzt)
> + : "memory"
> + );
> +}
> +
> +static inline void __sme_load_zt(const struct sme_state *state, unsigned long svl)
> +{
> + void *pzt = (void *)state + svl * svl;
> +
> + instrument_read(pzt, svl);
> + asm volatile(
> + __DEFINE_ASM_GPR_NUMS
> + /*
> + * LDR ZT0, [<Xn|SP>]
> + * Supported by binutils 2.41+.
> + * Supported by LLVM 16+
> + */
> + " .inst 0xe11f8000 | ((.L__gpr_num_%[pzt]) << 5)\n"
> + :
> + : [pzt] "r" (pzt)
> + : "memory"
> + );
> +}
> +
> +static inline void sme_save_state(struct sme_state *state, bool zt)
> +{
> + unsigned long svl = sme_get_vl();
> +
> + __sme_save_za(state, svl);
> + if (zt)
> + __sme_save_zt(state, svl);
> +}
> +
> +static inline void sme_load_state(const struct sme_state *state, bool zt)
> +{
> + unsigned long svl = sme_get_vl();
> +
> + __sme_load_za(state, svl);
> + if (zt)
> + __sme_load_zt(state, svl);
> +}
> +
> /*
> * Return how many bytes of memory are required to store the full SME
> * specific state for task, given task's currently configured vector
> @@ -695,6 +792,9 @@ static inline size_t sme_state_size(struct task_struct const *task)
> return 0;
> }
>
> +static inline void sme_save_state(struct sme_state *state, bool zt) { BUILD_BUG(); }
> +static inline void sme_load_state(const struct sme_state *state, bool zt) { BUILD_BUG(); }
> +
> static inline void sme_enter_from_user_mode(void) { }
> static inline void sme_exit_to_user_mode(void) { }
>
> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
> index 9e352b5c6b764..a763fd03ffef3 100644
> --- a/arch/arm64/include/asm/fpsimdmacros.h
> +++ b/arch/arm64/include/asm/fpsimdmacros.h
> @@ -40,60 +40,6 @@
> .endif
> .endm
>
> -/* Deprecated macros for SME instructions */
> -
> -/* RDSVL X\nx, #\imm */
> -.macro _sme_rdsvl nx, imm
> - .arch_extension sme
> - rdsvl x\nx, #\imm
> -.endm
> -
> -/*
> - * STR (vector from ZA array):
> - * STR ZA[W\nw, #\offset], [X\nxbase, #\offset, MUL VL]
> - */
> -.macro _sme_str_zav nw, nxbase, offset=0
> - .arch_extension sme
> - str za[w\nw, #\offset], [x\nxbase, #\offset, MUL VL]
> -.endm
> -
> -/*
> - * LDR (vector to ZA array):
> - * LDR ZA[w\nw, #\offset], [X\nxbase, #\offset, MUL VL]
> - */
> -.macro _sme_ldr_zav nw, nxbase, offset=0
> - .arch_extension sme
> - ldr za[w\nw, #\offset], [x\nxbase, #\offset, MUL VL]
> -.endm
> -
> -/*
> - * SME2 instruction encodings for older assemblers.
> - * Supported by binutils 2.41+.
> - * Supported by LLVM 16+
> - */
> -
> -/*
> - * LDR (ZT0)
> - *
> - * LDR ZT0, nx
> - */
> -.macro _ldr_zt nx
> - _check_general_reg \nx
> - .inst 0xe11f8000 \
> - | (\nx << 5)
> -.endm
> -
> -/*
> - * STR (ZT0)
> - *
> - * STR ZT0, nx
> - */
> -.macro _str_zt nx
> - _check_general_reg \nx
> - .inst 0xe13f8000 \
> - | (\nx << 5)
> -.endm
> -
> .macro __for from:req, to:req
> .if (\from) == (\to)
> _for__body %\from
> @@ -116,25 +62,3 @@
>
> .purgem _for__body
> .endm
> -
> -.macro sme_save_za nxbase, xvl, nw
> - mov w\nw, #0
> -
> -423:
> - _sme_str_zav \nw, \nxbase
> - add x\nxbase, x\nxbase, \xvl
> - add x\nw, x\nw, #1
> - cmp \xvl, x\nw
> - bne 423b
> -.endm
> -
> -.macro sme_load_za nxbase, xvl, nw
> - mov w\nw, #0
> -
> -423:
> - _sme_ldr_zav \nw, \nxbase
> - add x\nxbase, x\nxbase, \xvl
> - add x\nw, x\nw, #1
> - cmp \xvl, x\nw
> - bne 423b
> -.endm
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 74b76bb704523..d2690c3ec5288 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -27,7 +27,7 @@ KCOV_INSTRUMENT_idle.o := n
>
> # Object file lists.
> obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
> - entry-common.o entry-fpsimd.o process.o ptrace.o \
> + entry-common.o process.o ptrace.o \
> setup.o signal.o sys.o stacktrace.o time.o traps.o \
> io.o vdso.o hyp-stub.o psci.o cpu_ops.o \
> return_address.o cpuinfo.o cpu_errata.o \
> diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
> deleted file mode 100644
> index bff941eea9566..0000000000000
> --- a/arch/arm64/kernel/entry-fpsimd.S
> +++ /dev/null
> @@ -1,48 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0-only */
> -/*
> - * FP/SIMD state saving and restoring
> - *
> - * Copyright (C) 2012 ARM Ltd.
> - * Author: Catalin Marinas <catalin.marinas at arm.com>
> - */
> -
> -#include <linux/linkage.h>
> -
> -#include <asm/assembler.h>
> -#include <asm/fpsimdmacros.h>
> -
> -#ifdef CONFIG_ARM64_SME
> -
> -/*
> - * Save the ZA and ZT state
> - *
> - * x0 - pointer to buffer for state
> - * x1 - number of ZT registers to save
> - */
> -SYM_FUNC_START(sme_save_state)
> - _sme_rdsvl 2, 1 // x2 = VL/8
> - sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
> -
> - cbz x1, 1f
> - _str_zt 0
> -1:
> - ret
> -SYM_FUNC_END(sme_save_state)
> -
> -/*
> - * Load the ZA and ZT state
> - *
> - * x0 - pointer to buffer for state
> - * x1 - number of ZT registers to save
> - */
> -SYM_FUNC_START(sme_load_state)
> - _sme_rdsvl 2, 1 // x2 = VL/8
> - sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
> -
> - cbz x1, 1f
> - _ldr_zt 0
> -1:
> - ret
> -SYM_FUNC_END(sme_load_state)
> -
> -#endif /* CONFIG_ARM64_SME */
> -- 2.30.2
>
FWIW,
Reviewed-by: Vladimir Murzin <vladimir.murzin at arm.com>
More information about the linux-arm-kernel
mailing list