[PATCH 08/18] arm64: fpsimd: Use assembler for baseline SME instructions

Vladimir Murzin vladimir.murzin at arm.com
Wed May 27 06:06:00 PDT 2026


On 5/21/26 14:25, Mark Rutland wrote:
> We currently support assemblers which do not support SME instructions,
> and have macros to manually encode SME instructions. This was
> necessary historically as SME support was developed before assembler
> support was widely available, but things have changed:
> 
> * All currently supported versions of LLVM support baseline SME
>   instructions. Building the kernel requires LLVM 15+, while LLVM 13+
>   supports SME.
> 
> * GNU binutils has supported baseline SME instructions since 2.38, which
>   was released on 09 February 2022. Toolchains using this or later are
>   widely available. For example Debian 12 (released on 10 June 2023)
>   provides binutils 2.40. Toolchains provided by kernel.org provide
>   binutils 2.38+ since the GCC 12.1.0 release (released between 06 May
>   2022 and 17 August 2022).
> 
> * For various reasons, SME support was marked as BROKEN, and re-enabled
>   in v6.16 (released on 27 July 2025). The earliest supported LTS kernel
>   with SME support is v6.18.y; v6.18 was tagged on 30 November 2025, and
>   contemporary toolchains (GCC 15.2 and binutils 2.45) supported
>   baseline SME instructions.
> 
> * Any distribution which intends to support SME will presumably have a
>   toolchain that supports baseline SME instructions such that userspace
>   can be built.
> 
> Considering the above, there's no practical benefit to allowing SME to
> be built when the toolchain doesn't support baseline SME instructions.
> 
> Make CONFIG_ARM64_SME depend on assembler support for SME, and remove
> the manual encoding of SME instructions. The various _sme_<insn> macros
> are kept for now, and will be cleaned up in subsequent patches.
> 
> A couple of SME2 instructions require a more recent toolchain, and are
> left as-is for now. I've looked through releases of binutils and LLVM to
> find when support was added, and noted this in a comment.
> 
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Fuad Tabba <tabba at google.com>
> Cc: James Morse <james.morse at arm.com>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Mark Brown <broonie at kernel.org>
> Cc: Oliver Upton <oupton at kernel.org>
> Cc: Will Deacon <will at kernel.org>
> ---
>  arch/arm64/Kconfig                    |  5 ++++
>  arch/arm64/include/asm/fpsimdmacros.h | 38 +++++++++++----------------
>  2 files changed, 20 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index fe60738e5943b..378e50fef247a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -2247,10 +2247,15 @@ config ARM64_SVE
>  	  booting the kernel.  If unsure and you are not observing these
>  	  symptoms, you should assume that it is safe to say Y.
>  
> +config AS_HAS_SME
> +	# Supported by LLVM 13+ and binutils 2.38+
> +	def_bool $(as-instr,.arch_extension sme)
> +
>  config ARM64_SME
>  	bool "ARM Scalable Matrix Extension support"
>  	default y
>  	depends on ARM64_SVE
> +	depends on AS_HAS_SME
>  	help
>  	  The Scalable Matrix Extension (SME) is an extension to the AArch64
>  	  execution state which utilises a substantial subset of the SVE
> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
> index 1122eea6daacf..d0bdbbf2d44ad 100644
> --- a/arch/arm64/include/asm/fpsimdmacros.h
> +++ b/arch/arm64/include/asm/fpsimdmacros.h
> @@ -148,46 +148,38 @@
>  	pfalse	p\np\().b
>  .endm
>  
> -/* SME instruction encodings for non-SME-capable assemblers */
> -/* (pre binutils 2.38/LLVM 13) */
> +/* Deprecated macros for SME instructions */
>  
>  /* RDSVL X\nx, #\imm */
>  .macro _sme_rdsvl nx, imm
> -	_check_general_reg \nx
> -	_check_num (\imm), -0x20, 0x1f
> -	.inst	0x04bf5800			\
> -		| (\nx)				\
> -		| (((\imm) & 0x3f) << 5)
> +	.arch_extension sme
> +	rdsvl x\nx, #\imm
>  .endm
>  
>  /*
>   * STR (vector from ZA array):
> - *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
> + *	STR ZA[W\nw, #\offset], [X\nxbase, #\offset, MUL VL]
>   */
>  .macro _sme_str_zav nw, nxbase, offset=0
> -	_sme_check_wv \nw
> -	_check_general_reg \nxbase
> -	_check_num (\offset), -0x100, 0xff
> -	.inst	0xe1200000			\
> -		| (((\nw) & 3) << 13)		\
> -		| ((\nxbase) << 5)		\
> -		| ((\offset) & 7)
> +	.arch_extension sme
> +	str	za[w\nw, #\offset], [x\nxbase, #\offset, MUL VL]
>  .endm
>  
>  /*
>   * LDR (vector to ZA array):
> - *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
> + *	LDR ZA[W\nw, #\offset], [X\nxbase, #\offset, MUL VL]
>   */
>  .macro _sme_ldr_zav nw, nxbase, offset=0
> -	_sme_check_wv \nw
> -	_check_general_reg \nxbase
> -	_check_num (\offset), -0x100, 0xff
> -	.inst	0xe1000000			\
> -		| (((\nw) & 3) << 13)		\
> -		| ((\nxbase) << 5)		\
> -		| ((\offset) & 7)
> +	.arch_extension sme
> +	ldr	za[w\nw, #\offset], [x\nxbase, #\offset, MUL VL]
>  .endm
>  
> +/*
> + * SME2 instruction encodings for older assemblers.
> + * Supported by binutils 2.41+.
> + * Supported by LLVM 16+
> + */
> +
>  /*
>   * LDR (ZT0)
>   *
> -- 2.30.2
> 

Same nitpick about redundancy of instruction comments, but I assume you want to keep them :)

FWIW,

Reviewed-by: Vladimir Murzin <vladimir.murzin at arm.com>




More information about the linux-arm-kernel mailing list