[PATCH 3/3] arm64: lib: Use MOPS for usercopy routines
Robin Murphy
robin.murphy at arm.com
Thu Feb 20 11:15:53 PST 2025
On 18/02/2025 5:14 pm, Kristina Martšenko wrote:
> Similarly to what was done with the memcpy() routines, make
> copy_to_user(), copy_from_user() and clear_user() also use the Armv8.8
> FEAT_MOPS instructions.
>
> Both MOPS implementation options (A and B) are supported, including
> asymmetric systems. The exception fixup code fixes up the registers
> according to the option used.
>
> In case of a fault the routines return precisely how much was not copied
> (as required by the comment in include/linux/uaccess.h), as unprivileged
> versions of CPY/SET are guaranteed not to have written past the
> addresses reported in the GPRs.
>
> The MOPS instructions could possibly be inlined into callers (and
> patched to branch to the generic implementation if not detected;
> similarly to what x86 does), but as a first step this patch just uses
> them in the out-of-line routines.
>
> Signed-off-by: Kristina Martšenko <kristina.martsenko at arm.com>
> ---
> arch/arm64/include/asm/asm-uaccess.h | 4 ++++
> arch/arm64/lib/clear_user.S | 25 +++++++++++++++++++++----
> arch/arm64/lib/copy_from_user.S | 10 ++++++++++
> arch/arm64/lib/copy_template.S | 10 ++++++++++
> arch/arm64/lib/copy_to_user.S | 10 ++++++++++
> 5 files changed, 55 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
> index 5b6efe8abeeb..9148f5a31968 100644
> --- a/arch/arm64/include/asm/asm-uaccess.h
> +++ b/arch/arm64/include/asm/asm-uaccess.h
> @@ -61,6 +61,10 @@ alternative_else_nop_endif
> 9999: x; \
> _asm_extable_uaccess 9999b, l
>
> +#define USER_CPY(l, uaccess_is_write, x...) \
> +9999: x; \
> + _asm_extable_uaccess_cpy 9999b, l, uaccess_is_write
> +
> /*
> * Generate the assembly for LDTR/STTR with exception table entries.
> * This is complicated as there is no post-increment or pair versions of the
> diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
> index a5a5f5b97b17..de9a303b6ad0 100644
> --- a/arch/arm64/lib/clear_user.S
> +++ b/arch/arm64/lib/clear_user.S
> @@ -17,14 +17,27 @@
> * Alignment fixed up by hardware.
> */
>
> - .p2align 4
> - // Alignment is for the loop, but since the prologue (including BTI)
> - // is also 16 bytes we can keep any padding outside the function
> SYM_FUNC_START(__arch_clear_user)
> add x2, x0, x1
This subtlety...
> +
> +#ifdef CONFIG_AS_HAS_MOPS
> + .arch_extension mops
> +alternative_if_not ARM64_HAS_MOPS
> + b .Lno_mops
> +alternative_else_nop_endif
> +
> +USER(9f, setpt [x0]!, x1!, xzr)
> +USER(6f, setmt [x0]!, x1!, xzr)
> +USER(6f, setet [x0]!, x1!, xzr)
> + mov x0, #0
> + ret
> +.Lno_mops:
> +#endif
> +
> subs x1, x1, #8
> b.mi 2f
> -1:
> +
> +1: .p2align 4
> USER(9f, sttr xzr, [x0])
> add x0, x0, #8
> subs x1, x1, #8
> @@ -47,6 +60,10 @@ USER(7f, sttrb wzr, [x2, #-1])
> ret
>
> // Exception fixups
> +6: b.cs 9f
> + // Registers are in Option A format
> + add x0, x0, x1
> + b 9f
...and then all this is a bit hard to follow, IMO. I'd be inclined to
just have dedicated "mov x0, x1; ret" and "cneg x0, x1, cc; ret" fixups
for the prologue and other ops, rather than entangle them with the
non-MOPS flow at all. (Plus then the prologue fixup could arguably be
the normal exit path as well...)
> 7: sub x0, x2, #5 // Adjust for faulting on the final byte...
> 8: add x0, x0, #4 // ...or the second word of the 4-7 byte case
> 9: sub x0, x2, x0
> diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
> index 34e317907524..400057d607ec 100644
> --- a/arch/arm64/lib/copy_from_user.S
> +++ b/arch/arm64/lib/copy_from_user.S
> @@ -52,6 +52,13 @@
> stp \reg1, \reg2, [\ptr], \val
> .endm
>
> + .macro cpy1 dst, src, count
> + .arch_extension mops
> + USER_CPY(9997f, 0, cpyfprt [\dst]!, [\src]!, \count!)
> + USER_CPY(9996f, 0, cpyfmrt [\dst]!, [\src]!, \count!)
> + USER_CPY(9996f, 0, cpyfert [\dst]!, [\src]!, \count!)
> + .endm
> +
> end .req x5
> srcin .req x15
> SYM_FUNC_START(__arch_copy_from_user)
> @@ -62,6 +69,9 @@ SYM_FUNC_START(__arch_copy_from_user)
> ret
>
> // Exception fixups
> +9996: b.cs 9997f
> + // Registers are in Option A format
> + add dst, dst, count
However, for copies it's somewhat justified since, IIUC, MOPS aren't
guaranteed to make progress if we're starting on the last byte of a page
and the next page is unmapped, and thus we may still have a "try harder"
requirement similar to the previous alignment fault case — is that right?
Thanks,
Robin.
> 9997: cmp dst, dstin
> b.ne 9998f
> // Before being absolutely sure we couldn't copy anything, try harder
> diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S
> index 488df234c49a..7f2f5a0e2fb9 100644
> --- a/arch/arm64/lib/copy_template.S
> +++ b/arch/arm64/lib/copy_template.S
> @@ -40,6 +40,16 @@ D_l .req x13
> D_h .req x14
>
> mov dst, dstin
> +
> +#ifdef CONFIG_AS_HAS_MOPS
> +alternative_if_not ARM64_HAS_MOPS
> + b .Lno_mops
> +alternative_else_nop_endif
> + cpy1 dst, src, count
> + b .Lexitfunc
> +.Lno_mops:
> +#endif
> +
> cmp count, #16
> /*When memory length is less than 16, the accessed are not aligned.*/
> b.lo .Ltiny15
> diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
> index 802231772608..819f2e3fc7a9 100644
> --- a/arch/arm64/lib/copy_to_user.S
> +++ b/arch/arm64/lib/copy_to_user.S
> @@ -51,6 +51,13 @@
> user_stp 9997f, \reg1, \reg2, \ptr, \val
> .endm
>
> + .macro cpy1 dst, src, count
> + .arch_extension mops
> + USER_CPY(9997f, 1, cpyfpwt [\dst]!, [\src]!, \count!)
> + USER_CPY(9996f, 1, cpyfmwt [\dst]!, [\src]!, \count!)
> + USER_CPY(9996f, 1, cpyfewt [\dst]!, [\src]!, \count!)
> + .endm
> +
> end .req x5
> srcin .req x15
> SYM_FUNC_START(__arch_copy_to_user)
> @@ -61,6 +68,9 @@ SYM_FUNC_START(__arch_copy_to_user)
> ret
>
> // Exception fixups
> +9996: b.cs 9997f
> + // Registers are in Option A format
> + add dst, dst, count
> 9997: cmp dst, dstin
> b.ne 9998f
> // Before being absolutely sure we couldn't copy anything, try harder
More information about the linux-arm-kernel
mailing list