[PATCH 3/3] arm64: Garbage-collect usercopy leftovers
Robin Murphy
robin.murphy at arm.com
Wed Sep 28 04:58:53 PDT 2022
With both usercopy routines replaced, remove the now-unused template
and supporting macros.
Signed-off-by: Robin Murphy <robin.murphy at arm.com>
---
arch/arm64/include/asm/asm-uaccess.h | 30 -----
arch/arm64/lib/copy_template.S | 181 ---------------------------
2 files changed, 211 deletions(-)
delete mode 100644 arch/arm64/lib/copy_template.S
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index 75b211c98dea..c2dea9b6b9e9 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -62,34 +62,4 @@ alternative_else_nop_endif
#define USER(l, x...) \
9999: x; \
_asm_extable_uaccess 9999b, l
-
-/*
- * Generate the assembly for LDTR/STTR with exception table entries.
- * This is complicated as there is no post-increment or pair versions of the
- * unprivileged instructions, and USER() only works for single instructions.
- */
- .macro user_ldp l, reg1, reg2, addr, post_inc
-8888: ldtr \reg1, [\addr];
-8889: ldtr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
-
- _asm_extable_uaccess 8888b, \l;
- _asm_extable_uaccess 8889b, \l;
- .endm
-
- .macro user_stp l, reg1, reg2, addr, post_inc
-8888: sttr \reg1, [\addr];
-8889: sttr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
-
- _asm_extable_uaccess 8888b,\l;
- _asm_extable_uaccess 8889b,\l;
- .endm
-
- .macro user_ldst l, inst, reg, addr, post_inc
-8888: \inst \reg, [\addr];
- add \addr, \addr, \post_inc;
-
- _asm_extable_uaccess 8888b, \l;
- .endm
#endif
diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S
deleted file mode 100644
index 488df234c49a..000000000000
--- a/arch/arm64/lib/copy_template.S
+++ /dev/null
@@ -1,181 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
- *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
- */
-
-
-/*
- * Copy a buffer from src to dest (alignment handled by the hardware)
- *
- * Parameters:
- * x0 - dest
- * x1 - src
- * x2 - n
- * Returns:
- * x0 - dest
- */
-dstin .req x0
-src .req x1
-count .req x2
-tmp1 .req x3
-tmp1w .req w3
-tmp2 .req x4
-tmp2w .req w4
-dst .req x6
-
-A_l .req x7
-A_h .req x8
-B_l .req x9
-B_h .req x10
-C_l .req x11
-C_h .req x12
-D_l .req x13
-D_h .req x14
-
- mov dst, dstin
- cmp count, #16
- /*When memory length is less than 16, the accessed are not aligned.*/
- b.lo .Ltiny15
-
- neg tmp2, src
- ands tmp2, tmp2, #15/* Bytes to reach alignment. */
- b.eq .LSrcAligned
- sub count, count, tmp2
- /*
- * Copy the leading memory data from src to dst in an increasing
- * address order.By this way,the risk of overwriting the source
- * memory data is eliminated when the distance between src and
- * dst is less than 16. The memory accesses here are alignment.
- */
- tbz tmp2, #0, 1f
- ldrb1 tmp1w, src, #1
- strb1 tmp1w, dst, #1
-1:
- tbz tmp2, #1, 2f
- ldrh1 tmp1w, src, #2
- strh1 tmp1w, dst, #2
-2:
- tbz tmp2, #2, 3f
- ldr1 tmp1w, src, #4
- str1 tmp1w, dst, #4
-3:
- tbz tmp2, #3, .LSrcAligned
- ldr1 tmp1, src, #8
- str1 tmp1, dst, #8
-
-.LSrcAligned:
- cmp count, #64
- b.ge .Lcpy_over64
- /*
- * Deal with small copies quickly by dropping straight into the
- * exit block.
- */
-.Ltail63:
- /*
- * Copy up to 48 bytes of data. At this point we only need the
- * bottom 6 bits of count to be accurate.
- */
- ands tmp1, count, #0x30
- b.eq .Ltiny15
- cmp tmp1w, #0x20
- b.eq 1f
- b.lt 2f
- ldp1 A_l, A_h, src, #16
- stp1 A_l, A_h, dst, #16
-1:
- ldp1 A_l, A_h, src, #16
- stp1 A_l, A_h, dst, #16
-2:
- ldp1 A_l, A_h, src, #16
- stp1 A_l, A_h, dst, #16
-.Ltiny15:
- /*
- * Prefer to break one ldp/stp into several load/store to access
- * memory in an increasing address order,rather than to load/store 16
- * bytes from (src-16) to (dst-16) and to backward the src to aligned
- * address,which way is used in original cortex memcpy. If keeping
- * the original memcpy process here, memmove need to satisfy the
- * precondition that src address is at least 16 bytes bigger than dst
- * address,otherwise some source data will be overwritten when memove
- * call memcpy directly. To make memmove simpler and decouple the
- * memcpy's dependency on memmove, withdrew the original process.
- */
- tbz count, #3, 1f
- ldr1 tmp1, src, #8
- str1 tmp1, dst, #8
-1:
- tbz count, #2, 2f
- ldr1 tmp1w, src, #4
- str1 tmp1w, dst, #4
-2:
- tbz count, #1, 3f
- ldrh1 tmp1w, src, #2
- strh1 tmp1w, dst, #2
-3:
- tbz count, #0, .Lexitfunc
- ldrb1 tmp1w, src, #1
- strb1 tmp1w, dst, #1
-
- b .Lexitfunc
-
-.Lcpy_over64:
- subs count, count, #128
- b.ge .Lcpy_body_large
- /*
- * Less than 128 bytes to copy, so handle 64 here and then jump
- * to the tail.
- */
- ldp1 A_l, A_h, src, #16
- stp1 A_l, A_h, dst, #16
- ldp1 B_l, B_h, src, #16
- ldp1 C_l, C_h, src, #16
- stp1 B_l, B_h, dst, #16
- stp1 C_l, C_h, dst, #16
- ldp1 D_l, D_h, src, #16
- stp1 D_l, D_h, dst, #16
-
- tst count, #0x3f
- b.ne .Ltail63
- b .Lexitfunc
-
- /*
- * Critical loop. Start at a new cache line boundary. Assuming
- * 64 bytes per line this ensures the entire loop is in one line.
- */
- .p2align L1_CACHE_SHIFT
-.Lcpy_body_large:
- /* pre-get 64 bytes data. */
- ldp1 A_l, A_h, src, #16
- ldp1 B_l, B_h, src, #16
- ldp1 C_l, C_h, src, #16
- ldp1 D_l, D_h, src, #16
-1:
- /*
- * interlace the load of next 64 bytes data block with store of the last
- * loaded 64 bytes data.
- */
- stp1 A_l, A_h, dst, #16
- ldp1 A_l, A_h, src, #16
- stp1 B_l, B_h, dst, #16
- ldp1 B_l, B_h, src, #16
- stp1 C_l, C_h, dst, #16
- ldp1 C_l, C_h, src, #16
- stp1 D_l, D_h, dst, #16
- ldp1 D_l, D_h, src, #16
- subs count, count, #64
- b.ge 1b
- stp1 A_l, A_h, dst, #16
- stp1 B_l, B_h, dst, #16
- stp1 C_l, C_h, dst, #16
- stp1 D_l, D_h, dst, #16
-
- tst count, #0x3f
- b.ne .Ltail63
-.Lexitfunc:
--
2.36.1.dirty