[PATCH v2 1/1] riscv: __asm_copy_to-from_user: Improve using word copy, if size is < 9*SZREG
Akira Tsukamoto
akira.tsukamoto at gmail.com
Thu Nov 11 00:13:04 PST 2021
Reduce the number of cases that fall back to the slow byte_copy.
Currently byte_copy is used for all cases where the size is smaller than
9*SZREG. When the size is between 2*SZREG and 9*SZREG, use the faster
(non-unrolled) word_copy instead.
Signed-off-by: Akira Tsukamoto <akira.tsukamoto at gmail.com>
---
arch/riscv/lib/uaccess.S | 46 ++++++++++++++++++++++++++++++++++++----
1 file changed, 42 insertions(+), 4 deletions(-)
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 63bc691cff91..50013479cb86 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -34,8 +34,10 @@ ENTRY(__asm_copy_from_user)
/*
* Use byte copy only if too small.
* SZREG holds 4 for RV32 and 8 for RV64
+ * a3 - 2*SZREG is minimum size for word_copy
+ * 1*SZREG for aligning dst + 1*SZREG for word_copy
*/
- li a3, 9*SZREG /* size must be larger than size in word_copy */
+ li a3, 2*SZREG
bltu a2, a3, .Lbyte_copy_tail
/*
@@ -66,9 +68,40 @@ ENTRY(__asm_copy_from_user)
andi a3, a1, SZREG-1
bnez a3, .Lshift_copy
+.Lcheck_size_bulk:
+ /*
+ * Check whether the remaining size allows the unrolled copy.
+ * The word_copy_unlrolled requires more than 8*SZREG
+ */
+ li a3, 8*SZREG
+ add a4, a0, a3
+ bltu a4, t0, .Lword_copy_unlrolled
+
.Lword_copy:
- /*
- * Both src and dst are aligned, unrolled word copy
+ /*
+ * Both src and dst are aligned
+ * Non-unrolled word copy, 1*SZREG per iteration
+ *
+ * a0 - start of aligned dst
+ * a1 - start of aligned src
+ * t0 - end of aligned dst
+ */
+ bgeu a0, t0, .Lbyte_copy_tail /* check if end of copy */
+ addi t0, t0, -(SZREG) /* not to over run */
+1:
+ fixup REG_L a5, 0(a1)
+ addi a1, a1, SZREG
+ fixup REG_S a5, 0(a0)
+ addi a0, a0, SZREG
+ bltu a0, t0, 1b
+
+ addi t0, t0, SZREG /* revert to original value */
+ j .Lbyte_copy_tail
+
+.Lword_copy_unlrolled:
+ /*
+ * Both src and dst are aligned
+ * Unrolled word copy with every 8*SZREG iteration
*
* a0 - start of aligned dst
* a1 - start of aligned src
@@ -97,7 +130,12 @@ ENTRY(__asm_copy_from_user)
bltu a0, t0, 2b
addi t0, t0, 8*SZREG /* revert to original value */
- j .Lbyte_copy_tail
+
+ /*
+ * The remainder might be large enough for word_copy, which reduces
+ * slow byte copy
+ */
+ j .Lcheck_size_bulk
.Lshift_copy:
--
2.17.1
More information about the linux-riscv
mailing list