[PATCH 2/2] riscv: Optimize memset
zhangfei
zhang_fei_0403 at 163.com
Tue May 9 20:52:42 PDT 2023
From: zhangfei <zhangfei at nj.iscas.ac.cn>
This patch has been optimized for memset data sizes less than 16 bytes.
Compared to byte by byte storage, significant performance improvement has been achieved.
It allows storage instructions to be executed in parallel and reduces the number of jumps.
Signed-off-by: Fei Zhang <zhangfei at nj.iscas.ac.cn>
---
arch/riscv/lib/memset.S | 33 ++++++++++++++++++++++++++++++---
1 file changed, 30 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S
index e613c5c27998..6113a2696e79 100644
--- a/arch/riscv/lib/memset.S
+++ b/arch/riscv/lib/memset.S
@@ -106,9 +106,36 @@ WEAK(memset)
beqz a2, 6f
add a3, t0, a2
5:
- sb a1, 0(t0)
- addi t0, t0, 1
- bltu t0, a3, 5b
+ sb a1, 0(t0)
+ sb a1, -1(a3)
+ li a4, 2
+ bgeu a4, a2, 6f
+
+ sb a1, 1(t0)
+ sb a1, 2(t0)
+ sb a1, -2(a3)
+ sb a1, -3(a3)
+ li a4, 6
+ bgeu a4, a2, 6f
+
+ sb a1, 3(t0)
+ sb a1, -4(a3)
+ li a4, 8
+ bgeu a4, a2, 6f
+
+ sb a1, 4(t0)
+ sb a1, -5(a3)
+ li a4, 10
+ bgeu a4, a2, 6f
+
+ sb a1, 5(t0)
+ sb a1, 6(t0)
+ sb a1, -6(a3)
+ sb a1, -7(a3)
+ li a4, 14
+ bgeu a4, a2, 6f
+
+ sb a1, 7(t0)
6:
ret
END(__memset)
--
2.33.0
More information about the linux-riscv
mailing list