[PATCH v2] riscv: lib: optimize memcmp with ld insn

Yipeng Zou zouyipeng at huawei.com
Fri Sep 2 04:00:39 PDT 2022


Currently memcmp was implemented in c code(lib/string.c), which compare
memory per byte.

This patch use ld insn compare memory per word to improve. From the test
Results, this will take several times optimized.

Alloc 8,4,1KB buffer to compare, each loop 10k times:

Size(B) Min(ns) AVG(ns) //before

8k      40800   46316
4k      26500   32302
1k      15600   17965

Size(B) Min(ns) AVG(ns) //after

8k      16100   21281
4k      14200   16446
1k      12400   14316

Signed-off-by: Yipeng Zou <zouyipeng at huawei.com>
Reviewed-by: Conor Dooley <conor.dooley at microchip.com>
---
V2: Patch test data into the commit message,and collect Reviewed-by Tags.

 arch/riscv/include/asm/string.h |  3 ++
 arch/riscv/lib/Makefile         |  1 +
 arch/riscv/lib/memcmp.S         | 59 +++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 arch/riscv/lib/memcmp.S

diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 909049366555..3337b43d3803 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
 #define __HAVE_ARCH_MEMMOVE
 extern asmlinkage void *memmove(void *, const void *, size_t);
 extern asmlinkage void *__memmove(void *, const void *, size_t);
+#define __HAVE_ARCH_MEMCMP
+extern int memcmp(const void *, const void *, size_t);
+
 /* For those files which don't want to check by kasan. */
 #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 25d5c9664e57..70773bf0c471 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -3,6 +3,7 @@ lib-y			+= delay.o
 lib-y			+= memcpy.o
 lib-y			+= memset.o
 lib-y			+= memmove.o
+lib-y			+= memcmp.o
 lib-$(CONFIG_MMU)	+= uaccess.o
 lib-$(CONFIG_64BIT)	+= tishift.o
 
diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
new file mode 100644
index 000000000000..83af1c433e6f
--- /dev/null
+++ b/arch/riscv/lib/memcmp.S
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 zouyipeng at huawei.com
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+/* argrments:
+* a0: addr0
+* a1: addr1
+* a2: size
+*/
+#define addr0	a0
+#define addr1	a1
+#define limit	a2
+
+#define data0	a3
+#define data1	a4
+#define tmp	t3
+#define aaddr	t4
+#define return	a0
+
+/* load and compare */
+.macro LD_CMP op d0 d1 a0 a1 offset
+	\op \d0, 0(\a0)
+	\op \d1, 0(\a1)
+	addi \a0, \a0, \offset
+	addi \a1, \a1, \offset
+	sub tmp, \d0, \d1
+.endm
+
+ENTRY(memcmp)
+	/* test limit aligend with SZREG */
+	andi tmp, limit, SZREG - 1
+	/* load tail */
+	add aaddr, addr0, limit
+	sub aaddr, aaddr, tmp
+	add limit, addr0, limit
+
+.LloopWord:
+	sltu tmp, addr0, aaddr
+	beqz tmp, .LloopByte
+
+	LD_CMP REG_L data0 data1 addr0 addr1 SZREG
+	beqz tmp, .LloopWord
+	j .Lreturn
+
+.LloopByte:
+	sltu tmp, addr0, limit
+	beqz tmp, .Lreturn
+
+	LD_CMP lbu data0 data1 addr0 addr1 1
+	beqz tmp, .LloopByte
+.Lreturn:
+	mv return, tmp
+	ret
+END(memcmp)
+EXPORT_SYMBOL(memcmp);
-- 
2.17.1




More information about the linux-riscv mailing list