[PATCH v3] riscv: lib: optimize memcmp with ld insn
Yipeng Zou
zouyipeng at huawei.com
Tue Sep 6 04:53:59 PDT 2022
Currently memcmp is implemented in C code (lib/string.c), which compares
memory one byte at a time.
This patch uses the ld instruction to compare memory one word at a time
instead. In the test results below, the average time improves by about
1.25x for 1 KB buffers and by about 2.2x for 8 KB buffers.
Buffers of 8 KB, 4 KB and 1 KB were allocated and compared, 10k iterations each:
Size(B) Min(ns) AVG(ns) //before
8k 40800 46316
4k 26500 32302
1k 15600 17965
Size(B) Min(ns) AVG(ns) //after
8k 16100 21281
4k 14200 16446
1k 12400 14316
Signed-off-by: Yipeng Zou <zouyipeng at huawei.com>
Reviewed-by: Conor Dooley <conor.dooley at microchip.com>
---
V2: Add test data to the commit message, and collect Reviewed-by
tags.
V3: Fix some spelling mistakes. Improve register naming and coding style.
arch/riscv/include/asm/string.h | 3 ++
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/memcmp.S | 58 +++++++++++++++++++++++++++++++++
3 files changed, 62 insertions(+)
create mode 100644 arch/riscv/lib/memcmp.S
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 909049366555..3337b43d3803 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
#define __HAVE_ARCH_MEMMOVE
extern asmlinkage void *memmove(void *, const void *, size_t);
extern asmlinkage void *__memmove(void *, const void *, size_t);
+#define __HAVE_ARCH_MEMCMP
+extern int memcmp(const void *, const void *, size_t);
+
/* For those files which don't want to check by kasan. */
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
#define memcpy(dst, src, len) __memcpy(dst, src, len)
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 25d5c9664e57..70773bf0c471 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -3,6 +3,7 @@ lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
lib-y += memmove.o
+lib-y += memcmp.o
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
new file mode 100644
index 000000000000..eea5cc40e081
--- /dev/null
+++ b/arch/riscv/lib/memcmp.S
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 zouyipeng at huawei.com
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+/*
+ Input Arguments:
+ a0: addr0
+ a1: addr1
+ a2: buffer size
+
+ Output:
+ a0: return value
+*/
+/*
+ * Register aliases used by memcmp below:
+ *   data0, data1 - values most recently loaded from the first and
+ *                  second buffer
+ *   tmp          - scratch: size remainder, loop conditions, and the
+ *                  difference data0 - data1
+ *   tail         - end address (in the first buffer) of the part that
+ *                  is compared SZREG bytes at a time
+ */
+#define data0 a3
+#define data1 a4
+#define tmp t3
+#define tail t4
+
+/*
+ * load and compare
+ *
+ * Using load instruction \op, load one item from address register \a0
+ * into \d0 and one from address register \a1 into \d1, advance both
+ * address registers by \offset, and leave \d0 - \d1 in \t1.
+ * \t1 is zero exactly when the two loaded values are equal.
+ *
+ * The parameters named a0 and a1 are macro arguments; they are whatever
+ * registers the caller passes, not necessarily the registers a0 and a1.
+ */
+.macro LD_CMP op d0 d1 a0 a1 t1 offset
+ \op \d0, 0(\a0)
+ \op \d1, 0(\a1)
+ addi \a0, \a0, \offset
+ addi \a1, \a1, \offset
+ sub \t1, \d0, \d1
+.endm
+
+/*
+ * memcmp: compare a2 bytes at a0 with a2 bytes at a1.
+ *
+ * The buffers are compared SZREG bytes at a time while at least SZREG
+ * bytes remain, then byte by byte.
+ *
+ * Returns in a0: zero when all bytes are equal (including size 0),
+ * otherwise the difference between the first pair of differing bytes,
+ * each loaded with lbu (i.e. as an unsigned byte).
+ *
+ * The word loop is only used to detect a mismatch. The difference of
+ * two register-wide words must not be returned directly: the function
+ * returns int, and on 64-bit the low 32 bits of the difference can be
+ * zero for unequal words; the subtraction can also wrap, and with a
+ * little-endian load its sign is decided by the highest-addressed
+ * differing byte rather than the first one.
+ *
+ * NOTE(review): REG_L is issued without checking the alignment of
+ * a0/a1 - confirm that misaligned word loads are acceptable on all
+ * supported platforms.
+ */
+ENTRY(memcmp)
+ /* tmp = size % SZREG: bytes left over after the whole words */
+ andi tmp, a2, SZREG - 1
+ /* tail = addr0 + size - tmp: end of the word-compared region */
+ add tail, a0, a2
+ sub tail, tail, tmp
+ /* a2 = addr0 + size: end of the first buffer */
+ add a2, a0, a2
+
+.LloopWord:
+ sltu tmp, a0, tail
+ beqz tmp, .LloopByte
+
+ LD_CMP REG_L data0 data1 a0 a1 tmp SZREG
+ beqz tmp, .LloopWord
+
+ /*
+  * The two words differ. Step both pointers back to the start of the
+  * mismatching word and fall through to the byte loop, which finds
+  * the first differing byte and produces a result that fits in int.
+  */
+ addi a0, a0, -SZREG
+ addi a1, a1, -SZREG
+
+.LloopByte:
+ /* tmp is 0 here when the end is reached, which is the "equal" result */
+ sltu tmp, a0, a2
+ beqz tmp, .Lreturn
+
+ LD_CMP lbu data0 data1 a0 a1 tmp 1
+ beqz tmp, .LloopByte
+.Lreturn:
+ mv a0, tmp
+ ret
+END(memcmp)
+EXPORT_SYMBOL(memcmp);
--
2.17.1
More information about the linux-riscv
mailing list