[PATCH v3] riscv: lib: optimize memcmp with ld insn
Palmer Dabbelt
palmer at dabbelt.com
Wed Oct 12 20:23:42 PDT 2022
On Tue, 06 Sep 2022 05:38:14 PDT (-0700), ajones at ventanamicro.com wrote:
> On Tue, Sep 06, 2022 at 07:53:59PM +0800, Yipeng Zou wrote:
>> Currently memcmp was implemented in c code(lib/string.c), which compare
>> memory per byte.
>>
>> This patch use ld insn compare memory per word to improve. From the test
>> Results, this will take several times optimized.
>>
>> Alloc 8,4,1KB buffer to compare, each loop 10k times:
>>
>> Size(B) Min(ns) AVG(ns) //before
>>
>> 8k 40800 46316
>> 4k 26500 32302
>> 1k 15600 17965
>>
>> Size(B) Min(ns) AVG(ns) //after
>>
>> 8k 16100 21281
>> 4k 14200 16446
>> 1k 12400 14316
>>
>> Signed-off-by: Yipeng Zou <zouyipeng at huawei.com>
>> Reviewed-by: Conor Dooley <conor.dooley at microchip.com>
>> ---
>> V2: Patch test data into the commit message,and collect Reviewed-by
>> Tags.
>> V3: Fix some spelling mistakes. Improve register naming and coding style.
>>
>> arch/riscv/include/asm/string.h | 3 ++
>> arch/riscv/lib/Makefile | 1 +
>> arch/riscv/lib/memcmp.S | 58 +++++++++++++++++++++++++++++++++
>> 3 files changed, 62 insertions(+)
>> create mode 100644 arch/riscv/lib/memcmp.S
>>
>> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
>> index 909049366555..3337b43d3803 100644
>> --- a/arch/riscv/include/asm/string.h
>> +++ b/arch/riscv/include/asm/string.h
>> @@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>> #define __HAVE_ARCH_MEMMOVE
>> extern asmlinkage void *memmove(void *, const void *, size_t);
>> extern asmlinkage void *__memmove(void *, const void *, size_t);
>> +#define __HAVE_ARCH_MEMCMP
>> +extern int memcmp(const void *, const void *, size_t);
>> +
>> /* For those files which don't want to check by kasan. */
>> #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
>> #define memcpy(dst, src, len) __memcpy(dst, src, len)
>> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
>> index 25d5c9664e57..70773bf0c471 100644
>> --- a/arch/riscv/lib/Makefile
>> +++ b/arch/riscv/lib/Makefile
>> @@ -3,6 +3,7 @@ lib-y += delay.o
>> lib-y += memcpy.o
>> lib-y += memset.o
>> lib-y += memmove.o
>> +lib-y += memcmp.o
>> lib-$(CONFIG_MMU) += uaccess.o
>> lib-$(CONFIG_64BIT) += tishift.o
>>
>> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
>> new file mode 100644
>> index 000000000000..eea5cc40e081
>> --- /dev/null
>> +++ b/arch/riscv/lib/memcmp.S
>> @@ -0,0 +1,58 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (C) 2022 zouyipeng at huawei.com
>> + */
>> +#include <linux/linkage.h>
>> +#include <asm-generic/export.h>
>> +#include <asm/asm.h>
>> +
>> +/*
>> + Input Arguments:
>> + a0: addr0
>> + a1: addr1
>> + a2: buffer size
>> +
>> + Output:
>> + a0: return value
>> +*/
>> +#define data0 a3
>> +#define data1 a4
>> +#define tmp t3
>> +#define tail t4
>> +
>> +/* load and compare */
>> +.macro LD_CMP op d0 d1 a0 a1 t1 offset
>> + \op \d0, 0(\a0)
>> + \op \d1, 0(\a1)
>> + addi \a0, \a0, \offset
>> + addi \a1, \a1, \offset
>> + sub \t1, \d0, \d1
>> +.endm
>> +
>> +ENTRY(memcmp)
>> + /* test size aligned with SZREG */
>> + andi tmp, a2, SZREG - 1
>> + /* load tail */
>> + add tail, a0, a2
>> + sub tail, tail, tmp
>> + add a2, a0, a2
>> +
>> +.LloopWord:
>> + sltu tmp, a0, tail
>> + beqz tmp, .LloopByte
>> +
>> + LD_CMP REG_L data0 data1 a0 a1 tmp SZREG
>> + beqz tmp, .LloopWord
>> + j .Lreturn
>> +
>> +.LloopByte:
>> + sltu tmp, a0, a2
>> + beqz tmp, .Lreturn
>> +
>> + LD_CMP lbu data0 data1 a0 a1 tmp 1
>> + beqz tmp, .LloopByte
>> +.Lreturn:
>> + mv a0, tmp
>> + ret
>> +END(memcmp)
>> +EXPORT_SYMBOL(memcmp);
>> --
>> 2.17.1
>>
>
> Thanks, looks nice.
Ya, I think normally for performance improvements I'd want a bit more
(like "what was this benchmarked on", for example). This one seems
straight-forward enough to just handle open-loop, though, so I think
it's OK this time. It's also something we could probably do in C, but
IIUC we're meant to have arch-specific routines for these anyway and
we're going to end up with assembly at some point so we might as well
just start now.
It does trigger a bunch of build failures, though. This fixes most of the
RISC-V related issues:
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 3337b43d3803..33bd88e17469 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -19,13 +19,15 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
extern asmlinkage void *memmove(void *, const void *, size_t);
extern asmlinkage void *__memmove(void *, const void *, size_t);
#define __HAVE_ARCH_MEMCMP
-extern int memcmp(const void *, const void *, size_t);
+extern asmlinkage int memcmp(const void *, const void *, size_t);
+extern asmlinkage int __memcmp(const void *, const void *, size_t);
/* For those files which don't want to check by kasan. */
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
#define memcpy(dst, src, len) __memcpy(dst, src, len)
#define memset(s, c, n) __memset(s, c, n)
#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memcmp(a, b, len) __memcmp(a, b, len)
#ifndef __NO_FORTIFY
#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
index eea5cc40e081..518797cd6dda 100644
--- a/arch/riscv/lib/memcmp.S
+++ b/arch/riscv/lib/memcmp.S
@@ -29,7 +29,8 @@
sub \t1, \d0, \d1
.endm
-ENTRY(memcmp)
+SYM_FUNC_START(__memcmp)
+SYM_FUNC_START_WEAK(memcmp)
/* test size aligned with SZREG */
andi tmp, a2, SZREG - 1
/* load tail */
@@ -54,5 +55,9 @@ ENTRY(memcmp)
.Lreturn:
mv a0, tmp
ret
-END(memcmp)
-EXPORT_SYMBOL(memcmp);
+
+SYM_FUNC_END(memcmp)
+SYM_FUNC_END(__memcmp)
+
+EXPORT_SYMBOL(memcmp)
+EXPORT_SYMBOL(__memcmp)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index dd58e1d99397..359227e392b2 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
OBJECT_FILES_NON_STANDARD := y
-purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
+purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o memcmp.o
targets += $(purgatory-y)
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
@@ -18,6 +18,9 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE
$(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE
$(call if_changed_rule,as_o_S)
+$(obj)/memcmp.o: $(srctree)/arch/riscv/lib/memcmp.S FORCE
+ $(call if_changed_rule,as_o_S)
+
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
but I'm still getting some EFI stub failures
riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt.stub.o: in function `__efistub_.L130':
__efistub_fdt.c:(.init.text+0x686): undefined reference to `__efistub___memcmp'
riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o: in function `__efistub_.L87':
__efistub_fdt_ro.c:(.init.text+0x554): undefined reference to `__efistub___memcmp'
riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o: in function `__efistub_.L108':
__efistub_fdt_ro.c:(.init.text+0x6fc): undefined reference to `__efistub___memcmp'
riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o: in function `__efistub_.L263':
__efistub_fdt_ro.c:(.init.text+0xfe0): undefined reference to `__efistub___memcmp'
riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o: in function `__efistub_.L284':
__efistub_fdt_ro.c:(.init.text+0x10c0): undefined reference to `__efistub___memcmp'
riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o:__efistub_fdt_ro.c:(.init.text+0x11fc): more undefined references to `__efistub___memcmp' follow
riscv64-unknown-linux-gnu-ld: .tmp_vmlinux.kallsyms1: hidden symbol `__efistub___memcmp' isn't defined
riscv64-unknown-linux-gnu-ld: final link failed: bad value
make[2]: *** [/scratch/merges/ko-linux-next/linux/Makefile:1240: vmlinux] Error 1
make[2]: Leaving directory '/scratch/merges/ko-linux-next/kernel/rv64gc/allyesconfig'
so I think it's going to be too late for 6.1 for this one, sorry!
>
> Reviewed-by: Andrew Jones <ajones at ventanamicro.com>
More information about the linux-riscv
mailing list