[PATCH v3] riscv: lib: optimize memcmp with ld insn

Palmer Dabbelt palmer at dabbelt.com
Wed Oct 12 20:23:42 PDT 2022


On Tue, 06 Sep 2022 05:38:14 PDT (-0700), ajones at ventanamicro.com wrote:
> On Tue, Sep 06, 2022 at 07:53:59PM +0800, Yipeng Zou wrote:
>> Currently memcmp was implemented in c code(lib/string.c), which compare
>> memory per byte.
>>
>> This patch use ld insn compare memory per word to improve. From the test
>> Results, this will take several times optimized.
>>
>> Alloc 8,4,1KB buffer to compare, each loop 10k times:
>>
>> Size(B) Min(ns) AVG(ns) //before
>>
>> 8k      40800   46316
>> 4k      26500   32302
>> 1k      15600   17965
>>
>> Size(B) Min(ns) AVG(ns) //after
>>
>> 8k      16100   21281
>> 4k      14200   16446
>> 1k      12400   14316
>>
>> Signed-off-by: Yipeng Zou <zouyipeng at huawei.com>
>> Reviewed-by: Conor Dooley <conor.dooley at microchip.com>
>> ---
>> V2: Patch test data into the commit message,and collect Reviewed-by
>> Tags.
>> V3: Fix some spelling mistakes. Improve register naming and coding style.
>>
>>  arch/riscv/include/asm/string.h |  3 ++
>>  arch/riscv/lib/Makefile         |  1 +
>>  arch/riscv/lib/memcmp.S         | 58 +++++++++++++++++++++++++++++++++
>>  3 files changed, 62 insertions(+)
>>  create mode 100644 arch/riscv/lib/memcmp.S
>>
>> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
>> index 909049366555..3337b43d3803 100644
>> --- a/arch/riscv/include/asm/string.h
>> +++ b/arch/riscv/include/asm/string.h
>> @@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>>  #define __HAVE_ARCH_MEMMOVE
>>  extern asmlinkage void *memmove(void *, const void *, size_t);
>>  extern asmlinkage void *__memmove(void *, const void *, size_t);
>> +#define __HAVE_ARCH_MEMCMP
>> +extern int memcmp(const void *, const void *, size_t);
>> +
>>  /* For those files which don't want to check by kasan. */
>>  #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
>>  #define memcpy(dst, src, len) __memcpy(dst, src, len)
>> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
>> index 25d5c9664e57..70773bf0c471 100644
>> --- a/arch/riscv/lib/Makefile
>> +++ b/arch/riscv/lib/Makefile
>> @@ -3,6 +3,7 @@ lib-y			+= delay.o
>>  lib-y			+= memcpy.o
>>  lib-y			+= memset.o
>>  lib-y			+= memmove.o
>> +lib-y			+= memcmp.o
>>  lib-$(CONFIG_MMU)	+= uaccess.o
>>  lib-$(CONFIG_64BIT)	+= tishift.o
>>
>> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
>> new file mode 100644
>> index 000000000000..eea5cc40e081
>> --- /dev/null
>> +++ b/arch/riscv/lib/memcmp.S
>> @@ -0,0 +1,58 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (C) 2022 zouyipeng at huawei.com
>> + */
>> +#include <linux/linkage.h>
>> +#include <asm-generic/export.h>
>> +#include <asm/asm.h>
>> +
>> +/*
>> + Input Arguments:
>> + a0: addr0
>> + a1: addr1
>> + a2: buffer size
>> +
>> + Output:
>> + a0: return value
>> +*/
>> +#define data0	a3
>> +#define data1	a4
>> +#define tmp	t3
>> +#define tail	t4
>> +
>> +/* load and compare */
>> +.macro LD_CMP op d0 d1 a0 a1 t1 offset
>> +	\op	\d0, 0(\a0)
>> +	\op	\d1, 0(\a1)
>> +	addi	\a0, \a0, \offset
>> +	addi	\a1, \a1, \offset
>> +	sub	\t1, \d0, \d1
>> +.endm
>> +
>> +ENTRY(memcmp)
>> +	/* test size aligned with SZREG */
>> +	andi	tmp, a2, SZREG - 1
>> +	/* load tail */
>> +	add	tail, a0, a2
>> +	sub	tail, tail, tmp
>> +	add	a2, a0, a2
>> +
>> +.LloopWord:
>> +	sltu	tmp, a0, tail
>> +	beqz	tmp, .LloopByte
>> +
>> +	LD_CMP	REG_L data0 data1 a0 a1 tmp SZREG
>> +	beqz	tmp, .LloopWord
>> +	j	.Lreturn
>> +
>> +.LloopByte:
>> +	sltu	tmp, a0, a2
>> +	beqz	tmp, .Lreturn
>> +
>> +	LD_CMP	lbu data0 data1 a0 a1 tmp 1
>> +	beqz	tmp, .LloopByte
>> +.Lreturn:
>> +	mv	a0, tmp
>> +	ret
>> +END(memcmp)
>> +EXPORT_SYMBOL(memcmp);
>> --
>> 2.17.1
>>
>
> Thanks, looks nice.

Ya, I think normally for performance improvements I'd want a bit more 
(like "what was this benchmarked on", for example).  This one seems 
straight-forward enough to just handle open-loop, though, so I think 
it's OK this time.  It's also something we could probably do in C, but 
IIUC we're meant to have arch-specific routines for these anyway and 
we're going to end up with assembly at some point so we might as well 
just start now.

It does trigger a bunch of build failures, though.  This fixes most of the
RISC-V related issues:

    diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
    index 3337b43d3803..33bd88e17469 100644
    --- a/arch/riscv/include/asm/string.h
    +++ b/arch/riscv/include/asm/string.h
    @@ -19,13 +19,15 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
     extern asmlinkage void *memmove(void *, const void *, size_t);
     extern asmlinkage void *__memmove(void *, const void *, size_t);
     #define __HAVE_ARCH_MEMCMP
    -extern int memcmp(const void *, const void *, size_t);
    +extern asmlinkage int memcmp(const void *, const void *, size_t);
    +extern asmlinkage int __memcmp(const void *, const void *, size_t);
     
     /* For those files which don't want to check by kasan. */
     #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
     #define memcpy(dst, src, len) __memcpy(dst, src, len)
     #define memset(s, c, n) __memset(s, c, n)
     #define memmove(dst, src, len) __memmove(dst, src, len)
    +#define memcmp(a, b, len) __memcmp(a, b, len)
     
     #ifndef __NO_FORTIFY
     #define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
    diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
    index eea5cc40e081..518797cd6dda 100644
    --- a/arch/riscv/lib/memcmp.S
    +++ b/arch/riscv/lib/memcmp.S
    @@ -29,7 +29,8 @@
     	sub	\t1, \d0, \d1
     .endm
     
    -ENTRY(memcmp)
    +SYM_FUNC_START(__memcmp)
    +SYM_FUNC_START_WEAK(memcmp)
     	/* test size aligned with SZREG */
     	andi	tmp, a2, SZREG - 1
     	/* load tail */
    @@ -54,5 +55,9 @@ ENTRY(memcmp)
     .Lreturn:
     	mv	a0, tmp
     	ret
    -END(memcmp)
    -EXPORT_SYMBOL(memcmp);
    +
    +SYM_FUNC_END(memcmp)
    +SYM_FUNC_END(__memcmp)
    +
    +EXPORT_SYMBOL(memcmp)
    +EXPORT_SYMBOL(__memcmp)
    diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
    index dd58e1d99397..359227e392b2 100644
    --- a/arch/riscv/purgatory/Makefile
    +++ b/arch/riscv/purgatory/Makefile
    @@ -1,7 +1,7 @@
     # SPDX-License-Identifier: GPL-2.0
     OBJECT_FILES_NON_STANDARD := y
     
    -purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
    +purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o memcmp.o
     
     targets += $(purgatory-y)
     PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
    @@ -18,6 +18,9 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE
     $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE
     	$(call if_changed_rule,as_o_S)
     
    +$(obj)/memcmp.o: $(srctree)/arch/riscv/lib/memcmp.S FORCE
    +	$(call if_changed_rule,as_o_S)
    +
     $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
     	$(call if_changed_rule,cc_o_c)

but I'm still getting some EFI stub failures

    riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt.stub.o: in function `__efistub_.L130':
    __efistub_fdt.c:(.init.text+0x686): undefined reference to `__efistub___memcmp'
    riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o: in function `__efistub_.L87':
    __efistub_fdt_ro.c:(.init.text+0x554): undefined reference to `__efistub___memcmp'
    riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o: in function `__efistub_.L108':
    __efistub_fdt_ro.c:(.init.text+0x6fc): undefined reference to `__efistub___memcmp'
    riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o: in function `__efistub_.L263':
    __efistub_fdt_ro.c:(.init.text+0xfe0): undefined reference to `__efistub___memcmp'
    riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o: in function `__efistub_.L284':
    __efistub_fdt_ro.c:(.init.text+0x10c0): undefined reference to `__efistub___memcmp'
    riscv64-unknown-linux-gnu-ld: ./drivers/firmware/efi/libstub/lib-fdt_ro.stub.o:__efistub_fdt_ro.c:(.init.text+0x11fc): more undefined references to `__efistub___memcmp' follow
    riscv64-unknown-linux-gnu-ld: .tmp_vmlinux.kallsyms1: hidden symbol `__efistub___memcmp' isn't defined
    riscv64-unknown-linux-gnu-ld: final link failed: bad value
    make[2]: *** [/scratch/merges/ko-linux-next/linux/Makefile:1240: vmlinux] Error 1
    make[2]: Leaving directory '/scratch/merges/ko-linux-next/kernel/rv64gc/allyesconfig'

so I think it's going to be too late for 6.1 for this one, sorry!

>
> Reviewed-by: Andrew Jones <ajones at ventanamicro.com>



More information about the linux-riscv mailing list