[PATCH v3] riscv: lib: optimize memcmp with ld insn

Andrew Jones ajones at ventanamicro.com
Tue Sep 6 05:38:14 PDT 2022


On Tue, Sep 06, 2022 at 07:53:59PM +0800, Yipeng Zou wrote:
> Currently memcmp is implemented in C (lib/string.c), which compares
> memory one byte at a time.
> 
> This patch uses the ld instruction to compare memory one word at a
> time. The test results below show an improvement of several times.
> 
> Allocate 8 KB, 4 KB, and 1 KB buffers to compare; each measurement loops 10k times:
> 
> Size(B) Min(ns) AVG(ns) //before
> 
> 8k      40800   46316
> 4k      26500   32302
> 1k      15600   17965
> 
> Size(B) Min(ns) AVG(ns) //after
> 
> 8k      16100   21281
> 4k      14200   16446
> 1k      12400   14316
> 
> Signed-off-by: Yipeng Zou <zouyipeng at huawei.com>
> Reviewed-by: Conor Dooley <conor.dooley at microchip.com>
> ---
> V2: Add the test data to the commit message and collect Reviewed-by
> tags.
> V3: Fix some spelling mistakes. Improve register naming and coding style.
> 
>  arch/riscv/include/asm/string.h |  3 ++
>  arch/riscv/lib/Makefile         |  1 +
>  arch/riscv/lib/memcmp.S         | 58 +++++++++++++++++++++++++++++++++
>  3 files changed, 62 insertions(+)
>  create mode 100644 arch/riscv/lib/memcmp.S
> 
> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
> index 909049366555..3337b43d3803 100644
> --- a/arch/riscv/include/asm/string.h
> +++ b/arch/riscv/include/asm/string.h
> @@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>  #define __HAVE_ARCH_MEMMOVE
>  extern asmlinkage void *memmove(void *, const void *, size_t);
>  extern asmlinkage void *__memmove(void *, const void *, size_t);
> +#define __HAVE_ARCH_MEMCMP
> +extern int memcmp(const void *, const void *, size_t);
> +
>  /* For those files which don't want to check by kasan. */
>  #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
>  #define memcpy(dst, src, len) __memcpy(dst, src, len)
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index 25d5c9664e57..70773bf0c471 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -3,6 +3,7 @@ lib-y			+= delay.o
>  lib-y			+= memcpy.o
>  lib-y			+= memset.o
>  lib-y			+= memmove.o
> +lib-y			+= memcmp.o
>  lib-$(CONFIG_MMU)	+= uaccess.o
>  lib-$(CONFIG_64BIT)	+= tishift.o
>  
> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
> new file mode 100644
> index 000000000000..eea5cc40e081
> --- /dev/null
> +++ b/arch/riscv/lib/memcmp.S
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2022 zouyipeng at huawei.com
> + */
> +#include <linux/linkage.h>
> +#include <asm-generic/export.h>
> +#include <asm/asm.h>
> +
> +/*
> + Input Arguments:
> + a0: addr0
> + a1: addr1
> + a2: buffer size
> +
> + Output:
> + a0: return value
> +*/
> +#define data0	a3
> +#define data1	a4
> +#define tmp	t3
> +#define tail	t4
> +
> +/* load and compare */
> +.macro LD_CMP op d0 d1 a0 a1 t1 offset
> +	\op	\d0, 0(\a0)
> +	\op	\d1, 0(\a1)
> +	addi	\a0, \a0, \offset
> +	addi	\a1, \a1, \offset
> +	sub	\t1, \d0, \d1
> +.endm
> +
> +ENTRY(memcmp)
> +	/* test size aligned with SZREG */
> +	andi	tmp, a2, SZREG - 1
> +	/* load tail */
> +	add	tail, a0, a2
> +	sub	tail, tail, tmp
> +	add	a2, a0, a2
> +
> +.LloopWord:
> +	sltu	tmp, a0, tail
> +	beqz	tmp, .LloopByte
> +
> +	LD_CMP	REG_L data0 data1 a0 a1 tmp SZREG
> +	beqz	tmp, .LloopWord
> +	j	.Lreturn
> +
> +.LloopByte:
> +	sltu	tmp, a0, a2
> +	beqz	tmp, .Lreturn
> +
> +	LD_CMP	lbu data0 data1 a0 a1 tmp 1
> +	beqz	tmp, .LloopByte
> +.Lreturn:
> +	mv	a0, tmp
> +	ret
> +END(memcmp)
> +EXPORT_SYMBOL(memcmp);
> -- 
> 2.17.1
>

Thanks, looks nice.
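
For anyone reading the assembly for the first time, the approach is
roughly the following C sketch (an illustration only, not the proposed
kernel code): compare one register-sized word per iteration, then fall
back to a byte loop. Note that the patch returns the raw word
difference when a word mismatches, whereas this sketch resolves the
mismatch bytewise just to keep the C return value obviously within
memcmp()'s contract.

#include <stddef.h>
#include <string.h>

/* Word-at-a-time compare: 'unsigned long' stands in for an SZREG load. */
static int memcmp_word(const void *s1, const void *s2, size_t n)
{
	const unsigned char *p1 = s1, *p2 = s2;
	size_t words = n / sizeof(unsigned long);

	while (words--) {
		unsigned long w1, w2;

		memcpy(&w1, p1, sizeof(w1));	/* memcpy avoids unaligned-access UB */
		memcpy(&w2, p2, sizeof(w2));
		if (w1 != w2)
			break;			/* resolve the difference bytewise below */
		p1 += sizeof(unsigned long);
		p2 += sizeof(unsigned long);
		n -= sizeof(unsigned long);
	}

	/* Tail bytes (and any mismatching word) one byte at a time. */
	while (n--) {
		if (*p1 != *p2)
			return *p1 - *p2;
		p1++;
		p2++;
	}
	return 0;
}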

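The measurement harness isn't part of the patch, so purely as an
assumption on my part, a userspace loop along these lines produces the
same Size/Min/Avg layout (it exercises libc's memcmp, of course; the
commit-message numbers would need an in-kernel equivalent to hit the
patched routine):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define ITERATIONS 10000

/* Time ITERATIONS full-length memcmp() calls on equal buffers and
 * print the minimum and average per-call latency in nanoseconds. */
static void bench(size_t size)
{
	char *a = malloc(size), *b = malloc(size);
	uint64_t min = UINT64_MAX, total = 0;

	memset(a, 0x5a, size);
	memset(b, 0x5a, size);

	for (int i = 0; i < ITERATIONS; i++) {
		struct timespec t0, t1;
		uint64_t ns;

		clock_gettime(CLOCK_MONOTONIC, &t0);
		if (memcmp(a, b, size) != 0)
			abort();	/* equal buffers: force a full scan */
		clock_gettime(CLOCK_MONOTONIC, &t1);

		ns = (t1.tv_sec - t0.tv_sec) * 1000000000ull +
		     (t1.tv_nsec - t0.tv_nsec);
		total += ns;
		if (ns < min)
			min = ns;
	}

	printf("%zu\t%llu\t%llu\n", size, (unsigned long long)min,
	       (unsigned long long)(total / ITERATIONS));
	free(a);
	free(b);
}

int main(void)
{
	bench(8192);
	bench(4096);
	bench(1024);
	return 0;
}
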
Reviewed-by: Andrew Jones <ajones at ventanamicro.com>


