[PATCH] riscv:lib: optimize memcmp with ld insn

Conor.Dooley at microchip.com Conor.Dooley at microchip.com
Thu Sep 1 08:40:06 PDT 2022


On 01/09/2022 14:53, Yipeng Zou wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> 在 2022/8/31 21:16, Conor.Dooley at microchip.com 写道:
>> On 31/08/2022 14:07, Yipeng Zou wrote:
>>> riscv:lib: optimize memcmp with ld insn
>> Minor nit: "riscv: lib:"
> ok
>>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>>
>>> Currently memcmp is implemented in C code (lib/string.c), which compares
>>> memory byte by byte.
>>>
>>> This patch uses the ld insn to compare memory a word at a time instead.
>>> From the test results, this makes memcmp several times faster.
>> Hey Yipeng,
>> Could you share some more information about the tests you did?
>> The test results showing the % improvement would be nice :)
>> Thanks,
>> Conor.
> 
> Of course, but my board was not ready, so I just tested this patch on qemu
> RV64 & RV32.
> 
> I allocated 8KB, 4KB and 1KB buffers to compare, looping 10k times for each.

I fixed that up since it was fairly unreadable..

Size(B) Min(ns) AVG(ns) //before

8k      40800   46316
4k      26500   32302
1k      15600   17965

Size(B) Min(ns) AVG(ns) //after

8k      16100   21281
4k      14200   16446
1k      12400   14316


I think putting this into the commit message would be nice.

I am no whizz on these kinds of things, but with the commit message
fixed up:
Reviewed-by: Conor Dooley <conor.dooley at microchip.com>

>>> Signed-off-by: Yipeng Zou <zouyipeng at huawei.com>
>>> ---
>>>    arch/riscv/include/asm/string.h |  3 ++
>>>    arch/riscv/lib/Makefile         |  1 +
>>>    arch/riscv/lib/memcmp.S         | 59 +++++++++++++++++++++++++++++++++
>>>    3 files changed, 63 insertions(+)
>>>    create mode 100644 arch/riscv/lib/memcmp.S
>>>
>>> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
>>> index 909049366555..3337b43d3803 100644
>>> --- a/arch/riscv/include/asm/string.h
>>> +++ b/arch/riscv/include/asm/string.h
>>> @@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>>>    #define __HAVE_ARCH_MEMMOVE
>>>    extern asmlinkage void *memmove(void *, const void *, size_t);
>>>    extern asmlinkage void *__memmove(void *, const void *, size_t);
>>> +#define __HAVE_ARCH_MEMCMP
>>> +extern int memcmp(const void *, const void *, size_t);
>>> +
>>>    /* For those files which don't want to check by kasan. */
>>>    #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
>>>    #define memcpy(dst, src, len) __memcpy(dst, src, len)
>>> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
>>> index 25d5c9664e57..70773bf0c471 100644
>>> --- a/arch/riscv/lib/Makefile
>>> +++ b/arch/riscv/lib/Makefile
>>> @@ -3,6 +3,7 @@ lib-y                   += delay.o
>>>    lib-y                  += memcpy.o
>>>    lib-y                  += memset.o
>>>    lib-y                  += memmove.o
>>> +lib-y                  += memcmp.o
>>>    lib-$(CONFIG_MMU)      += uaccess.o
>>>    lib-$(CONFIG_64BIT)    += tishift.o
>>>
>>> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
>>> new file mode 100644
>>> index 000000000000..83af1c433e6f
>>> --- /dev/null
>>> +++ b/arch/riscv/lib/memcmp.S
>>> @@ -0,0 +1,59 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>> +/*
>>> + * Copyright (C) 2022 zouyipeng at huawei.com
>>> + */
>>> +#include <linux/linkage.h>
>>> +#include <asm-generic/export.h>
>>> +#include <asm/asm.h>
>>> +
>>> +/* arguments:
>>> +* a0: addr0
>>> +* a1: addr1
>>> +* a2: size
>>> +*/
>>> +#define addr0  a0
>>> +#define addr1  a1
>>> +#define limit  a2
>>> +
>>> +#define data0  a3
>>> +#define data1  a4
>>> +#define tmp    t3
>>> +#define aaddr  t4
>>> +#define return a0
>>> +
>>> +/* load and compare */
>>> +.macro LD_CMP op d0 d1 a0 a1 offset
>>> +       \op \d0, 0(\a0)
>>> +       \op \d1, 0(\a1)
>>> +       addi \a0, \a0, \offset
>>> +       addi \a1, \a1, \offset
>>> +       sub tmp, \d0, \d1
>>> +.endm
>>> +
>>> +ENTRY(memcmp)
>>> +       /* test whether limit is aligned to SZREG */
>>> +       andi tmp, limit, SZREG - 1
>>> +       /* load tail */
>>> +       add aaddr, addr0, limit
>>> +       sub aaddr, aaddr, tmp
>>> +       add limit, addr0, limit
>>> +
>>> +.LloopWord:
>>> +       sltu tmp, addr0, aaddr
>>> +       beqz tmp, .LloopByte
>>> +
>>> +       LD_CMP REG_L data0 data1 addr0 addr1 SZREG
>>> +       beqz tmp, .LloopWord
>>> +       j .Lreturn
>>> +
>>> +.LloopByte:
>>> +       sltu tmp, addr0, limit
>>> +       beqz tmp, .Lreturn
>>> +
>>> +       LD_CMP lbu data0 data1 addr0 addr1 1
>>> +       beqz tmp, .LloopByte
>>> +.Lreturn:
>>> +       mv return, tmp
>>> +       ret
>>> +END(memcmp)
>>> +EXPORT_SYMBOL(memcmp);
>>> -- 
>>> 2.17.1
>>>
> -- 
> Regards,
> Yipeng Zou
> 



More information about the linux-riscv mailing list