[PATCH] riscv:lib: optimize memcmp with ld insn

Yipeng Zou zouyipeng at huawei.com
Fri Sep 2 03:32:24 PDT 2022


在 2022/9/1 23:40, Conor.Dooley at microchip.com 写道:
> On 01/09/2022 14:53, Yipeng Zou wrote:
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>
>> 在 2022/8/31 21:16, Conor.Dooley at microchip.com 写道:
>>> On 31/08/2022 14:07, Yipeng Zou wrote:
>>>> riscv:lib: optimize memcmp with ld insn
>>> Minor nit: "riscv: lib:
>> ok
>>>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>>>
>>>> Currently memcmp is implemented in C code (lib/string.c), which compares
>>>> memory byte by byte.
>>>>
>>>> This patch uses the ld insn to compare memory one word at a time. From the
>>>> test results, this makes memcmp several times faster.
>>> Hey Yipeng,
>>> Could you share some more information about the tests you did?
>>> The test results showing the % improvement would be nice :)
>>> Thanks,
>>> Conor.
>> Of course, but my board was not ready, so I just tested this patch on QEMU
>> RV64 & RV32.
>>
>> Allocated 8, 4, and 1 KB buffers to compare; each test looped 10k times.
> I fixed that up since it was fairly unreadable..
>
> Size(B) Min(ns) AVG(ns) //before
>
> 8k      40800   46316
> 4k      26500   32302
> 1k      15600   17965
>
> Size(B) Min(ns) AVG(ns) //after
>
> 8k      16100   21281
> 4k      14200   16446
> 1k      12400   14316
Sorry, the text format was broken, and thank you very much for fixing that up.
>
> I think putting this into the commit message would be nice.
>
> I am no whizz on these kinds of things, but with the commit message
> fixed up:
> Reviewed-by: Conor Dooley <conor.dooley at microchip.com>
I will put this in v2, and thanks again.
>>>> Signed-off-by: Yipeng Zou <zouyipeng at huawei.com>
>>>> ---
>>>>     arch/riscv/include/asm/string.h |  3 ++
>>>>     arch/riscv/lib/Makefile         |  1 +
>>>>     arch/riscv/lib/memcmp.S         | 59 +++++++++++++++++++++++++++++++++
>>>>     3 files changed, 63 insertions(+)
>>>>     create mode 100644 arch/riscv/lib/memcmp.S
>>>>
>>>> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
>>>> index 909049366555..3337b43d3803 100644
>>>> --- a/arch/riscv/include/asm/string.h
>>>> +++ b/arch/riscv/include/asm/string.h
>>>> @@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>>>>     #define __HAVE_ARCH_MEMMOVE
>>>>     extern asmlinkage void *memmove(void *, const void *, size_t);
>>>>     extern asmlinkage void *__memmove(void *, const void *, size_t);
>>>> +#define __HAVE_ARCH_MEMCMP
>>>> +extern int memcmp(const void *, const void *, size_t);
>>>> +
>>>>     /* For those files which don't want to check by kasan. */
>>>>     #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
>>>>     #define memcpy(dst, src, len) __memcpy(dst, src, len)
>>>> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
>>>> index 25d5c9664e57..70773bf0c471 100644
>>>> --- a/arch/riscv/lib/Makefile
>>>> +++ b/arch/riscv/lib/Makefile
>>>> @@ -3,6 +3,7 @@ lib-y                   += delay.o
>>>>     lib-y                  += memcpy.o
>>>>     lib-y                  += memset.o
>>>>     lib-y                  += memmove.o
>>>> +lib-y                  += memcmp.o
>>>>     lib-$(CONFIG_MMU)      += uaccess.o
>>>>     lib-$(CONFIG_64BIT)    += tishift.o
>>>>
>>>> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
>>>> new file mode 100644
>>>> index 000000000000..83af1c433e6f
>>>> --- /dev/null
>>>> +++ b/arch/riscv/lib/memcmp.S
>>>> @@ -0,0 +1,59 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>>> +/*
>>>> + * Copyright (C) 2022 zouyipeng at huawei.com
>>>> + */
>>>> +#include <linux/linkage.h>
>>>> +#include <asm-generic/export.h>
>>>> +#include <asm/asm.h>
>>>> +
>>>> +/* arguments:
>>>> +* a0: addr0
>>>> +* a1: addr1
>>>> +* a2: size
>>>> +*/
>>>> +#define addr0  a0
>>>> +#define addr1  a1
>>>> +#define limit  a2
>>>> +
>>>> +#define data0  a3
>>>> +#define data1  a4
>>>> +#define tmp    t3
>>>> +#define aaddr  t4
>>>> +#define return a0
>>>> +
>>>> +/* load and compare */
>>>> +.macro LD_CMP op d0 d1 a0 a1 offset
>>>> +       \op \d0, 0(\a0)
>>>> +       \op \d1, 0(\a1)
>>>> +       addi \a0, \a0, \offset
>>>> +       addi \a1, \a1, \offset
>>>> +       sub tmp, \d0, \d1
>>>> +.endm
>>>> +
>>>> +ENTRY(memcmp)
>>>> +       /* test whether limit is aligned to SZREG */
>>>> +       andi tmp, limit, SZREG - 1
>>>> +       /* load tail */
>>>> +       add aaddr, addr0, limit
>>>> +       sub aaddr, aaddr, tmp
>>>> +       add limit, addr0, limit
>>>> +
>>>> +.LloopWord:
>>>> +       sltu tmp, addr0, aaddr
>>>> +       beqz tmp, .LloopByte
>>>> +
>>>> +       LD_CMP REG_L data0 data1 addr0 addr1 SZREG
>>>> +       beqz tmp, .LloopWord
>>>> +       j .Lreturn
>>>> +
>>>> +.LloopByte:
>>>> +       sltu tmp, addr0, limit
>>>> +       beqz tmp, .Lreturn
>>>> +
>>>> +       LD_CMP lbu data0 data1 addr0 addr1 1
>>>> +       beqz tmp, .LloopByte
>>>> +.Lreturn:
>>>> +       mv return, tmp
>>>> +       ret
>>>> +END(memcmp)
>>>> +EXPORT_SYMBOL(memcmp);
>>>> -- 
>>>> 2.17.1
>>>>
>> -- 
>> Regards,
>> Yipeng Zou
>>
-- 
Regards,
Yipeng Zou




More information about the linux-riscv mailing list