[PATCH 1/2] riscv: lib: add memcmp() implementation
David Laight
david.laight.linux at gmail.com
Wed May 13 02:49:01 PDT 2026
On Tue, 12 May 2026 16:10:06 +0200
Milan Tripkovic <milant2002 at gmail.com> wrote:
> From: Milan Tripkovic <Milan.Tripkovic at rt-rk.com>
>
> Add an assembly implementation of memcmp() for RISC-V. The implementation
> uses the ZBB extension for word-at-a-time comparison and an assembly
> fallback for non-ZBB systems.
>
> Benchmark results (QEMU TCG, rv64):
>
> Len | Def | NoZBB | ZBB | %NoZBB | %ZBB
> -----|-------|-------|-------|--------|-------
> 1 B | 22.4 | 24.6 | 23.2 | +9.8% | +3.5%
> 7 B | 96.9 | 108.5 | 107.3 | +12.0% | +10.7%
> 8 B | 107.0 | 116.3 | 176.7 | +8.7% | +65.1%
> 16 B | 148.4 | 172.8 | 315.6 | +16.4% | +112.6%
> 31 B | 182.2 | 217.1 | 377.6 | +19.2% | +107.2%
> 64 B | 220.6 | 239.4 | 874.2 | +8.5% | +296.2%
> 127 B| 213.7 | 254.8 | 1042.9| +19.2% | +388.0%
> 512 B| 255.1 | 269.0 | 1778.6| +5.4% | +597.2%
> 1024B| 252.3 | 280.9 | 1887.7| +11.3% | +648.1%
> 3173B| 241.3 | 288.7 | 2063.2| +19.6% | +755.0%
> 4096B| 240.9 | 280.5 | 2064.5| +16.4% | +756.9%
>
> Signed-off-by: Milan Tripkovic <Milan.Tripkovic at rt-rk.com>
> ---
> arch/riscv/include/asm/string.h | 2 +
> arch/riscv/lib/Makefile | 1 +
> arch/riscv/lib/memcmp.S | 103 ++++++++++++++++++++++++++++++++
> arch/riscv/purgatory/Makefile | 5 +-
> 4 files changed, 110 insertions(+), 1 deletion(-)
> create mode 100644 arch/riscv/lib/memcmp.S
>
> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
> index 764ffe8f6..5c5299678 100644
> --- a/arch/riscv/include/asm/string.h
> +++ b/arch/riscv/include/asm/string.h
> @@ -18,6 +18,8 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
> #define __HAVE_ARCH_MEMMOVE
> extern asmlinkage void *memmove(void *, const void *, size_t);
> extern asmlinkage void *__memmove(void *, const void *, size_t);
> +#define __HAVE_ARCH_MEMCMP
> +extern asmlinkage int memcmp(const void *, const void *, size_t);
>
> #if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
> #define __HAVE_ARCH_STRCMP
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index 6f767b2a3..b529e1be1 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -3,6 +3,7 @@ lib-y += delay.o
> lib-y += memcpy.o
> lib-y += memset.o
> lib-y += memmove.o
> +lib-y += memcmp.o
> ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
> lib-y += strcmp.o
> lib-y += strlen.o
> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
> new file mode 100644
> index 000000000..444b082d9
> --- /dev/null
> +++ b/arch/riscv/lib/memcmp.S
> @@ -0,0 +1,103 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +#include <asm/alternative-macros.h>
> +#include <asm/hwcap.h>
> +
> +/* int memcmp(const void *cs, const void *ct, size_t n) */
> +SYM_FUNC_START(memcmp)
> +
> + __ALTERNATIVE_CFG("nop", "j memcmp_zbb", 0, RISCV_ISA_EXT_ZBB,
> + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
> +/*
> + * Parameters
> + * a0 - Pointer to first memory block (cs), also return value
> + * a1 - Pointer to second memory block (ct)
> + * a2 - Number of bytes to compare (n), transformed to end pointer (a0 + n)
> + *
> + * Returns
> + * a0 - 0 if equal, positive if cs > ct, negative if cs < ct
> + *
> + * Clobbers
> + * t0, t1
> + */
> + beqz a2, 2f
> + add a2, a0, a2
> +1:
> + lbu t0, 0(a0)
> + lbu t1, 0(a1)
> + bne t0, t1, 3f
> + addi a0, a0, 1
> + addi a1, a1, 1
> + bne a0, a2, 1b
> +2:
> + li a0, 0
> + ret
> +3:
> + sub a0, t0, t1
> + ret
> +
> +
> +memcmp_zbb:
> +.option push
> +.option arch,+zbb
> +/*
> + * Parameters
> + * a0 - Pointer to first memory block (cs), also return value
> + * a1 - Pointer to second memory block (ct)
> + * a2 - Number of bytes to compare (n), decremented during loop
> + *
> + * Returns
> + * a0 - 0 if equal, positive if cs > ct, negative if cs < ct
> + *
> + * Clobbers
> + * t0, t1, t2
> + */
> + beq a0, a1, 4f
There is no point optimising for equal pointers.
> +
> + li t0, SZREG
> + bltu a2, t0, 5f
> +
> +1:
> + REG_L t1, 0(a0)
> + REG_L t2, 0(a1)
Aren't there some systems where misaligned reads are very expensive?
You might want to fall back to byte compares for misaligned buffers.
> + bne t1, t2, 2f
> +
> + addi a0, a0, SZREG
> + addi a1, a1, SZREG
> + addi a2, a2, -SZREG
> + bgeu a2, t0, 1b
You've a loop with two comparisons it in.
Move the length one to the top and the check before the loop shouldn't
be needed.
If you calculate the end address of one of the buffers you only
need two increments in the loop, not three.
You might need to access -SZREG(a0) to get the data.
> +
> +5:
> + beqz a2, 4f
If a0 and a1 are aligned you can read the next full word,
shift right (LE, left BE) and then compare.
> +6:
> + lbu t1, 0(a0)
> + lbu t2, 0(a1)
> + bne t1, t2, 3f
> + addi a0, a0, 1
> + addi a1, a1, 1
> + addi a2, a2, -1
> + bnez a2, 6b
> +
> +4: li a0, 0
> + ret
> +2:
> +#ifndef CONFIG_CPU_BIG_ENDIAN
> + rev8 t1, t1
> + rev8 t2, t2
> +#endif
That looks like the only bit that needs zbb?
Is BIG_ENDIAN common enough to actually worry about?
You could just fall back to byte accesses (rereading memory) on BE.
-- David
> + sltu a0, t2, t1
> + sltu t0, t1, t2
> + sub a0, a0, t0
> + ret
> +
> +3:
> + sub a0, t1, t2
> + ret
> +
> +.option pop
> +
> +SYM_FUNC_END(memcmp)
> +SYM_FUNC_ALIAS(__pi_memcmp, memcmp)
> +EXPORT_SYMBOL(memcmp)
> diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
> index b0358a78f..456929971 100644
> --- a/arch/riscv/purgatory/Makefile
> +++ b/arch/riscv/purgatory/Makefile
> @@ -1,6 +1,6 @@
> # SPDX-License-Identifier: GPL-2.0
>
> -purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
> +purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o memcmp.o
> ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
> purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o strrchr.o
> endif
> @@ -41,6 +41,9 @@ $(obj)/strchr.o: $(srctree)/arch/riscv/lib/strchr.S FORCE
> $(obj)/strrchr.o: $(srctree)/arch/riscv/lib/strrchr.S FORCE
> $(call if_changed_rule,as_o_S)
>
> +$(obj)/memcmp.o: $(srctree)/arch/riscv/lib/memcmp.S FORCE
> + $(call if_changed_rule,as_o_S)
> +
> CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
> CFLAGS_string.o := -D__DISABLE_EXPORTS
> CFLAGS_ctype.o := -D__DISABLE_EXPORTS
More information about the linux-riscv
mailing list