[PATCH v4 1/2] riscv: lib: add memcmp() implementation

David Laight david.laight.linux at gmail.com
Tue May 19 02:09:37 PDT 2026


On Mon, 18 May 2026 15:14:06 +0200
Milan Tripkovic <milant2002 at gmail.com> wrote:

> From: Milan Tripkovic <Milan.Tripkovic at rt-rk.com>
> 
> Add an assembly implementation of memcmp() for RISC-V. The implementation
> uses the ZBB extension for word-at-a-time comparison and an assembly
> fallback for non-ZBB systems.

I think I mentioned before that the only ZBB-specific instruction I can
see is the byte reverse (rev8) at the end, which is needed to get the
correct sign. For non-ZBB it would be better to fall back to a byte
compare at that point.

Oh - and there should be change info for this patch in this email.

-- David

> 
> Benchmark results (QEMU TCG, rv64, Aligned):
> 
>   Len   | Default | NoZBB  | ZBB    | %NoZBB | %ZBB
>   ------|---------|--------|--------|--------|-------
>   1 B   | 20.3    | 25.0   | 20.9   | +23.2% | +3.0%
>   7 B   | 88.9    | 107.5  | 155.7  | +20.9% | +75.1%
>   8 B   | 89.6    | 110.9  | 176.2  | +23.8% | +96.7%
>   16 B  | 134.4   | 172.4  | 334.8  | +28.3% | +149.1%
>   31 B  | 163.5   | 220.5  | 606.2  | +34.9% | +270.8%
>   64 B  | 203.8   | 235.9  | 968.6  | +15.8% | +375.3%
>   127 B | 224.6   | 268.7  | 1362.8 | +19.6% | +506.8%
>   512 B | 235.7   | 271.1  | 1913.7 | +15.0% | +711.9%
>   1024 B| 256.8   | 290.6  | 2123.6 | +13.2% | +726.9%
>   4096 B| 263.8   | 302.9  | 2290.4 | +14.8% | +768.2%
> 
> Benchmark results (QEMU TCG, rv64, Unaligned - Offset 3):
> 
>   Len   | Default | NoZBB  | ZBB    | %NoZBB | %ZBB
>   ------|---------|--------|--------|--------|-------
>   1 B   | 20.7    | 21.7   | 21.5   | +4.8%  | +3.9%
>   7 B   | 96.2    | 99.1   | 96.9   | +3.0%  | +0.7%
>   8 B   | 97.5    | 118.5  | 110.5  | +21.5% | +13.3%
>   16 B  | 136.7   | 166.6  | 172.8  | +21.9% | +26.4%
>   31 B  | 167.6   | 206.5  | 211.9  | +23.2% | +26.4%
>   64 B  | 204.4   | 229.9  | 240.3  | +12.5% | +17.6%
>   127 B | 229.6   | 261.7  | 269.0  | +14.0% | +17.2%
>   512 B | 245.5   | 260.8  | 269.9  | +6.2%  | +9.9%
>   1024 B| 246.9   | 261.2  | 283.5  | +5.8%  | +14.8%
>   4096 B| 250.7   | 295.8  | 299.7  | +18.0% | +19.5%
> 
> Signed-off-by: Milan Tripkovic <Milan.Tripkovic at rt-rk.com>
> ---
>  arch/riscv/include/asm/string.h |   2 +
>  arch/riscv/lib/Makefile         |   1 +
>  arch/riscv/lib/memcmp.S         | 125 ++++++++++++++++++++++++++++++++
>  arch/riscv/purgatory/Makefile   |   5 +-
>  4 files changed, 132 insertions(+), 1 deletion(-)
>  create mode 100644 arch/riscv/lib/memcmp.S
> 
> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
> index 764ffe8f6..5c5299678 100644
> --- a/arch/riscv/include/asm/string.h
> +++ b/arch/riscv/include/asm/string.h
> @@ -18,6 +18,8 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>  #define __HAVE_ARCH_MEMMOVE
>  extern asmlinkage void *memmove(void *, const void *, size_t);
>  extern asmlinkage void *__memmove(void *, const void *, size_t);
> +#define __HAVE_ARCH_MEMCMP
> +extern asmlinkage int memcmp(const void *, const void *, size_t);
>  
>  #if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
>  #define __HAVE_ARCH_STRCMP
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index 6f767b2a3..b529e1be1 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -3,6 +3,7 @@ lib-y			+= delay.o
>  lib-y			+= memcpy.o
>  lib-y			+= memset.o
>  lib-y			+= memmove.o
> +lib-y			+= memcmp.o
>  ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
>  lib-y			+= strcmp.o
>  lib-y			+= strlen.o
> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
> new file mode 100644
> index 000000000..a531e481c
> --- /dev/null
> +++ b/arch/riscv/lib/memcmp.S
> @@ -0,0 +1,125 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +#include <asm/alternative-macros.h>
> +#include <asm/hwcap.h>
> +
> +/* int memcmp(const void *cs, const void *ct, size_t n) */
> +SYM_FUNC_START(memcmp)
> +
> +	__ALTERNATIVE_CFG("nop", "j memcmp_zbb", 0, RISCV_ISA_EXT_ZBB,
> +		IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
> +/* Byte-at-a-time path (the "nop" alternative above falls through to here).
> + * Parameters
> + *	a0 - Pointer to first memory block (cs), also return value
> + *	a1 - Pointer to second memory block (ct), advanced during the loop
> + *	a2 - Number of bytes to compare (n), transformed to end pointer (a0 + n)
> + *
> + * Returns
> + *	a0 - 0 if equal, positive if cs > ct, negative if cs < ct
> + *
> + * Clobbers
> + *	t0, t1 (the argument registers a1 and a2 are modified as well)
> + */
> +	beqz	a2, 2f	/* n == 0: nothing to compare, return 0 */
> +	add	a2, a0, a2	/* a2 = one past the last byte of cs */
> +1:	/* compare one byte per iteration */
> +	lbu	t0, 0(a0)	/* t0 = current byte of cs, zero-extended */
> +	lbu	t1, 0(a1)	/* t1 = current byte of ct, zero-extended */
> +	bne	t0, t1, 3f	/* first mismatch decides the result */
> +	addi	a0, a0, 1
> +	addi	a1, a1, 1
> +	bne	a0, a2, 1b	/* loop until cs reaches the end pointer */
> +2:	/* all bytes equal (or n == 0) */
> +	li	a0, 0
> +	ret
> +3:	/* mismatch: result is the difference of the two unsigned bytes */
> +	sub	a0, t0, t1
> +	ret
> +
> +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
> +memcmp_zbb:
> +
> +.option push
> +.option arch,+zbb
> +/* Word-at-a-time path; target of the "j memcmp_zbb" alternative above.
> + * Parameters
> + *	a0 - Pointer to first memory block (cs), also return value
> + *	a1 - Pointer to second memory block (ct), advanced during the loops
> + *	a2 - Number of bytes to compare (n), only read to form the end pointer t3
> + *
> + * Returns
> + *	a0 - 0 if equal, positive if cs > ct, negative if cs < ct
> + *
> + * Clobbers
> + *	t0, t1, t2, t3, t4 (the argument register a1 is modified as well)
> + */
> +	add	t3, a0, a2	/* t3 = one past the last byte of cs */
> +	or	t0, a0, a1	/* combine both addresses for a single alignment test */
> +	andi	t0, t0, (SZREG - 1)
> +	bnez	t0, 5f	/* cs or ct not SZREG-aligned: use the byte loop */
> +
> +	addi	t4, t3, -SZREG	/* t4 = last address at which a full word still fits */
> +	bltu	t4, a0, 7f	/* fewer than SZREG bytes in total: tail handling only */
> +
> +1:	/* compare SZREG bytes per iteration */
> +	REG_L	t1, 0(a0)
> +	REG_L	t2, 0(a1)
> +	bne	t1, t2, 2f	/* words differ: work out the sign at 2: */
> +	addi	a0, a0, SZREG
> +	addi	a1, a1, SZREG
> +	bleu	a0, t4, 1b	/* another full word remains */
> +
> +7:	/* tail: fewer than SZREG bytes remain */
> +	beq	a0, t3, 4f	/* nothing left: blocks are equal */
> +	REG_L	t1, 0(a0)	/* NOTE(review): REG_L presumably reads a full word, */
> +	REG_L	t2, 0(a1)	/* i.e. past cs + n / ct + n - confirm this is acceptable */
> +
> +	sub	t0, t3, a0	/* t0 = number of bytes remaining */
> +	li	t4, SZREG
> +	sub	t0, t4, t0	/* t0 = surplus bytes in the loaded word */
> +	slli	t0, t0, 3	/* ... converted to a bit count */
> +
> +#ifndef CONFIG_CPU_BIG_ENDIAN
> +	rev8	t1, t1	/* byte-reverse so the first byte in memory is most significant */
> +	rev8	t2, t2
> +#endif
> +	srl	t1, t1, t0	/* shift out the surplus low-order bytes */
> +	srl	t2, t2, t0
> +
> +	bne	t1, t2, 8f	/* already byte-reversed, so enter after the rev8 at 2: */
> +	li	a0, 0
> +	ret
> +5:	/* unaligned entry */
> +	beq	a0, t3, 4f	/* n == 0: return 0 */
> +6:	/* compare one byte per iteration */
> +	lbu	t1, 0(a0)
> +	lbu	t2, 0(a1)
> +	bne	t1, t2, 3f	/* first mismatch decides the result */
> +	addi	a0, a0, 1
> +	addi	a1, a1, 1
> +	bne	a0, t3, 6b	/* loop until cs reaches the end pointer */
> +
> +4:	li	a0, 0
> +	ret
> +2:	/* mismatching words from the word loop at 1: */
> +#ifndef CONFIG_CPU_BIG_ENDIAN
> +	rev8	t1, t1	/* byte-reverse so the first differing byte dominates the compare */
> +	rev8	t2, t2
> +#endif
> +8:	/* a0 = (t1 > t2) - (t1 < t2), unsigned: yields 1 or -1 here */
> +	sltu	a0, t2, t1
> +	sltu	t0, t1, t2
> +	sub	a0, a0, t0
> +	ret
> +
> +3:	/* byte-loop mismatch: difference of the two unsigned bytes */
> +	sub	a0, t1, t2
> +	ret
> +
> +.option pop
> +#endif
> +SYM_FUNC_END(memcmp)
> +SYM_FUNC_ALIAS(__pi_memcmp, memcmp)
> +EXPORT_SYMBOL(memcmp)
> diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
> index b0358a78f..456929971 100644
> --- a/arch/riscv/purgatory/Makefile
> +++ b/arch/riscv/purgatory/Makefile
> @@ -1,6 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
>  
> -purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
> +purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o memcmp.o
>  ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
>  purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o strrchr.o
>  endif
> @@ -41,6 +41,9 @@ $(obj)/strchr.o: $(srctree)/arch/riscv/lib/strchr.S FORCE
>  $(obj)/strrchr.o: $(srctree)/arch/riscv/lib/strrchr.S FORCE
>  	$(call if_changed_rule,as_o_S)
>  
> +$(obj)/memcmp.o: $(srctree)/arch/riscv/lib/memcmp.S FORCE
> +	$(call if_changed_rule,as_o_S)
> +
>  CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
>  CFLAGS_string.o := -D__DISABLE_EXPORTS
>  CFLAGS_ctype.o := -D__DISABLE_EXPORTS




More information about the linux-riscv mailing list