[v8, 06/10] riscv: lib: add vectorized mem* routines

Charlie Jenkins charlie at rivosinc.com
Tue Dec 26 17:42:25 PST 2023


On Sat, Dec 23, 2023 at 04:29:10AM +0000, Andy Chiu wrote:
> Provide vectorized memcpy/memset/memmove to accelerate common memory
> operations. Also, group them into the V_OPT_TEMPLATE3 macro because their
> setup/tear-down and fallback logic are the same.
> 
> The optimal size at which the kernel should prefer Vector over scalar,
> riscv_v_mem*_threshold, is only a heuristic for now. We can add DT
> parsing if people feel the need to customize it.
> 
> The original implementation of Vector operations comes from
> https://github.com/sifive/sifive-libc, which we agree to contribute to
> the Linux kernel.
> 
> Signed-off-by: Andy Chiu <andy.chiu at sifive.com>
> ---
> Changelog v7:
>  - add __NO_FORTIFY to prevent conflicting function declaration with
>    macro for mem* functions.
> Changelog v6:
>  - provide kconfig to set threshold for vectorized functions (Charlie)
>  - rename *thres to *threshold (Charlie)
> Changelog v4:
>  - new patch since v4
> ---
>  arch/riscv/Kconfig               | 24 ++++++++++++++++
>  arch/riscv/lib/Makefile          |  3 ++
>  arch/riscv/lib/memcpy_vector.S   | 29 +++++++++++++++++++
>  arch/riscv/lib/memmove_vector.S  | 49 ++++++++++++++++++++++++++++++++
>  arch/riscv/lib/memset_vector.S   | 33 +++++++++++++++++++++
>  arch/riscv/lib/riscv_v_helpers.c | 26 +++++++++++++++++
>  6 files changed, 164 insertions(+)
>  create mode 100644 arch/riscv/lib/memcpy_vector.S
>  create mode 100644 arch/riscv/lib/memmove_vector.S
>  create mode 100644 arch/riscv/lib/memset_vector.S
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 3c5ba05e8a2d..cba53dcc2ae0 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -533,6 +533,30 @@ config RISCV_ISA_V_UCOPY_THRESHOLD
>  	  Prefer using vectorized copy_to_user()/copy_from_user() when the
>  	  workload size exceeds this value.
>  
> +config RISCV_ISA_V_MEMSET_THRESHOLD
> +	int "Threshold size for vectorized memset()"
> +	depends on RISCV_ISA_V
> +	default 1280
> +	help
> +	  Prefer using vectorized memset() when the workload size exceeds this
> +	  value.
> +
> +config RISCV_ISA_V_MEMCPY_THRESHOLD
> +	int "Threshold size for vectorized memcpy()"
> +	depends on RISCV_ISA_V
> +	default 768
> +	help
> +	  Prefer using vectorized memcpy() when the workload size exceeds this
> +	  value.
> +
> +config RISCV_ISA_V_MEMMOVE_THRESHOLD
> +	int "Threshold size for vectorized memmove()"
> +	depends on RISCV_ISA_V
> +	default 512
> +	help
> +	  Prefer using vectorized memmove() when the workload size exceeds this
> +	  value.
> +
>  config TOOLCHAIN_HAS_ZBB
>  	bool
>  	default y
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index c8a6787d5827..d389dbf285fe 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -16,3 +16,6 @@ lib-$(CONFIG_RISCV_ISA_ZICBOZ)	+= clear_page.o
>  obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
>  lib-$(CONFIG_RISCV_ISA_V)	+= xor.o
>  lib-$(CONFIG_RISCV_ISA_V)	+= riscv_v_helpers.o
> +lib-$(CONFIG_RISCV_ISA_V)	+= memset_vector.o
> +lib-$(CONFIG_RISCV_ISA_V)	+= memcpy_vector.o
> +lib-$(CONFIG_RISCV_ISA_V)	+= memmove_vector.o
> diff --git a/arch/riscv/lib/memcpy_vector.S b/arch/riscv/lib/memcpy_vector.S
> new file mode 100644
> index 000000000000..4176b6e0a53c
> --- /dev/null
> +++ b/arch/riscv/lib/memcpy_vector.S
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +
> +/* void *memcpy(void *, const void *, size_t) */
> +SYM_FUNC_START(__asm_memcpy_vector)
> +	mv pDstPtr, pDst
> +loop:
> +	vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +	vle8.v vData, (pSrc)
> +	sub iNum, iNum, iVL
> +	add pSrc, pSrc, iVL
> +	vse8.v vData, (pDstPtr)
> +	add pDstPtr, pDstPtr, iVL
> +	bnez iNum, loop
> +	ret
> +SYM_FUNC_END(__asm_memcpy_vector)
> diff --git a/arch/riscv/lib/memmove_vector.S b/arch/riscv/lib/memmove_vector.S
> new file mode 100644
> index 000000000000..4cea9d244dc9
> --- /dev/null
> +++ b/arch/riscv/lib/memmove_vector.S
> @@ -0,0 +1,49 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +#define pSrcBackwardPtr a5
> +#define pDstBackwardPtr a6
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +SYM_FUNC_START(__asm_memmove_vector)
> +
> +    mv pDstPtr, pDst
> +
> +    bgeu pSrc, pDst, forward_copy_loop
> +    add pSrcBackwardPtr, pSrc, iNum
> +    add pDstBackwardPtr, pDst, iNum
> +    bltu pDst, pSrcBackwardPtr, backward_copy_loop
> +
> +forward_copy_loop:
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> +    vle8.v vData, (pSrc)
> +    sub iNum, iNum, iVL
> +    add pSrc, pSrc, iVL
> +    vse8.v vData, (pDstPtr)
> +    add pDstPtr, pDstPtr, iVL
> +
> +    bnez iNum, forward_copy_loop
> +    ret
> +
> +backward_copy_loop:
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> +    sub pSrcBackwardPtr, pSrcBackwardPtr, iVL
> +    vle8.v vData, (pSrcBackwardPtr)
> +    sub iNum, iNum, iVL
> +    sub pDstBackwardPtr, pDstBackwardPtr, iVL
> +    vse8.v vData, (pDstBackwardPtr)
> +    bnez iNum, backward_copy_loop
> +    ret
> +
> +SYM_FUNC_END(__asm_memmove_vector)
> diff --git a/arch/riscv/lib/memset_vector.S b/arch/riscv/lib/memset_vector.S
> new file mode 100644
> index 000000000000..4611feed72ac
> --- /dev/null
> +++ b/arch/riscv/lib/memset_vector.S
> @@ -0,0 +1,33 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define iValue a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define iTemp a4
> +#define pDstPtr a5
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +/* void *memset(void *, int, size_t) */
> +SYM_FUNC_START(__asm_memset_vector)
> +
> +    mv pDstPtr, pDst
> +
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +    vmv.v.x vData, iValue
> +
> +loop:
> +    vse8.v vData, (pDstPtr)
> +    sub iNum, iNum, iVL
> +    add pDstPtr, pDstPtr, iVL
> +    vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +    bnez iNum, loop
> +
> +    ret
> +
> +SYM_FUNC_END(__asm_memset_vector)
> diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
> index 6cac8f4e69e9..c62f333ba557 100644
> --- a/arch/riscv/lib/riscv_v_helpers.c
> +++ b/arch/riscv/lib/riscv_v_helpers.c
> @@ -3,9 +3,13 @@
>   * Copyright (C) 2023 SiFive
>   * Author: Andy Chiu <andy.chiu at sifive.com>
>   */
> +#ifndef __NO_FORTIFY
> +# define __NO_FORTIFY
> +#endif
>  #include <linux/linkage.h>
>  #include <asm/asm.h>
>  
> +#include <asm/string.h>
>  #include <asm/vector.h>
>  #include <asm/simd.h>
>  
> @@ -42,3 +46,25 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
>  	return fallback_scalar_usercopy(dst, src, n);
>  }
>  #endif
> +
> +#define V_OPT_TEMPLATE3(prefix, type_r, type_0, type_1)				\
> +extern type_r __asm_##prefix##_vector(type_0, type_1, size_t n);		\
> +type_r prefix(type_0 a0, type_1 a1, size_t n)					\
> +{										\
> +	type_r ret;								\
> +	if (has_vector() && may_use_simd() &&					\
> +	    n > riscv_v_##prefix##_threshold) {					\
> +		kernel_vector_begin();						\
> +		ret = __asm_##prefix##_vector(a0, a1, n);			\
> +		kernel_vector_end();						\
> +		return ret;							\
> +	}									\
> +	return __##prefix(a0, a1, n);						\
> +}
> +
> +static size_t riscv_v_memset_threshold = CONFIG_RISCV_ISA_V_MEMSET_THRESHOLD;
> +V_OPT_TEMPLATE3(memset, void *, void*, int)
> +static size_t riscv_v_memcpy_threshold = CONFIG_RISCV_ISA_V_MEMCPY_THRESHOLD;
> +V_OPT_TEMPLATE3(memcpy, void *, void*, const void *)
> +static size_t riscv_v_memmove_threshold = CONFIG_RISCV_ISA_V_MEMMOVE_THRESHOLD;
> +V_OPT_TEMPLATE3(memmove, void *, void*, const void *)
> -- 
> 2.17.1
> 

Thank you for adding the kconfigs for the thresholds.

Reviewed-by: Charlie Jenkins <charlie at rivosinc.com>




More information about the linux-riscv mailing list