[v5, 6/6] riscv: lib: add vectorized mem* routines
Charlie Jenkins
charlie at rivosinc.com
Fri Dec 15 11:56:52 PST 2023
On Thu, Dec 14, 2023 at 03:57:21PM +0000, Andy Chiu wrote:
> Provide vectorized memcpy/memset/memmove to accelerate common memory
> operations. Also, group them into V_OPT_TEMPLATE3 macro because their
> setup/tear-down and fallback logics are the same.
>
> The original implementation of Vector operations comes from
> https://github.com/sifive/sifive-libc, which we agree to contribute to
> Linux kernel.
>
> Signed-off-by: Andy Chiu <andy.chiu at sifive.com>
> ---
> Changelog v4:
> - new patch since v4
> ---
> arch/riscv/lib/Makefile | 3 ++
> arch/riscv/lib/memcpy_vector.S | 29 +++++++++++++++++++
> arch/riscv/lib/memmove_vector.S | 49 ++++++++++++++++++++++++++++++++
> arch/riscv/lib/memset_vector.S | 33 +++++++++++++++++++++
> arch/riscv/lib/riscv_v_helpers.c | 21 ++++++++++++++
> 5 files changed, 135 insertions(+)
> create mode 100644 arch/riscv/lib/memcpy_vector.S
> create mode 100644 arch/riscv/lib/memmove_vector.S
> create mode 100644 arch/riscv/lib/memset_vector.S
>
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index 1fe8d797e0f2..3111863afd2e 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -14,3 +14,6 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
> lib-$(CONFIG_RISCV_ISA_V) += xor.o
> lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
> lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
> +lib-$(CONFIG_RISCV_ISA_V) += memset_vector.o
> +lib-$(CONFIG_RISCV_ISA_V) += memcpy_vector.o
> +lib-$(CONFIG_RISCV_ISA_V) += memmove_vector.o
> diff --git a/arch/riscv/lib/memcpy_vector.S b/arch/riscv/lib/memcpy_vector.S
> new file mode 100644
> index 000000000000..4176b6e0a53c
> --- /dev/null
> +++ b/arch/riscv/lib/memcpy_vector.S
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +
> +/* void *memcpy(void *, const void *, size_t) */
> +SYM_FUNC_START(__asm_memcpy_vector)
> + mv pDstPtr, pDst
> +loop:
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> + vle8.v vData, (pSrc)
> + sub iNum, iNum, iVL
> + add pSrc, pSrc, iVL
> + vse8.v vData, (pDstPtr)
> + add pDstPtr, pDstPtr, iVL
> + bnez iNum, loop
> + ret
> +SYM_FUNC_END(__asm_memcpy_vector)
> diff --git a/arch/riscv/lib/memmove_vector.S b/arch/riscv/lib/memmove_vector.S
> new file mode 100644
> index 000000000000..4cea9d244dc9
> --- /dev/null
> +++ b/arch/riscv/lib/memmove_vector.S
> @@ -0,0 +1,49 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +#define pSrcBackwardPtr a5
> +#define pDstBackwardPtr a6
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +SYM_FUNC_START(__asm_memmove_vector)
> +
> + mv pDstPtr, pDst
> +
> + bgeu pSrc, pDst, forward_copy_loop
> + add pSrcBackwardPtr, pSrc, iNum
> + add pDstBackwardPtr, pDst, iNum
> + bltu pDst, pSrcBackwardPtr, backward_copy_loop
> +
> +forward_copy_loop:
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> + vle8.v vData, (pSrc)
> + sub iNum, iNum, iVL
> + add pSrc, pSrc, iVL
> + vse8.v vData, (pDstPtr)
> + add pDstPtr, pDstPtr, iVL
> +
> + bnez iNum, forward_copy_loop
> + ret
> +
> +backward_copy_loop:
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> + sub pSrcBackwardPtr, pSrcBackwardPtr, iVL
> + vle8.v vData, (pSrcBackwardPtr)
> + sub iNum, iNum, iVL
> + sub pDstBackwardPtr, pDstBackwardPtr, iVL
> + vse8.v vData, (pDstBackwardPtr)
> + bnez iNum, backward_copy_loop
> + ret
> +
> +SYM_FUNC_END(__asm_memmove_vector)
> diff --git a/arch/riscv/lib/memset_vector.S b/arch/riscv/lib/memset_vector.S
> new file mode 100644
> index 000000000000..4611feed72ac
> --- /dev/null
> +++ b/arch/riscv/lib/memset_vector.S
> @@ -0,0 +1,33 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define iValue a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define iTemp a4
> +#define pDstPtr a5
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +/* void *memset(void *, int, size_t) */
> +SYM_FUNC_START(__asm_memset_vector)
> +
> + mv pDstPtr, pDst
> +
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> + vmv.v.x vData, iValue
> +
> +loop:
> + vse8.v vData, (pDstPtr)
> + sub iNum, iNum, iVL
> + add pDstPtr, pDstPtr, iVL
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> + bnez iNum, loop
> +
> + ret
> +
> +SYM_FUNC_END(__asm_memset_vector)
> diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
> index d763b9c69fb7..12e8c5deb013 100644
> --- a/arch/riscv/lib/riscv_v_helpers.c
> +++ b/arch/riscv/lib/riscv_v_helpers.c
> @@ -36,3 +36,24 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
> fallback:
> return fallback_scalar_usercopy(dst, src, n);
> }
> +
> +#define V_OPT_TEMPLATE3(prefix, type_r, type_0, type_1) \
> +extern type_r __asm_##prefix##_vector(type_0, type_1, size_t n); \
> +type_r prefix(type_0 a0, type_1 a1, size_t n) \
> +{ \
> + type_r ret; \
> + if (has_vector() && may_use_simd() && n > riscv_v_##prefix##_thres) { \
I forgot to bring it up on the other patch, but the abbreviation "thres"
is not intuitive to me. I think spelling out "threshold" is better, or
using "thresh" instead would make this much clearer.
> + kernel_vector_begin(); \
> + ret = __asm_##prefix##_vector(a0, a1, n); \
> + kernel_vector_end(); \
> + return ret; \
> + } \
> + return __##prefix(a0, a1, n); \
> +}
> +
> +static size_t riscv_v_memset_thres = 1280;
> +V_OPT_TEMPLATE3(memset, void *, void*, int)
> +static size_t riscv_v_memcpy_thres = 768;
> +V_OPT_TEMPLATE3(memcpy, void *, void*, const void *)
> +static size_t riscv_v_memmove_thres = 512;
How were these values selected? I would imagine that they could be
different for different vector hardware, so it might be valuable to make
these the default values but allow a Kconfig option to change them.
- Charlie
> +V_OPT_TEMPLATE3(memmove, void *, void*, const void *)
> --
> 2.17.1
>
More information about the linux-riscv
mailing list