[v8, 06/10] riscv: lib: add vectorized mem* routines
Andy Chiu
andy.chiu at sifive.com
Fri Dec 22 20:29:10 PST 2023
Provide vectorized memcpy/memset/memmove to accelerate common memory
operations. Also, group them into the V_OPT_TEMPLATE3 macro because their
setup/tear-down and fallback logic are the same.
The optimal size at which the kernel prefers Vector over scalar,
riscv_v_mem*_threshold, is only a heuristic for now. We can add DT
parsing if people feel the need to customize it.
The original implementation of the Vector operations comes from
https://github.com/sifive/sifive-libc, which we have agreed to contribute
to the Linux kernel.
Signed-off-by: Andy Chiu <andy.chiu at sifive.com>
---
Changelog v7:
- add __NO_FORTIFY to prevent conflicting function declaration with
macro for mem* functions.
Changelog v6:
- provide kconfig to set threshold for vectorized functions (Charlie)
- rename *thres to *threshold (Charlie)
Changelog v4:
- new patch since v4
---
arch/riscv/Kconfig | 24 ++++++++++++++++
arch/riscv/lib/Makefile | 3 ++
arch/riscv/lib/memcpy_vector.S | 29 +++++++++++++++++++
arch/riscv/lib/memmove_vector.S | 49 ++++++++++++++++++++++++++++++++
arch/riscv/lib/memset_vector.S | 33 +++++++++++++++++++++
arch/riscv/lib/riscv_v_helpers.c | 26 +++++++++++++++++
6 files changed, 164 insertions(+)
create mode 100644 arch/riscv/lib/memcpy_vector.S
create mode 100644 arch/riscv/lib/memmove_vector.S
create mode 100644 arch/riscv/lib/memset_vector.S
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 3c5ba05e8a2d..cba53dcc2ae0 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -533,6 +533,30 @@ config RISCV_ISA_V_UCOPY_THRESHOLD
Prefer using vectorized copy_to_user()/copy_from_user() when the
workload size exceeds this value.
+config RISCV_ISA_V_MEMSET_THRESHOLD
+ int "Threshold size for vectorized memset()"
+ depends on RISCV_ISA_V
+ default 1280
+ help
+ Prefer using vectorized memset() when the workload size exceeds this
+ value.
+
+config RISCV_ISA_V_MEMCPY_THRESHOLD
+ int "Threshold size for vectorized memcpy()"
+ depends on RISCV_ISA_V
+ default 768
+ help
+ Prefer using vectorized memcpy() when the workload size exceeds this
+ value.
+
+config RISCV_ISA_V_MEMMOVE_THRESHOLD
+ int "Threshold size for vectorized memmove()"
+ depends on RISCV_ISA_V
+ default 512
+ help
+ Prefer using vectorized memmove() when the workload size exceeds this
+ value.
+
config TOOLCHAIN_HAS_ZBB
bool
default y
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index c8a6787d5827..d389dbf285fe 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -16,3 +16,6 @@ lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
+lib-$(CONFIG_RISCV_ISA_V) += memset_vector.o
+lib-$(CONFIG_RISCV_ISA_V) += memcpy_vector.o
+lib-$(CONFIG_RISCV_ISA_V) += memmove_vector.o
diff --git a/arch/riscv/lib/memcpy_vector.S b/arch/riscv/lib/memcpy_vector.S
new file mode 100644
index 000000000000..4176b6e0a53c
--- /dev/null
+++ b/arch/riscv/lib/memcpy_vector.S
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+/* Argument registers per the RISC-V calling convention. */
+#define pDst a0
+#define pSrc a1
+#define iNum a2
+
+/* Scratch registers. */
+#define iVL a3
+#define pDstPtr a4
+
+/* 8-bit elements with LMUL=8: widest grouping, maximizing bytes per pass. */
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+
+/* void *memcpy(void *, const void *, size_t) */
+SYM_FUNC_START(__asm_memcpy_vector)
+ /* Advance a scratch copy so a0 still holds dst for the return value. */
+ mv pDstPtr, pDst
+loop:
+ /* iVL = min(iNum, VLMAX): classic stripmining loop. */
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+ vle8.v vData, (pSrc)
+ sub iNum, iNum, iVL
+ add pSrc, pSrc, iVL
+ vse8.v vData, (pDstPtr)
+ add pDstPtr, pDstPtr, iVL
+ bnez iNum, loop
+ ret
+SYM_FUNC_END(__asm_memcpy_vector)
diff --git a/arch/riscv/lib/memmove_vector.S b/arch/riscv/lib/memmove_vector.S
new file mode 100644
index 000000000000..4cea9d244dc9
--- /dev/null
+++ b/arch/riscv/lib/memmove_vector.S
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#define pDst a0
+#define pSrc a1
+#define iNum a2
+
+#define iVL a3
+#define pDstPtr a4
+#define pSrcBackwardPtr a5
+#define pDstBackwardPtr a6
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+/* void *memmove(void *, const void *, size_t) */
+SYM_FUNC_START(__asm_memmove_vector)
+
+ /* Preserve dst in a0 as the return value; advance pDstPtr instead. */
+ mv pDstPtr, pDst
+
+ /* src >= dst: a forward copy never reads bytes it has already written. */
+ bgeu pSrc, pDst, forward_copy_loop
+ add pSrcBackwardPtr, pSrc, iNum
+ add pDstBackwardPtr, pDst, iNum
+ /* dst lies inside [src, src + iNum): must copy from the tail backward. */
+ bltu pDst, pSrcBackwardPtr, backward_copy_loop
+
+forward_copy_loop:
+ /* iVL = min(iNum, VLMAX): stripmine from the front of both buffers. */
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+
+ vle8.v vData, (pSrc)
+ sub iNum, iNum, iVL
+ add pSrc, pSrc, iVL
+ vse8.v vData, (pDstPtr)
+ add pDstPtr, pDstPtr, iVL
+
+ bnez iNum, forward_copy_loop
+ ret
+
+backward_copy_loop:
+ /* Same stripmining, but decrement the tail pointers before each access. */
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+
+ sub pSrcBackwardPtr, pSrcBackwardPtr, iVL
+ vle8.v vData, (pSrcBackwardPtr)
+ sub iNum, iNum, iVL
+ sub pDstBackwardPtr, pDstBackwardPtr, iVL
+ vse8.v vData, (pDstBackwardPtr)
+ bnez iNum, backward_copy_loop
+ ret
+
+SYM_FUNC_END(__asm_memmove_vector)
diff --git a/arch/riscv/lib/memset_vector.S b/arch/riscv/lib/memset_vector.S
new file mode 100644
index 000000000000..4611feed72ac
--- /dev/null
+++ b/arch/riscv/lib/memset_vector.S
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#define pDst a0
+#define iValue a1
+#define iNum a2
+
+#define iVL a3
+/* NOTE(review): iTemp is never referenced below — confirm it can be dropped. */
+#define iTemp a4
+#define pDstPtr a5
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+/* void *memset(void *, int, size_t) */
+SYM_FUNC_START(__asm_memset_vector)
+
+ /* Keep dst untouched in a0 for the return value. */
+ mv pDstPtr, pDst
+
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+ /* Splat the fill byte across the whole vector group once, up front. */
+ vmv.v.x vData, iValue
+
+loop:
+ vse8.v vData, (pDstPtr)
+ sub iNum, iNum, iVL
+ add pDstPtr, pDstPtr, iVL
+ /* Recompute VL for the (possibly shorter) next chunk before the branch. */
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+ bnez iNum, loop
+
+ ret
+
+SYM_FUNC_END(__asm_memset_vector)
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
index 6cac8f4e69e9..c62f333ba557 100644
--- a/arch/riscv/lib/riscv_v_helpers.c
+++ b/arch/riscv/lib/riscv_v_helpers.c
@@ -3,9 +3,17 @@
 * Copyright (C) 2023 SiFive
 * Author: Andy Chiu <andy.chiu at sifive.com>
 */
+/*
+ * This file defines the real memset()/memcpy()/memmove() symbols, which
+ * would clash with the FORTIFY_SOURCE macro wrappers of the same names,
+ * so opt this translation unit out of fortification.
+ */
+#ifndef __NO_FORTIFY
+# define __NO_FORTIFY
+#endif
#include <linux/linkage.h>
#include <asm/asm.h>
+#include <asm/string.h>
#include <asm/vector.h>
#include <asm/simd.h>
@@ -42,3 +46,33 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
return fallback_scalar_usercopy(dst, src, n);
}
#endif
+
+/*
+ * V_OPT_TEMPLATE3 - generate a mem* wrapper with a Vector fast path.
+ *
+ * Dispatch to __asm_<prefix>_vector() when the CPU has Vector, SIMD is
+ * usable in the current context, and the size exceeds the per-function
+ * threshold; otherwise fall back to the scalar __<prefix>() routine.
+ */
+#define V_OPT_TEMPLATE3(prefix, type_r, type_0, type_1) \
+extern type_r __asm_##prefix##_vector(type_0, type_1, size_t n); \
+type_r prefix(type_0 a0, type_1 a1, size_t n) \
+{ \
+ type_r ret; \
+ if (has_vector() && may_use_simd() && \
+ n > riscv_v_##prefix##_threshold) { \
+ kernel_vector_begin(); \
+ ret = __asm_##prefix##_vector(a0, a1, n); \
+ kernel_vector_end(); \
+ return ret; \
+ } \
+ return __##prefix(a0, a1, n); \
+}
+
+/* Thresholds are tuning heuristics from CONFIG_RISCV_ISA_V_MEM*_THRESHOLD. */
+static size_t riscv_v_memset_threshold = CONFIG_RISCV_ISA_V_MEMSET_THRESHOLD;
+V_OPT_TEMPLATE3(memset, void *, void*, int)
+static size_t riscv_v_memcpy_threshold = CONFIG_RISCV_ISA_V_MEMCPY_THRESHOLD;
+V_OPT_TEMPLATE3(memcpy, void *, void*, const void *)
+static size_t riscv_v_memmove_threshold = CONFIG_RISCV_ISA_V_MEMMOVE_THRESHOLD;
+V_OPT_TEMPLATE3(memmove, void *, void*, const void *)
--
2.17.1
More information about the linux-riscv
mailing list