[v8, 05/10] riscv: lib: vectorize copy_to_user/copy_from_user

Andy Chiu andy.chiu at sifive.com
Sun Jan 14 21:42:46 PST 2024


Hi Guo,

On Wed, Dec 27, 2023 at 11:15 AM Andy Chiu <andy.chiu at sifive.com> wrote:
>
> On Wed, Dec 27, 2023 at 9:34 AM Guo Ren <guoren at kernel.org> wrote:
> >
> > On Sat, Dec 23, 2023 at 12:30 PM Andy Chiu <andy.chiu at sifive.com> wrote:
> > >
> > > This patch utilizes Vector to perform copy_to_user/copy_from_user. If
> > > Vector is available and the size of copy is large enough for Vector to
> > > perform better than scalar, then direct the kernel to do Vector copies
> > > for userspace. Though the best programming practice for users is to
> > > reduce the copy, this provides a faster variant when copies are
> > > inevitable.
> > >
> > > The optimal size for using Vector, copy_to_user_thres, is only a
> > > heuristic for now. We can add DT parsing if people feel the need to
> > > customize it.
> > >
> > > The exception fixup code of the __asm_vector_usercopy must fall back to
> > > the scalar one because accessing user pages might fault, and must be
> > > sleepable. Current kernel-mode Vector does not allow tasks to be
> > > preemptible, so we must deactivate Vector and perform a scalar fallback
> > > in such cases.
> > >
> > > The original implementation of Vector operations comes from
> > > https://github.com/sifive/sifive-libc, which we agree to contribute to
> > > Linux kernel.
> > >
> > > Signed-off-by: Andy Chiu <andy.chiu at sifive.com>
> > > ---
> > > Changelog v8:
> > >  - fix no-mmu build
> > > Changelog v6:
> > >  - Add a kconfig entry to configure threshold values (Charlie)
> > >  - Refine assembly code (Charlie)
> > > Changelog v4:
> > >  - new patch since v4
> > > ---
> > >  arch/riscv/Kconfig                      |  8 ++++
> > >  arch/riscv/include/asm/asm-prototypes.h |  4 ++
> > >  arch/riscv/lib/Makefile                 |  6 ++-
> > >  arch/riscv/lib/riscv_v_helpers.c        | 44 ++++++++++++++++++++++
> > >  arch/riscv/lib/uaccess.S                | 10 +++++
> > >  arch/riscv/lib/uaccess_vector.S         | 50 +++++++++++++++++++++++++
> > >  6 files changed, 121 insertions(+), 1 deletion(-)
> > >  create mode 100644 arch/riscv/lib/riscv_v_helpers.c
> > >  create mode 100644 arch/riscv/lib/uaccess_vector.S
> > >
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index 95a2a06acc6a..3c5ba05e8a2d 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -525,6 +525,14 @@ config RISCV_ISA_V_DEFAULT_ENABLE
> > >
> > >           If you don't know what to do here, say Y.
> > >
> > > +config RISCV_ISA_V_UCOPY_THRESHOLD
> > > +       int "Threshold size for vectorized user copies"
> > > +       depends on RISCV_ISA_V
> > > +       default 768
> > > +       help
> > > +         Prefer using vectorized copy_to_user()/copy_from_user() when the
> > > +         workload size exceeds this value.
> > > +
> > >  config TOOLCHAIN_HAS_ZBB
> > >         bool
> > >         default y
> > > diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
> > > index 6db1a9bbff4c..be438932f321 100644
> > > --- a/arch/riscv/include/asm/asm-prototypes.h
> > > +++ b/arch/riscv/include/asm/asm-prototypes.h
> > > @@ -11,6 +11,10 @@ long long __ashlti3(long long a, int b);
> > >
> > >  #ifdef CONFIG_RISCV_ISA_V
> > >
> > > +#ifdef CONFIG_MMU
> > > +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
> > > +#endif /* CONFIG_MMU  */
> > > +
> > >  void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
> > >                  const unsigned long *__restrict p2);
> > >  void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
> > > diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> > > index 494f9cd1a00c..c8a6787d5827 100644
> > > --- a/arch/riscv/lib/Makefile
> > > +++ b/arch/riscv/lib/Makefile
> > > @@ -6,9 +6,13 @@ lib-y                  += memmove.o
> > >  lib-y                  += strcmp.o
> > >  lib-y                  += strlen.o
> > >  lib-y                  += strncmp.o
> > > -lib-$(CONFIG_MMU)      += uaccess.o
> > > +ifeq ($(CONFIG_MMU), y)
> > > +lib-y                          += uaccess.o
> > > +lib-$(CONFIG_RISCV_ISA_V)      += uaccess_vector.o
> > > +endif
> > >  lib-$(CONFIG_64BIT)    += tishift.o
> > >  lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
> > >
> > >  obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
> > >  lib-$(CONFIG_RISCV_ISA_V)      += xor.o
> > > +lib-$(CONFIG_RISCV_ISA_V)      += riscv_v_helpers.o
> > > diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
> > > new file mode 100644
> > > index 000000000000..6cac8f4e69e9
> > > --- /dev/null
> > > +++ b/arch/riscv/lib/riscv_v_helpers.c
> > > @@ -0,0 +1,44 @@
> > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > +/*
> > > + * Copyright (C) 2023 SiFive
> > > + * Author: Andy Chiu <andy.chiu at sifive.com>
> > > + */
> > > +#include <linux/linkage.h>
> > > +#include <asm/asm.h>
> > > +
> > > +#include <asm/vector.h>
> > > +#include <asm/simd.h>
> > > +
> > > +#ifdef CONFIG_MMU
> > > +#include <asm/asm-prototypes.h>
> > > +#endif
> > > +
> > > +#ifdef CONFIG_MMU
> > > +size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
> > > +int __asm_vector_usercopy(void *dst, void *src, size_t n);
> > > +int fallback_scalar_usercopy(void *dst, void *src, size_t n);
> > > +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
> > > +{
> > > +       size_t remain, copied;
> > > +
> > > +       /* skip has_vector() check because it has been done by the asm  */
> > > +       if (!may_use_simd())
> > > +               goto fallback;
> > > +
> > > +       kernel_vector_begin();
> > > +       remain = __asm_vector_usercopy(dst, src, n);
> > > +       kernel_vector_end();
> > > +
> > > +       if (remain) {
> > > +               copied = n - remain;
> > > +               dst += copied;
> > > +               src += copied;
> > > +               goto fallback;
> > > +       }
> > > +
> > > +       return remain;
> > > +
> > > +fallback:
> > > +       return fallback_scalar_usercopy(dst, src, n);
> > > +}
> > > +#endif
> > > diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
> > > index 3ab438f30d13..a1e4a3c42925 100644
> > > --- a/arch/riscv/lib/uaccess.S
> > > +++ b/arch/riscv/lib/uaccess.S
> > > @@ -3,6 +3,8 @@
> > >  #include <asm/asm.h>
> > >  #include <asm/asm-extable.h>
> > >  #include <asm/csr.h>
> > > +#include <asm/hwcap.h>
> > > +#include <asm/alternative-macros.h>
> > >
> > >         .macro fixup op reg addr lbl
> > >  100:
> > > @@ -11,6 +13,13 @@
> > >         .endm
> > >
> > >  SYM_FUNC_START(__asm_copy_to_user)
> > > +#ifdef CONFIG_RISCV_ISA_V
> > > +       ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
> > > +       REG_L   t0, riscv_v_usercopy_threshold
> > > +       bltu    a2, t0, fallback_scalar_usercopy
> > > +       tail enter_vector_usercopy
> > > +#endif
> > > +SYM_FUNC_START(fallback_scalar_usercopy)
> > >
> > >         /* Enable access to user memory */
> > >         li t6, SR_SUM
> > > @@ -181,6 +190,7 @@ SYM_FUNC_START(__asm_copy_to_user)
> > >         sub a0, t5, a0
> > >         ret
> > >  SYM_FUNC_END(__asm_copy_to_user)
> > > +SYM_FUNC_END(fallback_scalar_usercopy)
> > >  EXPORT_SYMBOL(__asm_copy_to_user)
> > >  SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
> > >  EXPORT_SYMBOL(__asm_copy_from_user)
> > > diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
> > > new file mode 100644
> > > index 000000000000..7bd96cee39e4
> > > --- /dev/null
> > > +++ b/arch/riscv/lib/uaccess_vector.S
> > > @@ -0,0 +1,50 @@
> > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > +
> > > +#include <linux/linkage.h>
> > > +#include <asm-generic/export.h>
> > > +#include <asm/asm.h>
> > > +#include <asm/asm-extable.h>
> > > +#include <asm/csr.h>
> > > +
> > > +#define pDst a0
> > > +#define pSrc a1
> > > +#define iNum a2
> > > +
> > > +#define iVL a3
> > > +
> > > +#define ELEM_LMUL_SETTING m8
> > > +#define vData v0
> > > +
> > > +       .macro fixup op reg addr lbl
> > > +100:
> > > +       \op \reg, \addr
> > > +       _asm_extable    100b, \lbl
> > > +       .endm
> > > +
> > > +SYM_FUNC_START(__asm_vector_usercopy)
> > > +       /* Enable access to user memory */
> > > +       li t6, SR_SUM
> > > +       csrs CSR_STATUS, t6
> > > +
> > > +loop:
> > > +       vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> > > +       fixup vle8.v vData, (pSrc), 10f
> > > +       fixup vse8.v vData, (pDst), 10f
> > > +       sub iNum, iNum, iVL
> > > +       add pSrc, pSrc, iVL
> > > +       add pDst, pDst, iVL
> > > +       bnez iNum, loop
> > > +
> > > +.Lout_copy_user:
> > > +       /* Disable access to user memory */
> > > +       csrc CSR_STATUS, t6
> > > +       li      a0, 0
> > > +       ret
> > > +
> > > +       /* Exception fixup code */
> > > +10:
> > > +       /* Disable access to user memory */
> > > +       csrc    CSR_STATUS, t6
> > > +       mv      a0, iNum
> > Shall we check CSR_VSTART to find out how many elements were copied?
>
> This is a good idea! But is it possible to find out if we were trapped
> at the load or the store instruction? IIUC if we trap at the load then
> we could not derive the number of copied bytes from CSR_VSTART.

Actually we can, by separating out the fixup for vle8.v and vse8.v. I
am going to include the update for this in v11. Thanks for the
suggestion!

>
> Thanks,
> Andy

Regards,
Andy



More information about the linux-riscv mailing list