[RFC v2 PATCH 1/4] ARM: add support for kernel mode NEON in atomic context
Nicolas Pitre
nicolas.pitre at linaro.org
Wed Oct 9 15:24:18 EDT 2013
On Wed, 9 Oct 2013, Ard Biesheuvel wrote:
> Some applications, such as WPA CCMP encryption, do substantial
> amounts of work in non-process context. In order to support
> accelerated NEON implementations under these circumstances, we
> need a way to preserve the NEON context that may
> (a) belong to a completely unrelated userland process (if the
> NEON unit is currently turned off);
> (b) belong to current userland;
> (c) belong to current kernel mode in process context.
>
> The best way to deal with this is to just stack whatever registers
> we are going to use, and unstack them when we are done.
>
> This patch adds kernel_neon_begin_atomic() and kernel_neon_end_atomic(),
> which may be called from any context. In the !in_interrupt() case, they
> just call their non-_atomic counterparts. In atomic context, they
> stack and unstack, respectively, the number of NEON registers declared when setting
> up the stack area using DEFINE_NEON_STACK_REGS().
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
> ---
> arch/arm/include/asm/fpstate.h | 15 +++++++++++++-
> arch/arm/include/asm/neon.h | 34 +++++++++++++++++++++++++++++++
> arch/arm/vfp/vfphw.S | 46 ++++++++++++++++++++++++++++++++++++++++++
> arch/arm/vfp/vfpmodule.c | 3 +++
> 4 files changed, 97 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/include/asm/fpstate.h b/arch/arm/include/asm/fpstate.h
> index 3ad4c10..7a6e100 100644
> --- a/arch/arm/include/asm/fpstate.h
> +++ b/arch/arm/include/asm/fpstate.h
> @@ -19,7 +19,7 @@
> * - FPEXC, FPSCR, FPINST and FPINST2.
> * - 16 or 32 double precision data registers
> * - an implementation-dependent word of state for FLDMX/FSTMX (pre-ARMv6)
> - *
> + *
> * FPEXC will always be non-zero once the VFP has been used in this process.
> */
>
> @@ -52,6 +52,19 @@ union vfp_state {
> extern void vfp_flush_thread(union vfp_state *);
> extern void vfp_release_thread(union vfp_state *);
>
> +/*
> + * Variable sized struct for stacking the bottom 'n' NEON registers.
> + */
> +struct vfp_partial_state {
> + const __u32 num_regs;
> + __u32 fpexc;
> + __u32 fpscr;
> + __u8 qregs[] __aligned(16);
> +} __aligned(16);
> +
> +extern void vfp_load_partial_state(struct vfp_partial_state *);
> +extern void vfp_save_partial_state(struct vfp_partial_state *);
> +
> #define FP_HARD_SIZE 35
>
> struct fp_hard_struct {
> diff --git a/arch/arm/include/asm/neon.h b/arch/arm/include/asm/neon.h
> index 8f730fe..1efd9fc 100644
> --- a/arch/arm/include/asm/neon.h
> +++ b/arch/arm/include/asm/neon.h
> @@ -8,10 +8,21 @@
> * published by the Free Software Foundation.
> */
>
> +#include <linux/types.h>
> +#include <linux/hardirq.h>
> +#include <asm/fpstate.h>
> #include <asm/hwcap.h>
>
> #define cpu_has_neon() (!!(elf_hwcap & HWCAP_NEON))
>
> +#define DEFINE_NEON_STACK_REGS(v, num) \
> + struct { \
> + struct vfp_partial_state regs; \
> + u8 qregs[(num) > 16 ? 256 : 16 * (((num) + 1) & ~1U)]; \
> + } v = { .regs.num_regs = sizeof(v.qregs)/16 }
> +
> +#define DEFINE_NEON_STACK_REGS_ALL(name) DEFINE_NEON_STACK_REGS(name,16)
> +
> #ifdef __ARM_NEON__
>
> /*
> @@ -30,7 +41,30 @@
> #define kernel_neon_begin() \
> BUILD_BUG_ON_MSG(1, "kernel_neon_begin() called from NEON code")
>
> +#define kernel_neon_begin_atomic(a) \
> + BUILD_BUG_ON_MSG(1, "kernel_neon_begin_atomic() called from NEON code")
> +
> #else
> void kernel_neon_begin(void);
> +#define kernel_neon_begin_atomic(name) __kernel_neon_begin_atomic(&(name).regs)
> #endif
> +
> +#define kernel_neon_end_atomic(name) __kernel_neon_end_atomic(&(name).regs)
> +
> void kernel_neon_end(void);
> +
> +static inline void __kernel_neon_begin_atomic(struct vfp_partial_state *regs)
> +{
> + if (!in_interrupt())
> + kernel_neon_begin();
Surely you want "if (!in_atomic())" here?
> + else
> + vfp_save_partial_state(regs);
> +}
> +
> +static inline void __kernel_neon_end_atomic(struct vfp_partial_state *regs)
> +{
> + if (!in_interrupt())
> + kernel_neon_end();
Ditto.
> + else
> + vfp_load_partial_state(regs);
> +}
> diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
> + VFPFMXR FPSCR, r3
> + VFPFMXR FPEXC, r2
> + bx lr
> +ENDPROC(vfp_load_partial_state)
> +
> +#endif
> diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
> index 52b8f40..3dea5ba 100644
> --- a/arch/arm/vfp/vfpmodule.c
> +++ b/arch/arm/vfp/vfpmodule.c
> @@ -713,6 +713,9 @@ void kernel_neon_end(void)
> }
> EXPORT_SYMBOL(kernel_neon_end);
>
> +EXPORT_SYMBOL(vfp_save_partial_state);
> +EXPORT_SYMBOL(vfp_load_partial_state);
> +
> #endif /* CONFIG_KERNEL_MODE_NEON */
>
> /*
> --
> 1.8.1.2
>
More information about the linux-arm-kernel
mailing list