[RFC v2 PATCH 1/4] ARM: add support for kernel mode NEON in atomic context

Nicolas Pitre nicolas.pitre at linaro.org
Wed Oct 9 15:24:18 EDT 2013


On Wed, 9 Oct 2013, Ard Biesheuvel wrote:

> Some applications, such as WPA CCMP encryption, do substantial
> amounts of work in non-process context. In order to support
> accelerated NEON implementations under these circumstances, we
> need a way to preserve the NEON context that may
> (a) belong to a completely unrelated userland process (if the
>     NEON unit is turned off at the moment);
> (b) belong to current userland;
> (c) belong to current kernel mode in process context.
> 
> The best way to deal with this is to just stack whatever registers
> we are going to use, and unstack them when we are done.
> 
> This patch adds kernel_neon_begin_atomic() and kernel_neon_end_atomic(),
> which may be called from any context. In the !in_interrupt() case, they
> just call their non-_atomic counterparts. In atomic context, they
> stack and unstack, respectively, the number of NEON registers declared
> when setting up the stack area using DEFINE_NEON_STACK_REGS().
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
> ---
>  arch/arm/include/asm/fpstate.h | 15 +++++++++++++-
>  arch/arm/include/asm/neon.h    | 34 +++++++++++++++++++++++++++++++
>  arch/arm/vfp/vfphw.S           | 46 ++++++++++++++++++++++++++++++++++++++++++
>  arch/arm/vfp/vfpmodule.c       |  3 +++
>  4 files changed, 97 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm/include/asm/fpstate.h b/arch/arm/include/asm/fpstate.h
> index 3ad4c10..7a6e100 100644
> --- a/arch/arm/include/asm/fpstate.h
> +++ b/arch/arm/include/asm/fpstate.h
> @@ -19,7 +19,7 @@
>   *  - FPEXC, FPSCR, FPINST and FPINST2.
>   *  - 16 or 32 double precision data registers
>   *  - an implementation-dependent word of state for FLDMX/FSTMX (pre-ARMv6)
> - * 
> + *
>   *  FPEXC will always be non-zero once the VFP has been used in this process.
>   */
>  
> @@ -52,6 +52,19 @@ union vfp_state {
>  extern void vfp_flush_thread(union vfp_state *);
>  extern void vfp_release_thread(union vfp_state *);
>  
> +/*
> + * Variable sized struct for stacking the bottom 'n' NEON registers.
> + */
> +struct vfp_partial_state {
> +	const __u32	num_regs;
> +	__u32		fpexc;
> +	__u32		fpscr;
> +	__u8		qregs[] __aligned(16);
> +} __aligned(16);
> +
> +extern void vfp_load_partial_state(struct vfp_partial_state *);
> +extern void vfp_save_partial_state(struct vfp_partial_state *);
> +
>  #define FP_HARD_SIZE 35
>  
>  struct fp_hard_struct {
> diff --git a/arch/arm/include/asm/neon.h b/arch/arm/include/asm/neon.h
> index 8f730fe..1efd9fc 100644
> --- a/arch/arm/include/asm/neon.h
> +++ b/arch/arm/include/asm/neon.h
> @@ -8,10 +8,21 @@
>   * published by the Free Software Foundation.
>   */
>  
> +#include <linux/types.h>
> +#include <linux/hardirq.h>
> +#include <asm/fpstate.h>
>  #include <asm/hwcap.h>
>  
>  #define cpu_has_neon()		(!!(elf_hwcap & HWCAP_NEON))
>  
> +#define DEFINE_NEON_STACK_REGS(v, num)					\
> +	struct {							\
> +		struct vfp_partial_state regs;				\
> +		u8 qregs[(num) > 16 ? 256 : 16 * (((num) + 1) & ~1U)];	\
> +	} v = { .regs.num_regs = sizeof(v.qregs)/16 }
> +
> +#define DEFINE_NEON_STACK_REGS_ALL(name)	DEFINE_NEON_STACK_REGS(name,16)
> +
>  #ifdef __ARM_NEON__
>  
>  /*
> @@ -30,7 +41,30 @@
>  #define kernel_neon_begin() \
>  	BUILD_BUG_ON_MSG(1, "kernel_neon_begin() called from NEON code")
>  
> +#define kernel_neon_begin_atomic(a) \
> +	BUILD_BUG_ON_MSG(1, "kernel_neon_begin_atomic() called from NEON code")
> +
>  #else
>  void kernel_neon_begin(void);
> +#define kernel_neon_begin_atomic(name) __kernel_neon_begin_atomic(&(name).regs)
>  #endif
> +
> +#define kernel_neon_end_atomic(name) __kernel_neon_end_atomic(&(name).regs)
> +
>  void kernel_neon_end(void);
> +
> +static inline void __kernel_neon_begin_atomic(struct vfp_partial_state *regs)
> +{
> +	if (!in_interrupt())
> +		kernel_neon_begin();

Surely you want "if (!in_atomic())" here?

> +	else
> +		vfp_save_partial_state(regs);
> +}
> +
> +static inline void __kernel_neon_end_atomic(struct vfp_partial_state *regs)
> +{
> +	if (!in_interrupt())
> +		kernel_neon_end();

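Ditto: presumably this should be "if (!in_atomic())" as well, so that the
begin and end paths make the same decision.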

> +	else
> +		vfp_load_partial_state(regs);
> +}
> diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
> +	VFPFMXR	FPSCR, r3
> +	VFPFMXR	FPEXC, r2
> +	bx	lr
> +ENDPROC(vfp_load_partial_state)
> +
> +#endif
> diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
> index 52b8f40..3dea5ba 100644
> --- a/arch/arm/vfp/vfpmodule.c
> +++ b/arch/arm/vfp/vfpmodule.c
> @@ -713,6 +713,9 @@ void kernel_neon_end(void)
>  }
>  EXPORT_SYMBOL(kernel_neon_end);
>  
> +EXPORT_SYMBOL(vfp_save_partial_state);
> +EXPORT_SYMBOL(vfp_load_partial_state);
> +
>  #endif /* CONFIG_KERNEL_MODE_NEON */
>  
>  /*
> -- 
> 1.8.1.2
> 



More information about the linux-arm-kernel mailing list