[RFC v2 PATCH 1/4] ARM: add support for kernel mode NEON in atomic context

Ard Biesheuvel ard.biesheuvel at linaro.org
Wed Oct 9 15:32:18 EDT 2013


On 9 October 2013 21:24, Nicolas Pitre <nicolas.pitre at linaro.org> wrote:
> On Wed, 9 Oct 2013, Ard Biesheuvel wrote:
>
>> Some applications, such as WPA CCMP encryption, do substantial
>> amounts of work in non-process context. In order to support
>> accelerated NEON implementations under these circumstances, we
>> need a way to preserve the NEON context that may
>> (a) belong to a completely unrelated userland process (if the
>>     NEON unit is turned off at the moment);
>> (b) belong to current userland;
>> (c) belong to current kernel mode in process context.
>>
>> The best way to deal with this is to just stack whatever registers
>> we are going to use, and unstack them when we are done.
>>
>> This patch adds kernel_neon_begin_atomic() and kernel_neon_end_atomic(),
>> which may be called from any context. In the !in_interrupt() case, they
>> just call their non-_atomic counterparts. In atomic context, they
>> stack resp. unstack the number of NEON registers declared when setting
>> up the stack area using DEFINE_NEON_STACK_REGS().
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
>> ---
>>  arch/arm/include/asm/fpstate.h | 15 +++++++++++++-
>>  arch/arm/include/asm/neon.h    | 34 +++++++++++++++++++++++++++++++
>>  arch/arm/vfp/vfphw.S           | 46 ++++++++++++++++++++++++++++++++++++++++++
>>  arch/arm/vfp/vfpmodule.c       |  3 +++
>>  4 files changed, 97 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm/include/asm/fpstate.h b/arch/arm/include/asm/fpstate.h
>> index 3ad4c10..7a6e100 100644
>> --- a/arch/arm/include/asm/fpstate.h
>> +++ b/arch/arm/include/asm/fpstate.h
>> @@ -19,7 +19,7 @@
>>   *  - FPEXC, FPSCR, FPINST and FPINST2.
>>   *  - 16 or 32 double precision data registers
>>   *  - an implementation-dependent word of state for FLDMX/FSTMX (pre-ARMv6)
>> - *
>> + *
>>   *  FPEXC will always be non-zero once the VFP has been used in this process.
>>   */
>>
>> @@ -52,6 +52,19 @@ union vfp_state {
>>  extern void vfp_flush_thread(union vfp_state *);
>>  extern void vfp_release_thread(union vfp_state *);
>>
>> +/*
>> + * Variable sized struct for stacking the bottom 'n' NEON registers.
>> + */
>> +struct vfp_partial_state {
>> +     const __u32     num_regs;
>> +     __u32           fpexc;
>> +     __u32           fpscr;
>> +     __u8            qregs[] __aligned(16);
>> +} __aligned(16);
>> +
>> +extern void vfp_load_partial_state(struct vfp_partial_state *);
>> +extern void vfp_save_partial_state(struct vfp_partial_state *);
>> +
>>  #define FP_HARD_SIZE 35
>>
>>  struct fp_hard_struct {
>> diff --git a/arch/arm/include/asm/neon.h b/arch/arm/include/asm/neon.h
>> index 8f730fe..1efd9fc 100644
>> --- a/arch/arm/include/asm/neon.h
>> +++ b/arch/arm/include/asm/neon.h
>> @@ -8,10 +8,21 @@
>>   * published by the Free Software Foundation.
>>   */
>>
>> +#include <linux/types.h>
>> +#include <linux/hardirq.h>
>> +#include <asm/fpstate.h>
>>  #include <asm/hwcap.h>
>>
>>  #define cpu_has_neon()               (!!(elf_hwcap & HWCAP_NEON))
>>
>> +#define DEFINE_NEON_STACK_REGS(v, num)                                       \
>> +     struct {                                                        \
>> +             struct vfp_partial_state regs;                          \
>> +             u8 qregs[(num) > 16 ? 256 : 16 * (((num) + 1) & ~1U)];  \
>> +     } v = { .regs.num_regs = sizeof(v.qregs)/16 }
>> +
>> +#define DEFINE_NEON_STACK_REGS_ALL(name)     DEFINE_NEON_STACK_REGS(name,16)
>> +
>>  #ifdef __ARM_NEON__
>>
>>  /*
>> @@ -30,7 +41,30 @@
>>  #define kernel_neon_begin() \
>>       BUILD_BUG_ON_MSG(1, "kernel_neon_begin() called from NEON code")
>>
>> +#define kernel_neon_begin_atomic(a) \
>> +     BUILD_BUG_ON_MSG(1, "kernel_neon_begin_atomic() called from NEON code")
>> +
>>  #else
>>  void kernel_neon_begin(void);
>> +#define kernel_neon_begin_atomic(name) __kernel_neon_begin_atomic(&(name).regs)
>>  #endif
>> +
>> +#define kernel_neon_end_atomic(name) __kernel_neon_end_atomic(&(name).regs)
>> +
>>  void kernel_neon_end(void);
>> +
>> +static inline void __kernel_neon_begin_atomic(struct vfp_partial_state *regs)
>> +{
>> +     if (!in_interrupt())
>> +             kernel_neon_begin();
>
> Surely you want "if (!in_atomic())" here?
>
>> +     else
>> +             vfp_save_partial_state(regs);
>> +}
>> +
>> +static inline void __kernel_neon_end_atomic(struct vfp_partial_state *regs)
>> +{
>> +     if (!in_interrupt())
>> +             kernel_neon_end();
>
> Ditto.
>

If I am reading (and understanding) the source correctly, in_atomic()
is also true when merely running with preemption disabled. In that case
you should be able to just do a normal preserve / lazy restore, so
in_interrupt() looks like the right test here.

-- 
Ard.


>> +     else
>> +             vfp_load_partial_state(regs);
>> +}
>> diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
>> +     VFPFMXR FPSCR, r3
>> +     VFPFMXR FPEXC, r2
>> +     bx      lr
>> +ENDPROC(vfp_load_partial_state)
>> +
>> +#endif
>> diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
>> index 52b8f40..3dea5ba 100644
>> --- a/arch/arm/vfp/vfpmodule.c
>> +++ b/arch/arm/vfp/vfpmodule.c
>> @@ -713,6 +713,9 @@ void kernel_neon_end(void)
>>  }
>>  EXPORT_SYMBOL(kernel_neon_end);
>>
>> +EXPORT_SYMBOL(vfp_save_partial_state);
>> +EXPORT_SYMBOL(vfp_load_partial_state);
>> +
>>  #endif /* CONFIG_KERNEL_MODE_NEON */
>>
>>  /*
>> --
>> 1.8.1.2
>>



More information about the linux-arm-kernel mailing list