[PATCH resend 04/15] arm64: add support for kernel mode NEON in interrupt context

Catalin Marinas catalin.marinas at arm.com
Tue May 6 09:49:19 PDT 2014


On Thu, May 01, 2014 at 04:49:36PM +0100, Ard Biesheuvel wrote:
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 7a900142dbc8..05e1b24aca4c 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -41,6 +41,17 @@ struct fpsimd_state {
>  	unsigned int cpu;
>  };
>  
> +/*
> + * Struct for stacking the bottom 'n' FP/SIMD registers.
> + */
> +struct fpsimd_partial_state {
> +	u32		num_regs;
> +	u32		fpsr;
> +	u32		fpcr;
> +	__uint128_t	vregs[32] __aligned(16);
> +} __aligned(16);

Do we need this explicit alignment here?

> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
> index bbec599c96bd..69e75134689d 100644
> --- a/arch/arm64/include/asm/fpsimdmacros.h
> +++ b/arch/arm64/include/asm/fpsimdmacros.h
> @@ -62,3 +62,38 @@
>  	ldr	w\tmpnr, [\state, #16 * 2 + 4]
>  	msr	fpcr, x\tmpnr
>  .endm
> +
> +.altmacro
> +.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
> +	mrs	x\tmpnr1, fpsr
> +	str	w\numnr, [\state]
> +	mrs	x\tmpnr2, fpcr
> +	stp	w\tmpnr1, w\tmpnr2, [\state, #4]
> +	adr	x\tmpnr1, 0f
> +	add	\state, \state, x\numnr, lsl #4
> +	sub	x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
> +	br	x\tmpnr1
> +	.irp	qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
> +	.irp	qb, %(qa + 1)
> +	stp	q\qa, q\qb, [\state, # -16 * \qa - 16]
> +	.endr
> +	.endr
> +0:
> +.endm
> +
> +.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
> +	ldp	w\tmpnr1, w\tmpnr2, [\state, #4]
> +	msr	fpsr, x\tmpnr1
> +	msr	fpcr, x\tmpnr2
> +	adr	x\tmpnr1, 0f
> +	ldr	w\tmpnr2, [\state]
> +	add	\state, \state, x\tmpnr2, lsl #4
> +	sub	x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
> +	br	x\tmpnr1
> +	.irp	qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
> +	.irp	qb, %(qa + 1)
> +	ldp	q\qa, q\qb, [\state, # -16 * \qa - 16]
> +	.endr
> +	.endr
> +0:
> +.endm

BTW, it may be better if num_regs is placed at the end of the structure,
especially since you use stp to store both fpsr and fpcr (though I
haven't rewritten the macros above to see how they would look).

-- 
Catalin



More information about the linux-arm-kernel mailing list