[PATCH v2 4/4] arm64: accelerate crc32_be
Ard Biesheuvel
ardb at kernel.org
Mon Jan 17 23:46:00 PST 2022
On Thu, 13 Jan 2022 at 17:58, Kevin Bracey <kevin at bracey.fi> wrote:
>
> It makes no sense to leave crc32_be using the generic code while we
> only accelerate the little-endian ops.
>
> Even though the big-endian form doesn't fit as smoothly into the arm64
> CRC32 instructions, we can speed it up and avoid hitting the D cache.
>
> Tested on Cortex-A53. Without acceleration:
>
> crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64
> crc32: self tests passed, processed 225944 bytes in 192240 nsec
> crc32c: CRC_LE_BITS = 64
> crc32c: self tests passed, processed 112972 bytes in 21360 nsec
>
> With acceleration:
>
> crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64
> crc32: self tests passed, processed 225944 bytes in 53480 nsec
> crc32c: CRC_LE_BITS = 64
> crc32c: self tests passed, processed 112972 bytes in 21480 nsec
>
> Signed-off-by: Kevin Bracey <kevin at bracey.fi>
> Tested-by: Ard Biesheuvel <ardb at kernel.org>
> Reviewed-by: Ard Biesheuvel <ardb at kernel.org>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Will Deacon <will at kernel.org>
Catalin, Will,
We'll need an ack from you so we can take this whole set via Herbert's tree.
Thanks,
Ard.
> ---
> arch/arm64/lib/crc32.S | 87 +++++++++++++++++++++++++++++++++++-------
> 1 file changed, 73 insertions(+), 14 deletions(-)
>
> diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
> index 0f9e10ecda23..8340dccff46f 100644
> --- a/arch/arm64/lib/crc32.S
> +++ b/arch/arm64/lib/crc32.S
> @@ -11,7 +11,44 @@
>
> .arch armv8-a+crc
>
> - .macro __crc32, c
> + .macro byteorder, reg, be
> + .if \be
> +CPU_LE( rev \reg, \reg )
> + .else
> +CPU_BE( rev \reg, \reg )
> + .endif
> + .endm
> +
> + .macro byteorder16, reg, be
> + .if \be
> +CPU_LE( rev16 \reg, \reg )
> + .else
> +CPU_BE( rev16 \reg, \reg )
> + .endif
> + .endm
> +
> + .macro bitorder, reg, be
> + .if \be
> + rbit \reg, \reg
> + .endif
> + .endm
> +
> + .macro bitorder16, reg, be
> + .if \be
> + rbit \reg, \reg
> + lsr \reg, \reg, #16
> + .endif
> + .endm
> +
> + .macro bitorder8, reg, be
> + .if \be
> + rbit \reg, \reg
> + lsr \reg, \reg, #24
> + .endif
> + .endm
> +
> + .macro __crc32, c, be=0
> + bitorder w0, \be
> cmp x2, #16
> b.lt 8f // less than 16 bytes
>
> @@ -24,10 +61,14 @@
> add x8, x8, x1
> add x1, x1, x7
> ldp x5, x6, [x8]
> -CPU_BE( rev x3, x3 )
> -CPU_BE( rev x4, x4 )
> -CPU_BE( rev x5, x5 )
> -CPU_BE( rev x6, x6 )
> + byteorder x3, \be
> + byteorder x4, \be
> + byteorder x5, \be
> + byteorder x6, \be
> + bitorder x3, \be
> + bitorder x4, \be
> + bitorder x5, \be
> + bitorder x6, \be
>
> tst x7, #8
> crc32\c\()x w8, w0, x3
> @@ -55,33 +96,43 @@ CPU_BE( rev x6, x6 )
> 32: ldp x3, x4, [x1], #32
> sub x2, x2, #32
> ldp x5, x6, [x1, #-16]
> -CPU_BE( rev x3, x3 )
> -CPU_BE( rev x4, x4 )
> -CPU_BE( rev x5, x5 )
> -CPU_BE( rev x6, x6 )
> + byteorder x3, \be
> + byteorder x4, \be
> + byteorder x5, \be
> + byteorder x6, \be
> + bitorder x3, \be
> + bitorder x4, \be
> + bitorder x5, \be
> + bitorder x6, \be
> crc32\c\()x w0, w0, x3
> crc32\c\()x w0, w0, x4
> crc32\c\()x w0, w0, x5
> crc32\c\()x w0, w0, x6
> cbnz x2, 32b
> -0: ret
> +0: bitorder w0, \be
> + ret
>
> 8: tbz x2, #3, 4f
> ldr x3, [x1], #8
> -CPU_BE( rev x3, x3 )
> + byteorder x3, \be
> + bitorder x3, \be
> crc32\c\()x w0, w0, x3
> 4: tbz x2, #2, 2f
> ldr w3, [x1], #4
> -CPU_BE( rev w3, w3 )
> + byteorder w3, \be
> + bitorder w3, \be
> crc32\c\()w w0, w0, w3
> 2: tbz x2, #1, 1f
> ldrh w3, [x1], #2
> -CPU_BE( rev16 w3, w3 )
> + byteorder16 w3, \be
> + bitorder16 w3, \be
> crc32\c\()h w0, w0, w3
> 1: tbz x2, #0, 0f
> ldrb w3, [x1]
> + bitorder8 w3, \be
> crc32\c\()b w0, w0, w3
> -0: ret
> +0: bitorder w0, \be
> + ret
> .endm
>
> .align 5
> @@ -99,3 +150,11 @@ alternative_if_not ARM64_HAS_CRC32
> alternative_else_nop_endif
> __crc32 c
> SYM_FUNC_END(__crc32c_le)
> +
> + .align 5
> +SYM_FUNC_START(crc32_be)
> +alternative_if_not ARM64_HAS_CRC32
> + b crc32_be_base
> +alternative_else_nop_endif
> + __crc32 be=1
> +SYM_FUNC_END(crc32_be)
> --
> 2.25.1
>
More information about the linux-arm-kernel mailing list