[PATCH] arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible
Nicolas Pitre
nicolas.pitre at linaro.org
Thu Nov 15 17:26:44 EST 2012
On Mon, 5 Nov 2012, Dave Martin wrote:
> This patch fixes aes-armv4.S and sha1-armv4-large.S to work
> natively in Thumb. This allows ARM/Thumb interworking workarounds
> to be removed.
>
> I also take the opportunity to convert some explicit assembler
> directives for exported functions to the standard
> ENTRY()/ENDPROC().
>
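(For the record, ENTRY() and ENDPROC() from <linux/linkage.h> expand
to roughly the directives they replace here -- something like the
following, modulo alignment details:

        ENTRY(foo)              @ .globl foo ; .align ; foo:
        ...
        ENDPROC(foo)            @ .type foo, %function ; .size foo, .-foo

The .type %function annotation is the part that matters for Thumb-2:
it lets the toolchain tag the symbol address with the Thumb bit so
that callers interwork correctly.)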
> For the code itself:
>
> * In sha1_block_data_order, use of TEQ with sp is deprecated in
> ARMv7 and not supported in Thumb. For the branches back to
> .L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the
> assumption that clobbering the C flag here will not cause
> incorrect behaviour.
>
> For the first branch back to .L_20_39_or_60_79 the C flag is
> important, so sp is moved temporarily into another register so
> that TEQ can be used for the comparison (see the flag sketch
> after this list).
>
> * In the AES code, most forms of register-indexed addressing with
> shifts and rotates are not permitted for loads and stores in
> Thumb, so the address calculation is done using a separate
> instruction for the Thumb case.
>
> The resulting code is unlikely to be optimally scheduled, but
> it should not have a large impact given the overall size of the
> code. I haven't run any benchmarks.
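To spell out the flag reasoning behind the two different fixes: TEQ is
an EORS that discards its result, so with a plain register operand it
sets only N and Z and leaves C alone, whereas CMP is a SUBS and
rewrites N, Z, C and V. Both yield the same Z for an equality test:

        cmp     r14,sp          @ Z := (r14 == sp), but C and V clobbered
        mov     r11,sp          @ MOV without the S suffix touches no flags
        teq     r14,r11         @ Z := (r14 == r11), C preserved

So CMP is a safe drop-in for the .L_00_15 and .L_40_59 loops, where
nothing downstream reads C, but not ahead of the bcs that follows the
branch back to .L_20_39_or_60_79; there the carry must survive, and
since Thumb has no encoding for TEQ with sp as an operand, sp is
copied into an ordinary register first.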
>
> Signed-off-by: Dave Martin <dave.martin at linaro.org>
Acked-by: Nicolas Pitre <nico at linaro.org>
I didn't test it either, only reviewed the patch. Looks obvious enough.
And if something is wrong, it is very unlikely to go unnoticed in
practice.
> ---
>
> For now, I have built the code but not tested it. I'll consider the
> patch an RFC until someone gives me a Tested-by (or failing that, when I
> get around to testing it myself...)
>
> Cheers
> ---Dave
>
> arch/arm/crypto/aes-armv4.S | 64 +++++++++++------------------------
> arch/arm/crypto/sha1-armv4-large.S | 24 +++++--------
> 2 files changed, 29 insertions(+), 59 deletions(-)
>
> diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
> index e59b1d5..19d6cd6 100644
> --- a/arch/arm/crypto/aes-armv4.S
> +++ b/arch/arm/crypto/aes-armv4.S
> @@ -34,8 +34,9 @@
> @ A little glue here to select the correct code below for the ARM CPU
> @ that is being targetted.
>
> +#include <linux/linkage.h>
> +
> .text
> -.code 32
>
> .type AES_Te,%object
> .align 5
> @@ -145,10 +146,8 @@ AES_Te:
>
> @ void AES_encrypt(const unsigned char *in, unsigned char *out,
> @ const AES_KEY *key) {
> -.global AES_encrypt
> -.type AES_encrypt,%function
> .align 5
> -AES_encrypt:
> +ENTRY(AES_encrypt)
> sub r3,pc,#8 @ AES_encrypt
> stmdb sp!,{r1,r4-r12,lr}
> mov r12,r0 @ inp
> @@ -239,15 +238,8 @@ AES_encrypt:
> strb r6,[r12,#14]
> strb r3,[r12,#15]
> #endif
> -#if __ARM_ARCH__>=5
> ldmia sp!,{r4-r12,pc}
> -#else
> - ldmia sp!,{r4-r12,lr}
> - tst lr,#1
> - moveq pc,lr @ be binary compatible with V4, yet
> - .word 0xe12fff1e @ interoperable with Thumb ISA:-)
> -#endif
> -.size AES_encrypt,.-AES_encrypt
> +ENDPROC(AES_encrypt)
>
> .type _armv4_AES_encrypt,%function
> .align 2
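For reference, the ARMv4 compatibility tail deleted above (and in the
similar hunks below) decodes as:

        tst     lr,#1           @ bit 0 of return address set => Thumb caller
        moveq   pc,lr           @ ARM caller: plain, non-interworking return
        .word   0xe12fff1e      @ raw opcode of 'bx lr', for Thumb callers

From ARMv5 onwards, loading pc with ldm interworks by itself, so the
single ldmia sp!,{r4-r12,pc} covers both cases; and in a Thumb-2
kernel the hand-assembled .word would be wrong anyway, being an
ARM-encoded instruction.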
> @@ -386,10 +378,8 @@ _armv4_AES_encrypt:
> ldr pc,[sp],#4 @ pop and return
> .size _armv4_AES_encrypt,.-_armv4_AES_encrypt
>
> -.global private_AES_set_encrypt_key
> -.type private_AES_set_encrypt_key,%function
> .align 5
> -private_AES_set_encrypt_key:
> +ENTRY(private_AES_set_encrypt_key)
> _armv4_AES_set_encrypt_key:
> sub r3,pc,#8 @ AES_set_encrypt_key
> teq r0,#0
> @@ -658,15 +648,11 @@ _armv4_AES_set_encrypt_key:
>
> .Ldone: mov r0,#0
> ldmia sp!,{r4-r12,lr}
> -.Labrt: tst lr,#1
> - moveq pc,lr @ be binary compatible with V4, yet
> - .word 0xe12fff1e @ interoperable with Thumb ISA:-)
> -.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
> +.Labrt: mov pc,lr
> +ENDPROC(private_AES_set_encrypt_key)
>
> -.global private_AES_set_decrypt_key
> -.type private_AES_set_decrypt_key,%function
> .align 5
> -private_AES_set_decrypt_key:
> +ENTRY(private_AES_set_decrypt_key)
> str lr,[sp,#-4]! @ push lr
> #if 0
> @ kernel does both of these in setkey so optimise this bit out by
> @@ -748,15 +734,8 @@ private_AES_set_decrypt_key:
> bne .Lmix
>
> mov r0,#0
> -#if __ARM_ARCH__>=5
> ldmia sp!,{r4-r12,pc}
> -#else
> - ldmia sp!,{r4-r12,lr}
> - tst lr,#1
> - moveq pc,lr @ be binary compatible with V4, yet
> - .word 0xe12fff1e @ interoperable with Thumb ISA:-)
> -#endif
> -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
> +ENDPROC(private_AES_set_decrypt_key)
>
> .type AES_Td,%object
> .align 5
> @@ -862,10 +841,8 @@ AES_Td:
>
> @ void AES_decrypt(const unsigned char *in, unsigned char *out,
> @ const AES_KEY *key) {
> -.global AES_decrypt
> -.type AES_decrypt,%function
> .align 5
> -AES_decrypt:
> +ENTRY(AES_decrypt)
> sub r3,pc,#8 @ AES_decrypt
> stmdb sp!,{r1,r4-r12,lr}
> mov r12,r0 @ inp
> @@ -956,15 +933,8 @@ AES_decrypt:
> strb r6,[r12,#14]
> strb r3,[r12,#15]
> #endif
> -#if __ARM_ARCH__>=5
> ldmia sp!,{r4-r12,pc}
> -#else
> - ldmia sp!,{r4-r12,lr}
> - tst lr,#1
> - moveq pc,lr @ be binary compatible with V4, yet
> - .word 0xe12fff1e @ interoperable with Thumb ISA:-)
> -#endif
> -.size AES_decrypt,.-AES_decrypt
> +ENDPROC(AES_decrypt)
>
> .type _armv4_AES_decrypt,%function
> .align 2
> @@ -1064,7 +1034,9 @@ _armv4_AES_decrypt:
> and r9,lr,r1,lsr#8
>
> ldrb r7,[r10,r7] @ Td4[s1>>0]
> - ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
> + ARM( ldrb r1,[r10,r1,lsr#24] ) @ Td4[s1>>24]
> + THUMB( add r1,r10,r1,lsr#24 ) @ Td4[s1>>24]
> + THUMB( ldrb r1,[r1] )
> ldrb r8,[r10,r8] @ Td4[s1>>16]
> eor r0,r7,r0,lsl#24
> ldrb r9,[r10,r9] @ Td4[s1>>8]
> @@ -1077,7 +1049,9 @@ _armv4_AES_decrypt:
> ldrb r8,[r10,r8] @ Td4[s2>>0]
> and r9,lr,r2,lsr#16
>
> - ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
> + ARM( ldrb r2,[r10,r2,lsr#24] ) @ Td4[s2>>24]
> + THUMB( add r2,r10,r2,lsr#24 ) @ Td4[s2>>24]
> + THUMB( ldrb r2,[r2] )
> eor r0,r0,r7,lsl#8
> ldrb r9,[r10,r9] @ Td4[s2>>16]
> eor r1,r8,r1,lsl#16
> @@ -1090,7 +1064,9 @@ _armv4_AES_decrypt:
> and r9,lr,r3 @ i2
>
> ldrb r9,[r10,r9] @ Td4[s3>>0]
> - ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
> + ARM( ldrb r3,[r10,r3,lsr#24] ) @ Td4[s3>>24]
> + THUMB( add r3,r10,r3,lsr#24 ) @ Td4[s3>>24]
> + THUMB( ldrb r3,[r3] )
> eor r0,r0,r7,lsl#16
> ldr r7,[r11,#0]
> eor r1,r1,r8,lsl#8
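To make the ARM()/THUMB() pairs above concrete, here is what each
build actually assembles. ARM-state loads can fold a shifted index
register into the addressing mode, while Thumb-2 loads accept only
LSL #0-3, so the address is computed with a separate add first:

        @ ARM build:
        ldrb    r1,[r10,r1,lsr#24]      @ shift folded into the load
        @ Thumb-2 build:
        add     r1,r10,r1,lsr#24        @ any shift is fine in data-processing
        ldrb    r1,[r1]                 @ then a plain load

Conveniently, the index register is also the destination in each case,
so the add can clobber it and no scratch register is needed.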
> diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
> index 7050ab1..92c6eed 100644
> --- a/arch/arm/crypto/sha1-armv4-large.S
> +++ b/arch/arm/crypto/sha1-armv4-large.S
> @@ -51,13 +51,12 @@
> @ Profiler-assisted and platform-specific optimization resulted in 10%
> @ improvement on Cortex A8 core and 12.2 cycles per byte.
>
> -.text
> +#include <linux/linkage.h>
>
> -.global sha1_block_data_order
> -.type sha1_block_data_order,%function
> +.text
>
> .align 2
> -sha1_block_data_order:
> +ENTRY(sha1_block_data_order)
> stmdb sp!,{r4-r12,lr}
> add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
> ldmia r0,{r3,r4,r5,r6,r7}
> @@ -194,7 +193,7 @@ sha1_block_data_order:
> eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
> str r9,[r14,#-4]!
> add r3,r3,r10 @ E+=F_00_19(B,C,D)
> - teq r14,sp
> + cmp r14,sp
> bne .L_00_15 @ [((11+4)*5+2)*3]
> #if __ARM_ARCH__<7
> ldrb r10,[r1,#2]
> @@ -374,7 +373,9 @@ sha1_block_data_order:
> @ F_xx_xx
> add r3,r3,r9 @ E+=X[i]
> add r3,r3,r10 @ E+=F_20_39(B,C,D)
> - teq r14,sp @ preserve carry
> + ARM( teq r14,sp ) @ preserve carry
> + THUMB( mov r11,sp )
> + THUMB( teq r14,r11 ) @ preserve carry
> bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
> bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
>
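(Same flag story as noted earlier: the bcs just above consumes a carry
produced earlier in the round, so CMP, which rewrites C, is not an
option here. Thumb has no encoding for TEQ with sp as an operand,
while a flag-less MOV and a plain-register TEQ both leave C untouched,
so copying sp into a dead register -- r11 here -- gives the same Z
without disturbing the carry.)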
> @@ -466,7 +467,7 @@ sha1_block_data_order:
> add r3,r3,r9 @ E+=X[i]
> add r3,r3,r10 @ E+=F_40_59(B,C,D)
> add r3,r3,r11,ror#2
> - teq r14,sp
> + cmp r14,sp
> bne .L_40_59 @ [+((12+5)*5+2)*4]
>
> ldr r8,.LK_60_79
> @@ -485,19 +486,12 @@ sha1_block_data_order:
> teq r1,r2
> bne .Lloop @ [+18], total 1307
>
> -#if __ARM_ARCH__>=5
> ldmia sp!,{r4-r12,pc}
> -#else
> - ldmia sp!,{r4-r12,lr}
> - tst lr,#1
> - moveq pc,lr @ be binary compatible with V4, yet
> - .word 0xe12fff1e @ interoperable with Thumb ISA:-)
> -#endif
> .align 2
> .LK_00_19: .word 0x5a827999
> .LK_20_39: .word 0x6ed9eba1
> .LK_40_59: .word 0x8f1bbcdc
> .LK_60_79: .word 0xca62c1d6
> -.size sha1_block_data_order,.-sha1_block_data_order
> +ENDPROC(sha1_block_data_order)
> .asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro at openssl.org>"
> .align 2
> --
> 1.7.4.1