[PATCH v2] ARM: crypto: enable NEON SHA-1 for big endian

Ard Biesheuvel ard.biesheuvel at linaro.org
Tue Aug 5 13:34:52 PDT 2014


This tweaks the SHA-1 NEON code slightly so it works correctly under big endian,
and removes the Kconfig condition preventing it from being selected if
CONFIG_CPU_BIG_ENDIAN is set.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
---

I accidentally submitted the version below to the patch system (#8125/1) rather
than the version I had posted to the linux-arm-kernel mailing list (LAKML) for
review.

The difference between the two versions is that the first one just changed some
vld1.32 instructions into vld1.8 instructions, resulting in the data being byte
swapped twice after being read from memory: once by vld1.8 and once by the
subsequent vrev32.8 instruction.

Instead, this version retains the vld1.32 instructions and makes the vrev32.8
instructions conditional on !CPU_BIG_ENDIAN. As each vrev32.8 instruction also
performed an implicit move (from a tmp register into a W register), the W
register assignments had to be reshuffled so that the data can be loaded
directly into the W registers, avoiding explicit moves between registers.

Both versions pass the tcrypt built-in test suite for SHA1, in both big-endian
and little-endian modes.

 arch/arm/crypto/sha1-armv7-neon.S | 39 ++++++++++++++++++++++-----------------
 crypto/Kconfig                    |  2 +-
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S
index 50013c0e2864..dcd01f3f0bb0 100644
--- a/arch/arm/crypto/sha1-armv7-neon.S
+++ b/arch/arm/crypto/sha1-armv7-neon.S
@@ -9,7 +9,7 @@
  */
 
 #include <linux/linkage.h>
-
+#include <asm/assembler.h>
 
 .syntax unified
 .code   32
@@ -61,13 +61,13 @@
 #define RT3 r12
 
 #define W0 q0
-#define W1 q1
+#define W1 q7
 #define W2 q2
 #define W3 q3
 #define W4 q4
-#define W5 q5
-#define W6 q6
-#define W7 q7
+#define W5 q6
+#define W6 q5
+#define W7 q1
 
 #define tmp0 q8
 #define tmp1 q9
@@ -79,6 +79,11 @@
 #define qK3 q14
 #define qK4 q15
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define ARM_LE(code...)
+#else
+#define ARM_LE(code...)		code
+#endif
 
 /* Round function macros. */
 
@@ -150,45 +155,45 @@
 #define W_PRECALC_00_15() \
 	add       RWK, sp, #(WK_offs(0));			\
 	\
-	vld1.32   {tmp0, tmp1}, [RDATA]!;			\
-	vrev32.8  W0, tmp0;		/* big => little */	\
-	vld1.32   {tmp2, tmp3}, [RDATA]!;			\
+	vld1.32   {W0, W7}, [RDATA]!;				\
+ ARM_LE(vrev32.8  W0, W0;	)	/* big => little */	\
+	vld1.32   {W6, W5}, [RDATA]!;				\
 	vadd.u32  tmp0, W0, curK;				\
-	vrev32.8  W7, tmp1;		/* big => little */	\
-	vrev32.8  W6, tmp2;		/* big => little */	\
+ ARM_LE(vrev32.8  W7, W7;	)	/* big => little */	\
+ ARM_LE(vrev32.8  W6, W6;	)	/* big => little */	\
 	vadd.u32  tmp1, W7, curK;				\
-	vrev32.8  W5, tmp3;		/* big => little */	\
+ ARM_LE(vrev32.8  W5, W5;	)	/* big => little */	\
 	vadd.u32  tmp2, W6, curK;				\
 	vst1.32   {tmp0, tmp1}, [RWK]!;				\
 	vadd.u32  tmp3, W5, curK;				\
 	vst1.32   {tmp2, tmp3}, [RWK];				\
 
 #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vld1.32   {tmp0, tmp1}, [RDATA]!;			\
+	vld1.32   {W0, W7}, [RDATA]!;				\
 
 #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 	add       RWK, sp, #(WK_offs(0));			\
 
 #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vrev32.8  W0, tmp0;		/* big => little */	\
+ ARM_LE(vrev32.8  W0, W0;	)	/* big => little */	\
 
 #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vld1.32   {tmp2, tmp3}, [RDATA]!;			\
+	vld1.32   {W6, W5}, [RDATA]!;				\
 
 #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 	vadd.u32  tmp0, W0, curK;				\
 
 #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vrev32.8  W7, tmp1;		/* big => little */	\
+ ARM_LE(vrev32.8  W7, W7;	)	/* big => little */	\
 
 #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vrev32.8  W6, tmp2;		/* big => little */	\
+ ARM_LE(vrev32.8  W6, W6;	)	/* big => little */	\
 
 #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 	vadd.u32  tmp1, W7, curK;				\
 
 #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vrev32.8  W5, tmp3;		/* big => little */	\
+ ARM_LE(vrev32.8  W5, W5;	)	/* big => little */	\
 
 #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 	vadd.u32  tmp2, W6, curK;				\
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 749b1e05c490..deef2a4b6559 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -542,7 +542,7 @@ config CRYPTO_SHA1_ARM
 
 config CRYPTO_SHA1_ARM_NEON
 	tristate "SHA1 digest algorithm (ARM NEON)"
-	depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+	depends on ARM && KERNEL_MODE_NEON
 	select CRYPTO_SHA1_ARM
 	select CRYPTO_SHA1
 	select CRYPTO_HASH
-- 
1.8.3.2




More information about the linux-arm-kernel mailing list