[PATCH v5 22/23] crypto: arm64/sm3-ce - yield NEON after every block of input
Ard Biesheuvel
ard.biesheuvel at linaro.org
Sat Mar 10 07:22:07 PST 2018
Avoid excessive scheduling delays under a preemptible kernel by
conditionally yielding the NEON unit after every block of input.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
---
arch/arm64/crypto/sm3-ce-core.S | 30 +++++++++++++++-----
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S
index 27169fe07a68..5a116c8d0cee 100644
--- a/arch/arm64/crypto/sm3-ce-core.S
+++ b/arch/arm64/crypto/sm3-ce-core.S
@@ -77,19 +77,25 @@
*/
.text
ENTRY(sm3_ce_transform)
+ frame_push 3
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+
/* load state */
- ld1 {v8.4s-v9.4s}, [x0]
+ ld1 {v8.4s-v9.4s}, [x19]
rev64 v8.4s, v8.4s
rev64 v9.4s, v9.4s
ext v8.16b, v8.16b, v8.16b, #8
ext v9.16b, v9.16b, v9.16b, #8
- adr_l x8, .Lt
+0: adr_l x8, .Lt
ldp s13, s14, [x8]
/* load input */
-0: ld1 {v0.16b-v3.16b}, [x1], #64
- sub w2, w2, #1
+1: ld1 {v0.16b-v3.16b}, [x20], #64
+ sub w21, w21, #1
mov v15.16b, v8.16b
mov v16.16b, v9.16b
@@ -125,14 +131,24 @@ CPU_LE( rev32 v3.16b, v3.16b )
eor v9.16b, v9.16b, v16.16b
/* handled all input blocks? */
- cbnz w2, 0b
+ cbz w21, 2f
+
+ if_will_cond_yield_neon
+ st1 {v8.4s-v9.4s}, [x19]
+ do_cond_yield_neon
+ ld1 {v8.4s-v9.4s}, [x19]
+ b 0b
+ endif_yield_neon
+
+ b 1b
/* save state */
- rev64 v8.4s, v8.4s
+2: rev64 v8.4s, v8.4s
rev64 v9.4s, v9.4s
ext v8.16b, v8.16b, v8.16b, #8
ext v9.16b, v9.16b, v9.16b, #8
- st1 {v8.4s-v9.4s}, [x0]
+ st1 {v8.4s-v9.4s}, [x19]
+ frame_pop
ret
ENDPROC(sm3_ce_transform)
--
2.15.1
More information about the linux-arm-kernel mailing list