[RFT PATCH] crypto: arm/aes - replace scalar AES cipher
Ard Biesheuvel
ard.biesheuvel at linaro.org
Fri Jan 6 03:53:12 PST 2017
This replaces the scalar AES cipher that originates in the OpenSSL project
with a new implementation that is ~15% (*) faster (on modern cores), and
reuses the lookup tables and the key schedule generation routines from the
generic C implementation (which is usually compiled in anyway due to
networking and other subsystems depending on it).
Note that the bit sliced NEON code for AES still depends on the scalar cipher
that this patch replaces, so it is not removed entirely yet.
* On Cortex-A57, the performance increases from 17.0 to 14.9 cycles per byte
for 128-bit keys.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
---
It makes sense to test this on a variety of cores before deciding whether
to merge it or not. Test results welcome. (insmod tcrypt.ko mode=200 sec=1)
arch/arm/crypto/Kconfig | 20 +--
arch/arm/crypto/Makefile | 4 +-
arch/arm/crypto/aes-cipher-core.S | 169 ++++++++++++++++++++
arch/arm/crypto/aes-cipher-glue.c | 69 ++++++++
arch/arm/crypto/aes_glue.c | 98 ------------
5 files changed, 241 insertions(+), 119 deletions(-)
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 2f3339f015d3..f1de658c3c8f 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -62,33 +62,15 @@ config CRYPTO_SHA512_ARM
using optimized ARM assembler and NEON, when available.
config CRYPTO_AES_ARM
- tristate "AES cipher algorithms (ARM-asm)"
- depends on ARM
+ tristate "Scalar AES cipher for ARM"
select CRYPTO_ALGAPI
select CRYPTO_AES
help
Use optimized AES assembler routines for ARM platforms.
- AES cipher algorithms (FIPS-197). AES uses the Rijndael
- algorithm.
-
- Rijndael appears to be consistently a very good performer in
- both hardware and software across a wide range of computing
- environments regardless of its use in feedback or non-feedback
- modes. Its key setup time is excellent, and its key agility is
- good. Rijndael's very low memory requirements make it very well
- suited for restricted-space environments, in which it also
- demonstrates excellent performance. Rijndael's operations are
- among the easiest to defend against power and timing attacks.
-
- The AES specifies three key sizes: 128, 192 and 256 bits
-
- See <http://csrc.nist.gov/encryption/aes/> for more information.
-
config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on KERNEL_MODE_NEON
- select CRYPTO_AES_ARM
select CRYPTO_BLKCIPHER
select CRYPTO_SIMD
help
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 8d74e55eacd4..8f5de2db701c 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -27,8 +27,8 @@ $(warning $(ce-obj-y) $(ce-obj-m))
endif
endif
-aes-arm-y := aes-armv4.o aes_glue.o
-aes-arm-bs-y := aesbs-core.o aesbs-glue.o
+aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
+aes-arm-bs-y := aes-armv4.o aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
new file mode 100644
index 000000000000..8d4a15364d43
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -0,0 +1,169 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel at linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+ .text
+ .align 5
+
+ rk .req r0
+ rounds .req r1
+ in .req r2
+ out .req r3
+ tt .req ip
+
+ t0 .req lr
+ t1 .req r2
+ t2 .req r3
+
+ .macro __select, out, in, idx
+ .if __LINUX_ARM_ARCH__ < 7
+ and \out, \in, #0xff << (8 * \idx)
+ .else
+ ubfx \out, \in, #(8 * \idx), #8
+ .endif
+ .endm
+
+ .macro __load, out, in, idx
+ .if __LINUX_ARM_ARCH__ < 7 && \idx > 0
+ ldr \out, [tt, \in, lsr #(8 * \idx) - 2]
+ .else
+ ldr \out, [tt, \in, lsl #2]
+ .endif
+ .endm
+
+ .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+ __select \out0, \in0, 0
+ __select t0, \in1, 1
+ __load \out0, \out0, 0
+ __load t0, t0, 1
+
+ .if \enc
+ __select \out1, \in1, 0
+ __select t1, \in2, 1
+ .else
+ __select \out1, \in3, 0
+ __select t1, \in0, 1
+ .endif
+ __load \out1, \out1, 0
+ __select t2, \in2, 2
+ __load t1, t1, 1
+ __load t2, t2, 2
+
+ eor \out0, \out0, t0, ror #24
+
+ __select t0, \in3, 3
+ .if \enc
+ __select \t3, \in3, 2
+ __select \t4, \in0, 3
+ .else
+ __select \t3, \in1, 2
+ __select \t4, \in2, 3
+ .endif
+ __load \t3, \t3, 2
+ __load t0, t0, 3
+ __load \t4, \t4, 3
+
+ eor \out1, \out1, t1, ror #24
+ eor \out0, \out0, t2, ror #16
+ ldm rk!, {t1, t2}
+ eor \out1, \out1, \t3, ror #16
+ eor \out0, \out0, t0, ror #8
+ eor \out1, \out1, \t4, ror #8
+ eor \out0, \out0, t1
+ eor \out1, \out1, t2
+ .endm
+
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+ .endm
+
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+ .endm
+
+ .macro __rev, out, in
+ .if __LINUX_ARM_ARCH__ < 7
+ lsl t0, \in, #24
+ and t1, \in, #0xff00
+ and t2, \in, #0xff0000
+ orr \out, t0, \in, lsr #24
+ orr \out, \out, t1, lsl #8
+ orr \out, \out, t2, lsr #8
+ .else
+ rev \out, \in
+ .endif
+ .endm
+
+ .macro do_crypt, round, ttab, ltab
+ push {r3-r11, lr}
+
+ ldr r4, [in]
+ ldr r5, [in, #4]
+ ldr r6, [in, #8]
+ ldr r7, [in, #12]
+
+ ldm rk!, {r8, r9, r10, r11}
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ __rev r4, r4
+ __rev r5, r5
+ __rev r6, r6
+ __rev r7, r7
+#endif
+
+ eor r4, r4, r8
+ eor r5, r5, r9
+ eor r6, r6, r10
+ eor r7, r7, r11
+
+ ldr tt, =\ttab
+
+ tst rounds, #2
+ bne 1f
+
+0: \round r8, r9, r10, r11, r4, r5, r6, r7
+ \round r4, r5, r6, r7, r8, r9, r10, r11
+
+1: subs rounds, rounds, #4
+ \round r8, r9, r10, r11, r4, r5, r6, r7
+ ldrls tt, =\ltab
+ \round r4, r5, r6, r7, r8, r9, r10, r11
+ bhi 0b
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ __rev r4, r4
+ __rev r5, r5
+ __rev r6, r6
+ __rev r7, r7
+#endif
+
+ ldr out, [sp]
+
+ str r4, [out]
+ str r5, [out, #4]
+ str r6, [out, #8]
+ str r7, [out, #12]
+
+ pop {r3-r11, pc}
+
+ .align 3
+ .ltorg
+ .endm
+
+ENTRY(__aes_arm_encrypt)
+ do_crypt fround, crypto_ft_tab, crypto_fl_tab
+ENDPROC(__aes_arm_encrypt)
+
+ENTRY(__aes_arm_decrypt)
+ do_crypt iround, crypto_it_tab, crypto_il_tab
+ENDPROC(__aes_arm_decrypt)
diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
new file mode 100644
index 000000000000..19545237112a
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher-glue.c
@@ -0,0 +1,69 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel at linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
+EXPORT_SYMBOL(__aes_arm_encrypt);
+
+asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
+EXPORT_SYMBOL(__aes_arm_decrypt);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ int rounds = 6 + ctx->key_length / 4;
+
+ __aes_arm_encrypt(ctx->key_enc, rounds, in, out);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ int rounds = 6 + ctx->key_length / 4;
+
+ __aes_arm_decrypt(ctx->key_dec, rounds, in, out);
+}
+
+static struct crypto_alg aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-arm",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+
+ .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cra_cipher.cia_setkey = crypto_aes_set_key,
+ .cra_cipher.cia_encrypt = aes_encrypt,
+ .cra_cipher.cia_decrypt = aes_decrypt
+};
+
+static int __init aes_init(void)
+{
+ return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+ crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Scalar AES cipher for ARM");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel at linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("aes");
diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c
deleted file mode 100644
index 0409b8f89782..000000000000
--- a/arch/arm/crypto/aes_glue.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Glue Code for the asm optimized version of the AES Cipher Algorithm
- */
-
-#include <linux/module.h>
-#include <linux/crypto.h>
-#include <crypto/aes.h>
-
-#include "aes_glue.h"
-
-EXPORT_SYMBOL(AES_encrypt);
-EXPORT_SYMBOL(AES_decrypt);
-EXPORT_SYMBOL(private_AES_set_encrypt_key);
-EXPORT_SYMBOL(private_AES_set_decrypt_key);
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
- AES_encrypt(src, dst, &ctx->enc_key);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
- AES_decrypt(src, dst, &ctx->dec_key);
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
-
- switch (key_len) {
- case AES_KEYSIZE_128:
- key_len = 128;
- break;
- case AES_KEYSIZE_192:
- key_len = 192;
- break;
- case AES_KEYSIZE_256:
- key_len = 256;
- break;
- default:
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
- /* private_AES_set_decrypt_key expects an encryption key as input */
- ctx->dec_key = ctx->enc_key;
- if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
- return 0;
-}
-
-static struct crypto_alg aes_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct AES_CTX),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = aes_encrypt,
- .cia_decrypt = aes_decrypt
- }
- }
-};
-
-static int __init aes_init(void)
-{
- return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
- crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CRYPTO("aes");
-MODULE_ALIAS_CRYPTO("aes-asm");
-MODULE_AUTHOR("David McCullough <ucdevel at gmail.com>");
--
2.7.4
More information about the linux-arm-kernel
mailing list