[RFT PATCH v2] crypto: arm64/gcm - implement native driver using v8 Crypto Extensions
Ard Biesheuvel
ard.biesheuvel at linaro.org
Fri Jun 30 12:09:07 PDT 2017
On 30 June 2017 at 11:32, Ard Biesheuvel <ard.biesheuvel at linaro.org> wrote:
> Currently, the AES-GCM implementation for arm64 systems that support the
> ARMv8 Crypto Extensions is based on the generic GCM module, which combines
> the AES-CTR implementation using AES instructions with the PMULL based
> GHASH driver. This is suboptimal, given the fact that the input data needs
> to be loaded twice, once for the encryption and again for the MAC
> calculation.
>
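For context (not part of the patch): since gcm-aes-ce registers at
cra_priority 300, a plain crypto_alloc_aead("gcm(aes)", 0, 0) should now
pick this driver over the gcm_base(ctr-aes-ce,ghash-ce) composition. A
minimal sketch of an in-kernel user follows; the helper name is made up,
error handling is abbreviated, and the request is assumed to complete
synchronously (the new driver does its work inline):

#include <crypto/aead.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

/* hypothetical helper: encrypt len bytes of buf in place; buf must have
 * room for the 16-byte tag, iv is the 12-byte GCM nonce */
static int gcm_aes_ce_demo(const u8 *key, unsigned int keylen,
			   u8 *buf, unsigned int len, u8 iv[12])
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen);
	if (!err)
		err = crypto_aead_setauthsize(tfm, 16);
	if (err)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len + 16);	/* ciphertext + tag */
	aead_request_set_callback(req, 0, NULL, NULL);
	aead_request_set_ad(req, 0);		/* no associated data */
	aead_request_set_crypt(req, &sg, &sg, len, iv);

	err = crypto_aead_encrypt(req);

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return err;
}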
> On Cortex-A57 (r1p2) and other recent cores that implement micro-op fusing
> for the AES instructions, AES executes at less than 1 cycle per byte, which
> means that any cycles wasted on loading the data twice hurt even more.
>
> So implement a new GCM driver that combines the AES and PMULL instructions
> at the block level. This improves performance on Cortex-A57 by ~27% (from
That should be 37%, not 27%; see the arithmetic below.
> 3.5 cpb to 2.6 cpb)
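Working from the 8192-byte numbers at the end of this mail on the 2 GHz
box: the generic gcm_base(ctr-aes-ce,ghash-ce) path manages 569303040
bytes/s, i.e. 2e9 / 569303040 ~= 3.5 cycles per byte, while gcm-aes-ce
manages 778125312 bytes/s, i.e. ~2.6 cycles per byte. That is a ~27%
reduction in cycles per byte, but 778125312 / 569303040 ~= 1.37, i.e.
~37% more throughput.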
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
> ---
> v2: - rebase onto non-upstream arm64 SIMD refactoring branch
> (https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/log/?h=arm64-gcm)
> - implement non-SIMD fallback
> - remove accelerated AES routines from setkey() path
> - use be32() accessors instead of open-coded array assignments
> - remove redundant round key loads
>
> Raw numbers measured on a 2GHz AMD Overdrive B1 can be found after the patch.
>
> arch/arm64/crypto/Kconfig | 3 +-
> arch/arm64/crypto/ghash-ce-core.S | 175 ++++++++
> arch/arm64/crypto/ghash-ce-glue.c | 436 ++++++++++++++++++--
> 3 files changed, 587 insertions(+), 27 deletions(-)
>
> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
> index a669dedc8767..3e5b39b79fb9 100644
> --- a/arch/arm64/crypto/Kconfig
> +++ b/arch/arm64/crypto/Kconfig
> @@ -29,10 +29,11 @@ config CRYPTO_SHA2_ARM64_CE
> select CRYPTO_SHA256_ARM64
>
> config CRYPTO_GHASH_ARM64_CE
> - tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
> + tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
> depends on KERNEL_MODE_NEON
> select CRYPTO_HASH
> select CRYPTO_GF128MUL
> + select CRYPTO_AES
>
> config CRYPTO_CRCT10DIF_ARM64_CE
> tristate "CRCT10DIF digest algorithm using PMULL instructions"
> diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
> index f0bb9f0b524f..cb22459eba85 100644
> --- a/arch/arm64/crypto/ghash-ce-core.S
> +++ b/arch/arm64/crypto/ghash-ce-core.S
> @@ -77,3 +77,178 @@ CPU_LE( rev64 T1.16b, T1.16b )
> st1 {XL.2d}, [x1]
> ret
> ENDPROC(pmull_ghash_update)
> +
> + KS .req v8
> + CTR .req v9
> + INP .req v10
> +
> + .macro load_round_keys, rounds, rk
> + cmp \rounds, #12
> + blo 2222f /* 128 bits */
> + beq 1111f /* 192 bits */
> + ld1 {v17.4s-v18.4s}, [\rk], #32
> +1111: ld1 {v19.4s-v20.4s}, [\rk], #32
> +2222: ld1 {v21.4s-v24.4s}, [\rk], #64
> + ld1 {v25.4s-v28.4s}, [\rk], #64
> + ld1 {v29.4s-v31.4s}, [\rk]
> + .endm
> +
> + .macro enc_round, state, key
> + aese \state\().16b, \key\().16b
> + aesmc \state\().16b, \state\().16b
> + .endm
> +
> + .macro enc_block, state, rounds
> + cmp \rounds, #12
> + b.lo 2222f /* 128 bits */
> + b.eq 1111f /* 192 bits */
> + enc_round \state, v17
> + enc_round \state, v18
> +1111: enc_round \state, v19
> + enc_round \state, v20
> +2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
> + enc_round \state, \key
> + .endr
> + aese \state\().16b, v30.16b
> + eor \state\().16b, \state\().16b, v31.16b
> + .endm
> +
> + .macro pmull_gcm_do_crypt, enc
> + ld1 {SHASH.2d}, [x4]
> + ld1 {XL.2d}, [x1]
> + ldr x8, [x5, #8] // load lower counter
> +
> + movi MASK.16b, #0xe1
> + ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
> +CPU_LE( rev x8, x8 )
> + shl MASK.2d, MASK.2d, #57
> + eor SHASH2.16b, SHASH2.16b, SHASH.16b
> +
> + .if \enc == 1
> + ld1 {KS.16b}, [x7]
> + .endif
> +
> +0: ld1 {CTR.8b}, [x5] // load upper counter
> + ld1 {INP.16b}, [x3], #16
> + rev x9, x8
> + add x8, x8, #1
> + sub w0, w0, #1
> + ins CTR.d[1], x9 // set lower counter
> +
> + .if \enc == 1
> + eor INP.16b, INP.16b, KS.16b // encrypt input
> + st1 {INP.16b}, [x2], #16
> + .endif
> +
> + rev64 T1.16b, INP.16b
> +
> + cmp w6, #12
> + b.ge 2f // AES-192/256?
> +
> +1: enc_round CTR, v21
> +
> + ext T2.16b, XL.16b, XL.16b, #8
> + ext IN1.16b, T1.16b, T1.16b, #8
> +
> + enc_round CTR, v22
> +
> + eor T1.16b, T1.16b, T2.16b
> + eor XL.16b, XL.16b, IN1.16b
> +
> + enc_round CTR, v23
> +
> + pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1
> + eor T1.16b, T1.16b, XL.16b
> +
> + enc_round CTR, v24
> +
> + pmull XL.1q, SHASH.1d, XL.1d // a0 * b0
> + pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)
> +
> + enc_round CTR, v25
> +
> + ext T1.16b, XL.16b, XH.16b, #8
> + eor T2.16b, XL.16b, XH.16b
> + eor XM.16b, XM.16b, T1.16b
> +
> + enc_round CTR, v26
> +
> + eor XM.16b, XM.16b, T2.16b
> + pmull T2.1q, XL.1d, MASK.1d
> +
> + enc_round CTR, v27
> +
> + mov XH.d[0], XM.d[1]
> + mov XM.d[1], XL.d[0]
> +
> + enc_round CTR, v28
> +
> + eor XL.16b, XM.16b, T2.16b
> +
> + enc_round CTR, v29
> +
> + ext T2.16b, XL.16b, XL.16b, #8
> +
> + aese CTR.16b, v30.16b
> +
> + pmull XL.1q, XL.1d, MASK.1d
> + eor T2.16b, T2.16b, XH.16b
> +
> + eor KS.16b, CTR.16b, v31.16b
> +
> + eor XL.16b, XL.16b, T2.16b
> +
> + .if \enc == 0
> + eor INP.16b, INP.16b, KS.16b
> + st1 {INP.16b}, [x2], #16
> + .endif
> +
> + cbnz w0, 0b
> +
> +CPU_LE( rev x8, x8 )
> + st1 {XL.2d}, [x1]
> + str x8, [x5, #8] // store lower counter
> +
> + .if \enc == 1
> + st1 {KS.16b}, [x7]
> + .endif
> +
> + ret
> +
> +2: b.eq 3f // AES-192?
> + enc_round CTR, v17
> + enc_round CTR, v18
> +3: enc_round CTR, v19
> + enc_round CTR, v20
> + b 1b
> + .endm
> +
> + /*
> + * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
> + * struct ghash_key const *k, u8 ctr[],
> + * int rounds, u8 ks[])
> + */
> +ENTRY(pmull_gcm_encrypt)
> + pmull_gcm_do_crypt 1
> +ENDPROC(pmull_gcm_encrypt)
> +
> + /*
> + * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
> + * struct ghash_key const *k, u8 ctr[],
> + * int rounds)
> + */
> +ENTRY(pmull_gcm_decrypt)
> + pmull_gcm_do_crypt 0
> +ENDPROC(pmull_gcm_decrypt)
> +
> + /*
> + * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
> + */
> +ENTRY(pmull_gcm_encrypt_block)
> + cbz x2, 0f
> + load_round_keys w3, x2
> +0: ld1 {v0.16b}, [x1]
> + enc_block v0, w3
> + st1 {v0.16b}, [x0]
> + ret
> +ENDPROC(pmull_gcm_encrypt_block)
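As a reading aid (not real code from the patch), here is a rough C-level
sketch of one iteration of the pmull_gcm_do_crypt loop in the encrypt
case; in the actual assembly the AES rounds for the next keystream block
and the PMULL/GHASH reduction for the current ciphertext block execute
interleaved instruction by instruction, so each block is loaded only once:

	/* encrypt case; decrypt GHASHes the input block instead and
	 * XORs with the keystream computed in the same iteration */
	while (blocks--) {
		ct = load(src) ^ ks;		/* keystream from the previous round */
		store(dst, ct);			/* ciphertext is written immediately */
		ks = aes_encrypt(rk, ++ctr);	/* keystream for the next block ...  */
		X  = ghash_mul(X ^ ct, H);	/* ... interleaved with GHASH of ct  */
	}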
> diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
> index 30221ef56e70..85ff57e789ff 100644
> --- a/arch/arm64/crypto/ghash-ce-glue.c
> +++ b/arch/arm64/crypto/ghash-ce-glue.c
> @@ -11,18 +11,25 @@
> #include <asm/neon.h>
> #include <asm/simd.h>
> #include <asm/unaligned.h>
> +#include <crypto/aes.h>
> +#include <crypto/algapi.h>
> +#include <crypto/b128ops.h>
> #include <crypto/gf128mul.h>
> +#include <crypto/internal/aead.h>
> #include <crypto/internal/hash.h>
> +#include <crypto/internal/skcipher.h>
> +#include <crypto/scatterwalk.h>
> #include <linux/cpufeature.h>
> #include <linux/crypto.h>
> #include <linux/module.h>
>
> -MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
> +MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
> MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel at linaro.org>");
> MODULE_LICENSE("GPL v2");
>
> #define GHASH_BLOCK_SIZE 16
> #define GHASH_DIGEST_SIZE 16
> +#define GCM_IV_SIZE 12
>
> struct ghash_key {
> u64 a;
> @@ -36,9 +43,25 @@ struct ghash_desc_ctx {
> u32 count;
> };
>
> +struct gcm_aes_ctx {
> + struct crypto_aes_ctx aes_key;
> + struct ghash_key ghash_key;
> +};
> +
> asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
> struct ghash_key const *k, const char *head);
>
> +asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
> + const u8 src[], struct ghash_key const *k,
> + u8 ctr[], int rounds, u8 ks[]);
> +
> +asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
> + const u8 src[], struct ghash_key const *k,
> + u8 ctr[], int rounds);
> +
> +asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
> + u32 const rk[], int rounds);
> +
> static int ghash_init(struct shash_desc *desc)
> {
> struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
> @@ -130,17 +153,11 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
> return 0;
> }
>
> -static int ghash_setkey(struct crypto_shash *tfm,
> - const u8 *inkey, unsigned int keylen)
> +static int __ghash_setkey(struct ghash_key *key,
> + const u8 *inkey, unsigned int keylen)
> {
> - struct ghash_key *key = crypto_shash_ctx(tfm);
> u64 a, b;
>
> - if (keylen != GHASH_BLOCK_SIZE) {
> - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
> - return -EINVAL;
> - }
> -
> /* needed for the fallback */
> memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
>
> @@ -157,31 +174,398 @@ static int ghash_setkey(struct crypto_shash *tfm,
> return 0;
> }
>
> +static int ghash_setkey(struct crypto_shash *tfm,
> + const u8 *inkey, unsigned int keylen)
> +{
> + struct ghash_key *key = crypto_shash_ctx(tfm);
> +
> + if (keylen != GHASH_BLOCK_SIZE) {
> + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
> + return -EINVAL;
> + }
> +
> + return __ghash_setkey(key, inkey, keylen);
> +}
> +
> static struct shash_alg ghash_alg = {
> - .digestsize = GHASH_DIGEST_SIZE,
> - .init = ghash_init,
> - .update = ghash_update,
> - .final = ghash_final,
> - .setkey = ghash_setkey,
> - .descsize = sizeof(struct ghash_desc_ctx),
> - .base = {
> - .cra_name = "ghash",
> - .cra_driver_name = "ghash-ce",
> - .cra_priority = 200,
> - .cra_flags = CRYPTO_ALG_TYPE_SHASH,
> - .cra_blocksize = GHASH_BLOCK_SIZE,
> - .cra_ctxsize = sizeof(struct ghash_key),
> - .cra_module = THIS_MODULE,
> - },
> + .base.cra_name = "ghash",
> + .base.cra_driver_name = "ghash-ce",
> + .base.cra_priority = 200,
> + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
> + .base.cra_blocksize = GHASH_BLOCK_SIZE,
> + .base.cra_ctxsize = sizeof(struct ghash_key),
> + .base.cra_module = THIS_MODULE,
> +
> + .digestsize = GHASH_DIGEST_SIZE,
> + .init = ghash_init,
> + .update = ghash_update,
> + .final = ghash_final,
> + .setkey = ghash_setkey,
> + .descsize = sizeof(struct ghash_desc_ctx),
> };
>
> -static int __init ghash_ce_mod_init(void)
> +static int num_rounds(struct crypto_aes_ctx *ctx)
> +{
> + /*
> + * # of rounds specified by AES:
> + * 128 bit key 10 rounds
> + * 192 bit key 12 rounds
> + * 256 bit key 14 rounds
> + * => n byte key => 6 + (n/4) rounds
> + */
> + return 6 + ctx->key_length / 4;
> +}
> +
> +static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
> + unsigned int keylen)
> +{
> + struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
> + u8 key[GHASH_BLOCK_SIZE];
> + int ret;
> +
> + ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
> + if (ret) {
> + tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
> + return -EINVAL;
> + }
> +
> + crypto_aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});
> +
> + return __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
> +}
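(For reference: the crypto_aes_encrypt() of an all-zeroes block above
derives the GHASH key as H = E_K(0^128), as GCM specifies, and then feeds
it into the same __ghash_setkey() used by the standalone ghash-ce shash.)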
> +
> +static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
> +{
> + switch (authsize) {
> + case 4:
> + case 8:
> + case 12 ... 16:
> + break;
> + default:
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
> +static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
> + int *buf_count, struct gcm_aes_ctx *ctx)
> +{
> + if (*buf_count > 0) {
> + int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);
> +
> + memcpy(&buf[*buf_count], src, buf_added);
> +
> + *buf_count += buf_added;
> + src += buf_added;
> + count -= buf_added;
> + }
> +
> + if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
> + int blocks = count / GHASH_BLOCK_SIZE;
> +
> + ghash_do_update(blocks, dg, src, &ctx->ghash_key,
> + *buf_count ? buf : NULL);
> +
> + src += blocks * GHASH_BLOCK_SIZE;
> + count %= GHASH_BLOCK_SIZE;
> + *buf_count = 0;
> + }
> +
> + if (count > 0) {
> + memcpy(buf, src, count);
> + *buf_count = count;
> + }
> +}
> +
> +static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
> +{
> + struct crypto_aead *aead = crypto_aead_reqtfm(req);
> + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
> + u8 buf[GHASH_BLOCK_SIZE];
> + struct scatter_walk walk;
> + u32 len = req->assoclen;
> + int buf_count = 0;
> +
> + scatterwalk_start(&walk, req->src);
> +
> + do {
> + u32 n = scatterwalk_clamp(&walk, len);
> + u8 *p;
> +
> + if (!n) {
> + scatterwalk_start(&walk, sg_next(walk.sg));
> + n = scatterwalk_clamp(&walk, len);
> + }
> + p = scatterwalk_map(&walk);
> +
> + gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
> + len -= n;
> +
> + scatterwalk_unmap(p);
> + scatterwalk_advance(&walk, n);
> + scatterwalk_done(&walk, 0, len);
> + } while (len);
> +
> + if (buf_count) {
> + memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
> + ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
> + }
> +}
> +
> +static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
> + u64 dg[], u8 tag[], int cryptlen)
> +{
> + u8 mac[AES_BLOCK_SIZE];
> + u128 lengths;
> +
> + lengths.a = cpu_to_be64(req->assoclen * 8);
> + lengths.b = cpu_to_be64(cryptlen * 8);
> +
> + ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
> +
> + put_unaligned_be64(dg[1], mac);
> + put_unaligned_be64(dg[0], mac + 8);
> +
> + crypto_xor(tag, mac, AES_BLOCK_SIZE);
> +}
> +
> +static int gcm_encrypt(struct aead_request *req)
> {
> - return crypto_register_shash(&ghash_alg);
> + struct crypto_aead *aead = crypto_aead_reqtfm(req);
> + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
> + struct skcipher_walk walk;
> + u8 iv[AES_BLOCK_SIZE];
> + u8 ks[AES_BLOCK_SIZE];
> + u8 tag[AES_BLOCK_SIZE];
> + u64 dg[2] = {};
> + int err;
> +
> + if (req->assoclen)
> + gcm_calculate_auth_mac(req, dg);
> +
> + memcpy(iv, req->iv, GCM_IV_SIZE);
> + put_unaligned_be32(1, iv + GCM_IV_SIZE);
> +
> + if (likely(may_use_simd())) {
> + kernel_neon_begin();
> +
> + pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
> + num_rounds(&ctx->aes_key));
> + put_unaligned_be32(2, iv + GCM_IV_SIZE);
> + pmull_gcm_encrypt_block(ks, iv, NULL,
> + num_rounds(&ctx->aes_key));
> + put_unaligned_be32(3, iv + GCM_IV_SIZE);
> +
> + err = skcipher_walk_aead_encrypt(&walk, req, true);
> +
> + while (walk.nbytes >= AES_BLOCK_SIZE) {
> + int blocks = walk.nbytes / AES_BLOCK_SIZE;
> +
> + pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
> + walk.src.virt.addr, &ctx->ghash_key,
> + iv, num_rounds(&ctx->aes_key), ks);
> +
> + err = skcipher_walk_done(&walk,
> + walk.nbytes % AES_BLOCK_SIZE);
> + }
> + kernel_neon_end();
> + } else {
> + crypto_aes_encrypt(&ctx->aes_key, tag, iv);
> + put_unaligned_be32(2, iv + GCM_IV_SIZE);
> +
> + err = skcipher_walk_aead_encrypt(&walk, req, true);
> +
> + while (walk.nbytes >= AES_BLOCK_SIZE) {
> + int blocks = walk.nbytes / AES_BLOCK_SIZE;
> + u8 *dst = walk.dst.virt.addr;
> + u8 *src = walk.src.virt.addr;
> +
> + do {
> + crypto_aes_encrypt(&ctx->aes_key, ks, iv);
> + if (dst != src)
> + memcpy(dst, src, AES_BLOCK_SIZE);
> + crypto_xor(dst, ks, AES_BLOCK_SIZE);
> + crypto_inc(iv, AES_BLOCK_SIZE);
> +
> + dst += AES_BLOCK_SIZE;
> + src += AES_BLOCK_SIZE;
> + } while (--blocks > 0);
> +
> + ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
> + walk.dst.virt.addr, &ctx->ghash_key,
> + NULL);
> +
> + err = skcipher_walk_done(&walk,
> + walk.nbytes % AES_BLOCK_SIZE);
> + }
> + if (walk.nbytes)
> + crypto_aes_encrypt(&ctx->aes_key, ks, iv);
> + }
> +
> + /* handle the tail */
> + if (walk.nbytes) {
> + u8 buf[GHASH_BLOCK_SIZE];
> +
> + if (walk.dst.virt.addr != walk.src.virt.addr)
> + memcpy(walk.dst.virt.addr, walk.src.virt.addr,
> + walk.nbytes);
> + crypto_xor(walk.dst.virt.addr, ks, walk.nbytes);
> +
> + memcpy(buf, walk.dst.virt.addr, walk.nbytes);
> + memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
> + ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
> +
> + err = skcipher_walk_done(&walk, 0);
> + }
> +
> + if (err)
> + return err;
> +
> + gcm_final(req, ctx, dg, tag, req->cryptlen);
> +
> + /* copy authtag to end of dst */
> + scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
> + crypto_aead_authsize(aead), 1);
> +
> + return 0;
> +}
> +
> +static int gcm_decrypt(struct aead_request *req)
> +{
> + struct crypto_aead *aead = crypto_aead_reqtfm(req);
> + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
> + unsigned int authsize = crypto_aead_authsize(aead);
> + struct skcipher_walk walk;
> + u8 iv[AES_BLOCK_SIZE];
> + u8 tag[AES_BLOCK_SIZE];
> + u8 buf[GHASH_BLOCK_SIZE];
> + u64 dg[2] = {};
> + int err;
> +
> + if (req->assoclen)
> + gcm_calculate_auth_mac(req, dg);
> +
> + memcpy(iv, req->iv, GCM_IV_SIZE);
> + put_unaligned_be32(1, iv + GCM_IV_SIZE);
> +
> + if (likely(may_use_simd())) {
> + kernel_neon_begin();
> +
> + pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
> + num_rounds(&ctx->aes_key));
> + put_unaligned_be32(2, iv + GCM_IV_SIZE);
> +
> + err = skcipher_walk_aead_decrypt(&walk, req, true);
> +
> + while (walk.nbytes >= AES_BLOCK_SIZE) {
> + int blocks = walk.nbytes / AES_BLOCK_SIZE;
> +
> + pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
> + walk.src.virt.addr, &ctx->ghash_key,
> + iv, num_rounds(&ctx->aes_key));
> +
> + err = skcipher_walk_done(&walk,
> + walk.nbytes % AES_BLOCK_SIZE);
> + }
> + if (walk.nbytes)
> + pmull_gcm_encrypt_block(iv, iv, NULL,
> + num_rounds(&ctx->aes_key));
> +
> + kernel_neon_end();
> + } else {
> + crypto_aes_encrypt(&ctx->aes_key, tag, iv);
> + put_unaligned_be32(2, iv + GCM_IV_SIZE);
> +
> + err = skcipher_walk_aead_decrypt(&walk, req, true);
> +
> + while (walk.nbytes >= AES_BLOCK_SIZE) {
> + int blocks = walk.nbytes / AES_BLOCK_SIZE;
> + u8 *dst = walk.dst.virt.addr;
> + u8 *src = walk.src.virt.addr;
> +
> + ghash_do_update(blocks, dg, walk.src.virt.addr,
> + &ctx->ghash_key, NULL);
> +
> + do {
> + crypto_aes_encrypt(&ctx->aes_key, buf, iv);
> + if (dst != src)
> + memcpy(dst, src, AES_BLOCK_SIZE);
> + crypto_xor(dst, buf, AES_BLOCK_SIZE);
> + crypto_inc(iv, AES_BLOCK_SIZE);
> +
> + dst += AES_BLOCK_SIZE;
> + src += AES_BLOCK_SIZE;
> + } while (--blocks > 0);
> +
> + err = skcipher_walk_done(&walk,
> + walk.nbytes % AES_BLOCK_SIZE);
> + }
> + if (walk.nbytes)
> + crypto_aes_encrypt(&ctx->aes_key, iv, iv);
> + }
> +
> + /* handle the tail */
> + if (walk.nbytes) {
> + memcpy(buf, walk.src.virt.addr, walk.nbytes);
> + memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
> + ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
> +
> + if (walk.dst.virt.addr != walk.src.virt.addr)
> + memcpy(walk.dst.virt.addr, walk.src.virt.addr,
> + walk.nbytes);
> + crypto_xor(walk.dst.virt.addr, iv, walk.nbytes);
> +
> + err = skcipher_walk_done(&walk, 0);
> + }
> +
> + if (err)
> + return err;
> +
> + gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
> +
> + /* compare calculated auth tag with the stored one */
> + scatterwalk_map_and_copy(buf, req->src,
> + req->assoclen + req->cryptlen - authsize,
> + authsize, 0);
> +
> + if (crypto_memneq(tag, buf, authsize))
> + return -EBADMSG;
> + return 0;
> +}
> +
> +static struct aead_alg gcm_aes_alg = {
> + .ivsize = GCM_IV_SIZE,
> + .chunksize = AES_BLOCK_SIZE,
> + .maxauthsize = AES_BLOCK_SIZE,
> + .setkey = gcm_setkey,
> + .setauthsize = gcm_setauthsize,
> + .encrypt = gcm_encrypt,
> + .decrypt = gcm_decrypt,
> +
> + .base.cra_name = "gcm(aes)",
> + .base.cra_driver_name = "gcm-aes-ce",
> + .base.cra_priority = 300,
> + .base.cra_blocksize = 1,
> + .base.cra_ctxsize = sizeof(struct gcm_aes_ctx),
> + .base.cra_module = THIS_MODULE,
> +};
> +
> +static int __init ghash_ce_mod_init(void)
> +{
> +	int ret;
> +
> + ret = crypto_register_shash(&ghash_alg);
> + if (ret)
> + return ret;
> +
> + ret = crypto_register_aead(&gcm_aes_alg);
> + if (ret)
> + crypto_unregister_shash(&ghash_alg);
> + return ret;
> }
>
> static void __exit ghash_ce_mod_exit(void)
> {
> + crypto_unregister_aead(&gcm_aes_alg);
> crypto_unregister_shash(&ghash_alg);
> }
>
> --
> 2.9.3
>
>
> Generic GCM wrapper around AES-CTR and GHASH (using AES and PMULL instructions)
> ===============================================================================
>
> testing speed of gcm(aes) (gcm_base(ctr-aes-ce,ghash-ce)) encryption
> test 0 (128 bit key, 16 byte blocks): 1133407 operations in 1 seconds (18134512 bytes)
> test 1 (128 bit key, 64 byte blocks): 1025997 operations in 1 seconds (65663808 bytes)
> test 2 (128 bit key, 256 byte blocks): 768971 operations in 1 seconds (196856576 bytes)
> test 3 (128 bit key, 512 byte blocks): 577197 operations in 1 seconds (295524864 bytes)
> test 4 (128 bit key, 1024 byte blocks): 390516 operations in 1 seconds (399888384 bytes)
> test 5 (128 bit key, 2048 byte blocks): 237002 operations in 1 seconds (485380096 bytes)
> test 6 (128 bit key, 4096 byte blocks): 132590 operations in 1 seconds (543088640 bytes)
> test 7 (128 bit key, 8192 byte blocks): 69495 operations in 1 seconds (569303040 bytes)
> test 8 (192 bit key, 16 byte blocks): 1108665 operations in 1 seconds (17738640 bytes)
> test 9 (192 bit key, 64 byte blocks): 1054793 operations in 1 seconds (67506752 bytes)
> test 10 (192 bit key, 256 byte blocks): 759134 operations in 1 seconds (194338304 bytes)
> test 11 (192 bit key, 512 byte blocks): 565960 operations in 1 seconds (289771520 bytes)
> test 12 (192 bit key, 1024 byte blocks): 380881 operations in 1 seconds (390022144 bytes)
> test 13 (192 bit key, 2048 byte blocks): 231188 operations in 1 seconds (473473024 bytes)
> test 14 (192 bit key, 4096 byte blocks): 128310 operations in 1 seconds (525557760 bytes)
> test 15 (192 bit key, 8192 byte blocks): 67436 operations in 1 seconds (552435712 bytes)
> test 16 (256 bit key, 16 byte blocks): 1122946 operations in 1 seconds (17967136 bytes)
> test 17 (256 bit key, 64 byte blocks): 1006653 operations in 1 seconds (64425792 bytes)
> test 18 (256 bit key, 256 byte blocks): 744818 operations in 1 seconds (190673408 bytes)
> test 19 (256 bit key, 512 byte blocks): 553923 operations in 1 seconds (283608576 bytes)
> test 20 (256 bit key, 1024 byte blocks): 371402 operations in 1 seconds (380315648 bytes)
> test 21 (256 bit key, 2048 byte blocks): 223312 operations in 1 seconds (457342976 bytes)
> test 22 (256 bit key, 4096 byte blocks): 123945 operations in 1 seconds (507678720 bytes)
> test 23 (256 bit key, 8192 byte blocks): 64935 operations in 1 seconds (531947520 bytes)
>
> Native GCM module with block level interleave of AES-CTR and GHASH
> ==================================================================
>
> testing speed of gcm(aes) (gcm-aes-ce) encryption
> test 0 (128 bit key, 16 byte blocks): 1860711 operations in 1 seconds (29771376 bytes)
> test 1 (128 bit key, 64 byte blocks): 1573017 operations in 1 seconds (100673088 bytes)
> test 2 (128 bit key, 256 byte blocks): 1136989 operations in 1 seconds (291069184 bytes)
> test 3 (128 bit key, 512 byte blocks): 840846 operations in 1 seconds (430513152 bytes)
> test 4 (128 bit key, 1024 byte blocks): 548205 operations in 1 seconds (561361920 bytes)
> test 5 (128 bit key, 2048 byte blocks): 328413 operations in 1 seconds (672589824 bytes)
> test 6 (128 bit key, 4096 byte blocks): 181673 operations in 1 seconds (744132608 bytes)
> test 7 (128 bit key, 8192 byte blocks): 94986 operations in 1 seconds (778125312 bytes)
> test 8 (192 bit key, 16 byte blocks): 1837762 operations in 1 seconds (29404192 bytes)
> test 9 (192 bit key, 64 byte blocks): 1537458 operations in 1 seconds (98397312 bytes)
> test 10 (192 bit key, 256 byte blocks): 1087589 operations in 1 seconds (278422784 bytes)
> test 11 (192 bit key, 512 byte blocks): 807194 operations in 1 seconds (413283328 bytes)
> test 12 (192 bit key, 1024 byte blocks): 524966 operations in 1 seconds (537565184 bytes)
> test 13 (192 bit key, 2048 byte blocks): 312338 operations in 1 seconds (639668224 bytes)
> test 14 (192 bit key, 4096 byte blocks): 173324 operations in 1 seconds (709935104 bytes)
> test 15 (192 bit key, 8192 byte blocks): 90857 operations in 1 seconds (744300544 bytes)
> test 16 (256 bit key, 16 byte blocks): 1798971 operations in 1 seconds (28783536 bytes)
> test 17 (256 bit key, 64 byte blocks): 1497989 operations in 1 seconds (95871296 bytes)
> test 18 (256 bit key, 256 byte blocks): 1058926 operations in 1 seconds (271085056 bytes)
> test 19 (256 bit key, 512 byte blocks): 775609 operations in 1 seconds (397111808 bytes)
> test 20 (256 bit key, 1024 byte blocks): 492267 operations in 1 seconds (504081408 bytes)
> test 21 (256 bit key, 2048 byte blocks): 294868 operations in 1 seconds (603889664 bytes)
> test 22 (256 bit key, 4096 byte blocks): 161802 operations in 1 seconds (662740992 bytes)
> test 23 (256 bit key, 8192 byte blocks): 84664 operations in 1 seconds (693567488 bytes)