[RFT PATCH v2] crypto: arm64/gcm - implement native driver using v8 Crypto Extensions

Ard Biesheuvel ard.biesheuvel at linaro.org
Fri Jun 30 12:09:07 PDT 2017


On 30 June 2017 at 11:32, Ard Biesheuvel <ard.biesheuvel at linaro.org> wrote:
> Currently, the AES-GCM implementation for arm64 systems that support the
> ARMv8 Crypto Extensions is based on the generic GCM module, which combines
> the AES-CTR implementation using AES instructions with the PMULL based
> GHASH driver. This is suboptimal, given the fact that the input data needs
> to be loaded twice, once for the encryption and again for the MAC
> calculation.
>
> On Cortex-A57 (r1p2) and other recent cores that implement micro-op fusing
> for the AES instructions, AES executes at less than 1 cycle per byte, which
> means that any cycles wasted on loading the data twice hurt even more.
>
> So implement a new GCM driver that combines the AES and PMULL instructions
> at the block level. This improves performance on Cortex-A57 by ~27% (from


That should be 37%, not 27%.
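
(The 37% follows from the raw throughput numbers at the bottom of this
mail, e.g. the 8192 byte results for a 128-bit key:

    778125312 / 569303040 ~= 1.37, i.e. ~37% higher throughput.)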

> 3.5 cpb to 2.6 cpb)
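
To illustrate what combining the two at the block level buys -- with toy
stand-in primitives only, NOT AES or GHASH; the real per-block schedule is
in pmull_gcm_do_crypt below -- each input block is loaded once, encrypted
against the CTR keystream and folded into the MAC state in the same pass,
instead of being read a second time by a separate GHASH pass:

/* toy single-pass CTR+MAC loop -- stand-in primitives, NOT AES/GHASH */
#include <stdint.h>
#include <stdio.h>

#define BLK 16

static void toy_keystream(uint8_t ks[BLK], const uint8_t key[BLK],
                          const uint8_t ctr[BLK])
{
    for (int i = 0; i < BLK; i++)          /* stand-in for AES-CTR */
        ks[i] = key[i] ^ ctr[i];
}

static void toy_mac_update(uint8_t mac[BLK], const uint8_t blk[BLK])
{
    for (int i = 0; i < BLK; i++)          /* stand-in for PMULL based GHASH */
        mac[i] ^= blk[i];
}

int main(void)
{
    uint8_t key[BLK] = { 0x42 }, ctr[BLK] = { 0 }, mac[BLK] = { 0 };
    uint8_t buf[4 * BLK] = "one pass over the data, not two";

    for (int i = 0; i < 4; i++) {
        uint8_t ks[BLK];

        toy_keystream(ks, key, ctr);           /* "AES" part          */
        for (int j = 0; j < BLK; j++)
            buf[i * BLK + j] ^= ks[j];         /* encrypt in place    */
        toy_mac_update(mac, &buf[i * BLK]);    /* MAC the ciphertext  */
        ctr[BLK - 1]++;                        /* bump the counter    */
    }
    printf("toy mac byte 0: 0x%02x\n", mac[0]);
    return 0;
}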
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
> ---
> v2: - rebase onto non-upstream arm64 SIMD refactoring branch
>       (https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/log/?h=arm64-gcm)
>     - implement non-SIMD fallback
>     - remove accelerated AES routines from setkey() path
>     - use be32() accessors instead of open-coded array assignments
>     - remove redundant round key loads
>
> Raw numbers measured on a 2 GHz AMD Overdrive B1 can be found after the patch.
>
>  arch/arm64/crypto/Kconfig         |   3 +-
>  arch/arm64/crypto/ghash-ce-core.S | 175 ++++++++
>  arch/arm64/crypto/ghash-ce-glue.c | 436 ++++++++++++++++++--
>  3 files changed, 587 insertions(+), 27 deletions(-)
>
> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
> index a669dedc8767..3e5b39b79fb9 100644
> --- a/arch/arm64/crypto/Kconfig
> +++ b/arch/arm64/crypto/Kconfig
> @@ -29,10 +29,11 @@ config CRYPTO_SHA2_ARM64_CE
>         select CRYPTO_SHA256_ARM64
>
>  config CRYPTO_GHASH_ARM64_CE
> -       tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
> +       tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
>         depends on KERNEL_MODE_NEON
>         select CRYPTO_HASH
>         select CRYPTO_GF128MUL
> +       select CRYPTO_AES
>
>  config CRYPTO_CRCT10DIF_ARM64_CE
>         tristate "CRCT10DIF digest algorithm using PMULL instructions"
> diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
> index f0bb9f0b524f..cb22459eba85 100644
> --- a/arch/arm64/crypto/ghash-ce-core.S
> +++ b/arch/arm64/crypto/ghash-ce-core.S
> @@ -77,3 +77,178 @@ CPU_LE(     rev64           T1.16b, T1.16b  )
>         st1             {XL.2d}, [x1]
>         ret
>  ENDPROC(pmull_ghash_update)
> +
> +       KS              .req    v8
> +       CTR             .req    v9
> +       INP             .req    v10
> +
> +       .macro          load_round_keys, rounds, rk
> +       cmp             \rounds, #12
> +       blo             2222f           /* 128 bits */
> +       beq             1111f           /* 192 bits */
> +       ld1             {v17.4s-v18.4s}, [\rk], #32
> +1111:  ld1             {v19.4s-v20.4s}, [\rk], #32
> +2222:  ld1             {v21.4s-v24.4s}, [\rk], #64
> +       ld1             {v25.4s-v28.4s}, [\rk], #64
> +       ld1             {v29.4s-v31.4s}, [\rk]
> +       .endm
> +
> +       .macro          enc_round, state, key
> +       aese            \state\().16b, \key\().16b
> +       aesmc           \state\().16b, \state\().16b
> +       .endm
> +
> +       .macro          enc_block, state, rounds
> +       cmp             \rounds, #12
> +       b.lo            2222f           /* 128 bits */
> +       b.eq            1111f           /* 192 bits */
> +       enc_round       \state, v17
> +       enc_round       \state, v18
> +1111:  enc_round       \state, v19
> +       enc_round       \state, v20
> +2222:  .irp            key, v21, v22, v23, v24, v25, v26, v27, v28, v29
> +       enc_round       \state, \key
> +       .endr
> +       aese            \state\().16b, v30.16b
> +       eor             \state\().16b, \state\().16b, v31.16b
> +       .endm
> +
> +       .macro          pmull_gcm_do_crypt, enc
> +       ld1             {SHASH.2d}, [x4]
> +       ld1             {XL.2d}, [x1]
> +       ldr             x8, [x5, #8]                    // load lower counter
> +
> +       movi            MASK.16b, #0xe1
> +       ext             SHASH2.16b, SHASH.16b, SHASH.16b, #8
> +CPU_LE(        rev             x8, x8          )
> +       shl             MASK.2d, MASK.2d, #57
> +       eor             SHASH2.16b, SHASH2.16b, SHASH.16b
> +
> +       .if             \enc == 1
> +       ld1             {KS.16b}, [x7]
> +       .endif
> +
> +0:     ld1             {CTR.8b}, [x5]                  // load upper counter
> +       ld1             {INP.16b}, [x3], #16
> +       rev             x9, x8
> +       add             x8, x8, #1
> +       sub             w0, w0, #1
> +       ins             CTR.d[1], x9                    // set lower counter
> +
> +       .if             \enc == 1
> +       eor             INP.16b, INP.16b, KS.16b        // encrypt input
> +       st1             {INP.16b}, [x2], #16
> +       .endif
> +
> +       rev64           T1.16b, INP.16b
> +
> +       cmp             w6, #12
> +       b.ge            2f                              // AES-192/256?
> +
> +1:     enc_round       CTR, v21
> +
> +       ext             T2.16b, XL.16b, XL.16b, #8
> +       ext             IN1.16b, T1.16b, T1.16b, #8
> +
> +       enc_round       CTR, v22
> +
> +       eor             T1.16b, T1.16b, T2.16b
> +       eor             XL.16b, XL.16b, IN1.16b
> +
> +       enc_round       CTR, v23
> +
> +       pmull2          XH.1q, SHASH.2d, XL.2d          // a1 * b1
> +       eor             T1.16b, T1.16b, XL.16b
> +
> +       enc_round       CTR, v24
> +
> +       pmull           XL.1q, SHASH.1d, XL.1d          // a0 * b0
> +       pmull           XM.1q, SHASH2.1d, T1.1d         // (a1 + a0)(b1 + b0)
> +
> +       enc_round       CTR, v25
> +
> +       ext             T1.16b, XL.16b, XH.16b, #8
> +       eor             T2.16b, XL.16b, XH.16b
> +       eor             XM.16b, XM.16b, T1.16b
> +
> +       enc_round       CTR, v26
> +
> +       eor             XM.16b, XM.16b, T2.16b
> +       pmull           T2.1q, XL.1d, MASK.1d
> +
> +       enc_round       CTR, v27
> +
> +       mov             XH.d[0], XM.d[1]
> +       mov             XM.d[1], XL.d[0]
> +
> +       enc_round       CTR, v28
> +
> +       eor             XL.16b, XM.16b, T2.16b
> +
> +       enc_round       CTR, v29
> +
> +       ext             T2.16b, XL.16b, XL.16b, #8
> +
> +       aese            CTR.16b, v30.16b
> +
> +       pmull           XL.1q, XL.1d, MASK.1d
> +       eor             T2.16b, T2.16b, XH.16b
> +
> +       eor             KS.16b, CTR.16b, v31.16b
> +
> +       eor             XL.16b, XL.16b, T2.16b
> +
> +       .if             \enc == 0
> +       eor             INP.16b, INP.16b, KS.16b
> +       st1             {INP.16b}, [x2], #16
> +       .endif
> +
> +       cbnz            w0, 0b
> +
> +CPU_LE(        rev             x8, x8          )
> +       st1             {XL.2d}, [x1]
> +       str             x8, [x5, #8]                    // store lower counter
> +
> +       .if             \enc == 1
> +       st1             {KS.16b}, [x7]
> +       .endif
> +
> +       ret
> +
> +2:     b.eq            3f                              // AES-192?
> +       enc_round       CTR, v17
> +       enc_round       CTR, v18
> +3:     enc_round       CTR, v19
> +       enc_round       CTR, v20
> +       b               1b
> +       .endm
> +
> +       /*
> +        * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
> +        *                        struct ghash_key const *k, u8 ctr[],
> +        *                        int rounds, u8 ks[])
> +        */
> +ENTRY(pmull_gcm_encrypt)
> +       pmull_gcm_do_crypt      1
> +ENDPROC(pmull_gcm_encrypt)
> +
> +       /*
> +        * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
> +        *                        struct ghash_key const *k, u8 ctr[],
> +        *                        int rounds)
> +        */
> +ENTRY(pmull_gcm_decrypt)
> +       pmull_gcm_do_crypt      0
> +ENDPROC(pmull_gcm_decrypt)
> +
> +       /*
> +        * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
> +        */
> +ENTRY(pmull_gcm_encrypt_block)
> +       cbz             x2, 0f
> +       load_round_keys w3, x2
> +0:     ld1             {v0.16b}, [x1]
> +       enc_block       v0, w3
> +       st1             {v0.16b}, [x0]
> +       ret
> +ENDPROC(pmull_gcm_encrypt_block)
> diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
> index 30221ef56e70..85ff57e789ff 100644
> --- a/arch/arm64/crypto/ghash-ce-glue.c
> +++ b/arch/arm64/crypto/ghash-ce-glue.c
> @@ -11,18 +11,25 @@
>  #include <asm/neon.h>
>  #include <asm/simd.h>
>  #include <asm/unaligned.h>
> +#include <crypto/aes.h>
> +#include <crypto/algapi.h>
> +#include <crypto/b128ops.h>
>  #include <crypto/gf128mul.h>
> +#include <crypto/internal/aead.h>
>  #include <crypto/internal/hash.h>
> +#include <crypto/internal/skcipher.h>
> +#include <crypto/scatterwalk.h>
>  #include <linux/cpufeature.h>
>  #include <linux/crypto.h>
>  #include <linux/module.h>
>
> -MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
> +MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
>  MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel at linaro.org>");
>  MODULE_LICENSE("GPL v2");
>
>  #define GHASH_BLOCK_SIZE       16
>  #define GHASH_DIGEST_SIZE      16
> +#define GCM_IV_SIZE            12
>
>  struct ghash_key {
>         u64 a;
> @@ -36,9 +43,25 @@ struct ghash_desc_ctx {
>         u32 count;
>  };
>
> +struct gcm_aes_ctx {
> +       struct crypto_aes_ctx   aes_key;
> +       struct ghash_key        ghash_key;
> +};
> +
>  asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
>                                    struct ghash_key const *k, const char *head);
>
> +asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
> +                                 const u8 src[], struct ghash_key const *k,
> +                                 u8 ctr[], int rounds, u8 ks[]);
> +
> +asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
> +                                 const u8 src[], struct ghash_key const *k,
> +                                 u8 ctr[], int rounds);
> +
> +asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
> +                                       u32 const rk[], int rounds);
> +
>  static int ghash_init(struct shash_desc *desc)
>  {
>         struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
> @@ -130,17 +153,11 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
>         return 0;
>  }
>
> -static int ghash_setkey(struct crypto_shash *tfm,
> -                       const u8 *inkey, unsigned int keylen)
> +static int __ghash_setkey(struct ghash_key *key,
> +                         const u8 *inkey, unsigned int keylen)
>  {
> -       struct ghash_key *key = crypto_shash_ctx(tfm);
>         u64 a, b;
>
> -       if (keylen != GHASH_BLOCK_SIZE) {
> -               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
> -               return -EINVAL;
> -       }
> -
>         /* needed for the fallback */
>         memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
>
> @@ -157,31 +174,398 @@ static int ghash_setkey(struct crypto_shash *tfm,
>         return 0;
>  }
>
> +static int ghash_setkey(struct crypto_shash *tfm,
> +                       const u8 *inkey, unsigned int keylen)
> +{
> +       struct ghash_key *key = crypto_shash_ctx(tfm);
> +
> +       if (keylen != GHASH_BLOCK_SIZE) {
> +               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
> +               return -EINVAL;
> +       }
> +
> +       return __ghash_setkey(key, inkey, keylen);
> +}
> +
>  static struct shash_alg ghash_alg = {
> -       .digestsize     = GHASH_DIGEST_SIZE,
> -       .init           = ghash_init,
> -       .update         = ghash_update,
> -       .final          = ghash_final,
> -       .setkey         = ghash_setkey,
> -       .descsize       = sizeof(struct ghash_desc_ctx),
> -       .base           = {
> -               .cra_name               = "ghash",
> -               .cra_driver_name        = "ghash-ce",
> -               .cra_priority           = 200,
> -               .cra_flags              = CRYPTO_ALG_TYPE_SHASH,
> -               .cra_blocksize          = GHASH_BLOCK_SIZE,
> -               .cra_ctxsize            = sizeof(struct ghash_key),
> -               .cra_module             = THIS_MODULE,
> -       },
> +       .base.cra_name          = "ghash",
> +       .base.cra_driver_name   = "ghash-ce",
> +       .base.cra_priority      = 200,
> +       .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
> +       .base.cra_blocksize     = GHASH_BLOCK_SIZE,
> +       .base.cra_ctxsize       = sizeof(struct ghash_key),
> +       .base.cra_module        = THIS_MODULE,
> +
> +       .digestsize             = GHASH_DIGEST_SIZE,
> +       .init                   = ghash_init,
> +       .update                 = ghash_update,
> +       .final                  = ghash_final,
> +       .setkey                 = ghash_setkey,
> +       .descsize               = sizeof(struct ghash_desc_ctx),
>  };
>
> -static int __init ghash_ce_mod_init(void)
> +static int num_rounds(struct crypto_aes_ctx *ctx)
> +{
> +       /*
> +        * # of rounds specified by AES:
> +        * 128 bit key          10 rounds
> +        * 192 bit key          12 rounds
> +        * 256 bit key          14 rounds
> +        * => n byte key        => 6 + (n/4) rounds
> +        */
> +       return 6 + ctx->key_length / 4;
> +}
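
Just to spell out the comment above as a quick stand-alone check (not part
of the patch):

#include <assert.h>

static int rounds_for_keylen(int key_length)    /* key length in bytes */
{
    return 6 + key_length / 4;                  /* same as num_rounds() */
}

int main(void)
{
    assert(rounds_for_keylen(16) == 10);        /* AES-128 */
    assert(rounds_for_keylen(24) == 12);        /* AES-192 */
    assert(rounds_for_keylen(32) == 14);        /* AES-256 */
    return 0;
}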
> +
> +static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
> +                     unsigned int keylen)
> +{
> +       struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
> +       u8 key[GHASH_BLOCK_SIZE];
> +       int ret;
> +
> +       ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
> +       if (ret) {
> +               tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
> +               return -EINVAL;
> +       }
> +
> +       crypto_aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});
> +
> +       return __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
> +}
> +
> +static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
> +{
> +       switch (authsize) {
> +       case 4:
> +       case 8:
> +       case 12 ... 16:
> +               break;
> +       default:
> +               return -EINVAL;
> +       }
> +       return 0;
> +}
> +
> +static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
> +                          int *buf_count, struct gcm_aes_ctx *ctx)
> +{
> +       if (*buf_count > 0) {
> +               int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);
> +
> +               memcpy(&buf[*buf_count], src, buf_added);
> +
> +               *buf_count += buf_added;
> +               src += buf_added;
> +               count -= buf_added;
> +       }
> +
> +       if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
> +               int blocks = count / GHASH_BLOCK_SIZE;
> +
> +               ghash_do_update(blocks, dg, src, &ctx->ghash_key,
> +                               *buf_count ? buf : NULL);
> +
> +               src += blocks * GHASH_BLOCK_SIZE;
> +               count %= GHASH_BLOCK_SIZE;
> +               *buf_count = 0;
> +       }
> +
> +       if (count > 0) {
> +               memcpy(buf, src, count);
> +               *buf_count = count;
> +       }
> +}
> +
> +static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
> +{
> +       struct crypto_aead *aead = crypto_aead_reqtfm(req);
> +       struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
> +       u8 buf[GHASH_BLOCK_SIZE];
> +       struct scatter_walk walk;
> +       u32 len = req->assoclen;
> +       int buf_count = 0;
> +
> +       scatterwalk_start(&walk, req->src);
> +
> +       do {
> +               u32 n = scatterwalk_clamp(&walk, len);
> +               u8 *p;
> +
> +               if (!n) {
> +                       scatterwalk_start(&walk, sg_next(walk.sg));
> +                       n = scatterwalk_clamp(&walk, len);
> +               }
> +               p = scatterwalk_map(&walk);
> +
> +               gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
> +               len -= n;
> +
> +               scatterwalk_unmap(p);
> +               scatterwalk_advance(&walk, n);
> +               scatterwalk_done(&walk, 0, len);
> +       } while (len);
> +
> +       if (buf_count) {
> +               memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
> +               ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
> +       }
> +}
> +
> +static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
> +                     u64 dg[], u8 tag[], int cryptlen)
> +{
> +       u8 mac[AES_BLOCK_SIZE];
> +       u128 lengths;
> +
> +       lengths.a = cpu_to_be64(req->assoclen * 8);
> +       lengths.b = cpu_to_be64(cryptlen * 8);
> +
> +       ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
> +
> +       put_unaligned_be64(dg[1], mac);
> +       put_unaligned_be64(dg[0], mac + 8);
> +
> +       crypto_xor(tag, mac, AES_BLOCK_SIZE);
> +}
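
The 16-byte block hashed by gcm_final() above is just len(A) || len(C),
both in bits and big endian, as GCM requires. A user space equivalent,
with a made-up helper name and assuming the byte counts times eight fit
in 64 bits:

#include <stdint.h>
#include <stdio.h>

/* hypothetical helper: build the GCM length block, len(A)||len(C) in bits */
static void gcm_len_block(uint8_t out[16], uint64_t assoclen, uint64_t cryptlen)
{
    uint64_t abits = assoclen * 8, cbits = cryptlen * 8;
    int i;

    for (i = 0; i < 8; i++) {
        out[i]     = abits >> (56 - 8 * i);     /* big endian */
        out[i + 8] = cbits >> (56 - 8 * i);
    }
}

int main(void)
{
    uint8_t blk[16];

    gcm_len_block(blk, 20, 64);     /* example: 20 AAD bytes, 64 data bytes */
    printf("%02x .. %02x %02x\n", blk[0], blk[14], blk[15]);
    return 0;
}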
> +
> +static int gcm_encrypt(struct aead_request *req)
>  {
> -       return crypto_register_shash(&ghash_alg);
> +       struct crypto_aead *aead = crypto_aead_reqtfm(req);
> +       struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
> +       struct skcipher_walk walk;
> +       u8 iv[AES_BLOCK_SIZE];
> +       u8 ks[AES_BLOCK_SIZE];
> +       u8 tag[AES_BLOCK_SIZE];
> +       u64 dg[2] = {};
> +       int err;
> +
> +       if (req->assoclen)
> +               gcm_calculate_auth_mac(req, dg);
> +
> +       memcpy(iv, req->iv, GCM_IV_SIZE);
> +       put_unaligned_be32(1, iv + GCM_IV_SIZE);
> +
> +       if (likely(may_use_simd())) {
> +               kernel_neon_begin();
> +
> +               pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
> +                                       num_rounds(&ctx->aes_key));
> +               put_unaligned_be32(2, iv + GCM_IV_SIZE);
> +               pmull_gcm_encrypt_block(ks, iv, NULL,
> +                                       num_rounds(&ctx->aes_key));
> +               put_unaligned_be32(3, iv + GCM_IV_SIZE);
> +
> +               err = skcipher_walk_aead_encrypt(&walk, req, true);
> +
> +               while (walk.nbytes >= AES_BLOCK_SIZE) {
> +                       int blocks = walk.nbytes / AES_BLOCK_SIZE;
> +
> +                       pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
> +                                         walk.src.virt.addr, &ctx->ghash_key,
> +                                         iv, num_rounds(&ctx->aes_key), ks);
> +
> +                       err = skcipher_walk_done(&walk,
> +                                                walk.nbytes % AES_BLOCK_SIZE);
> +               }
> +               kernel_neon_end();
> +       } else {
> +               crypto_aes_encrypt(&ctx->aes_key, tag, iv);
> +               put_unaligned_be32(2, iv + GCM_IV_SIZE);
> +
> +               err = skcipher_walk_aead_encrypt(&walk, req, true);
> +
> +               while (walk.nbytes >= AES_BLOCK_SIZE) {
> +                       int blocks = walk.nbytes / AES_BLOCK_SIZE;
> +                       u8 *dst = walk.dst.virt.addr;
> +                       u8 *src = walk.src.virt.addr;
> +
> +                       do {
> +                               crypto_aes_encrypt(&ctx->aes_key, ks, iv);
> +                               if (dst != src)
> +                                       memcpy(dst, src, AES_BLOCK_SIZE);
> +                               crypto_xor(dst, ks, AES_BLOCK_SIZE);
> +                               crypto_inc(iv, AES_BLOCK_SIZE);
> +
> +                               dst += AES_BLOCK_SIZE;
> +                               src += AES_BLOCK_SIZE;
> +                       } while (--blocks > 0);
> +
> +                       ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
> +                                       walk.dst.virt.addr, &ctx->ghash_key,
> +                                       NULL);
> +
> +                       err = skcipher_walk_done(&walk,
> +                                                walk.nbytes % AES_BLOCK_SIZE);
> +               }
> +               if (walk.nbytes)
> +                       crypto_aes_encrypt(&ctx->aes_key, ks, iv);
> +       }
> +
> +       /* handle the tail */
> +       if (walk.nbytes) {
> +               u8 buf[GHASH_BLOCK_SIZE];
> +
> +               if (walk.dst.virt.addr != walk.src.virt.addr)
> +                       memcpy(walk.dst.virt.addr, walk.src.virt.addr,
> +                              walk.nbytes);
> +               crypto_xor(walk.dst.virt.addr, ks, walk.nbytes);
> +
> +               memcpy(buf, walk.dst.virt.addr, walk.nbytes);
> +               memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
> +               ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
> +
> +               err = skcipher_walk_done(&walk, 0);
> +       }
> +
> +       if (err)
> +               return err;
> +
> +       gcm_final(req, ctx, dg, tag, req->cryptlen);
> +
> +       /* copy authtag to end of dst */
> +       scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
> +                                crypto_aead_authsize(aead), 1);
> +
> +       return 0;
> +}
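
For readers less familiar with GCM's counter layout as used above: the
16-byte counter block is the 12-byte IV with a 32-bit big-endian block
counter appended. Counter value 1 produces the block that masks the final
tag, counter 2 is the keystream for the first data block, and the asm
routine is handed the block with counter 3 and keeps counting in the low
64 bits. A stand-alone illustration (not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define GCM_IV_SIZE     12
#define AES_BLOCK_SIZE  16

/* 12-byte GCM IV followed by a 32-bit big-endian block counter */
static void ctr_block(uint8_t blk[AES_BLOCK_SIZE],
                      const uint8_t iv[GCM_IV_SIZE], uint32_t ctr)
{
    memcpy(blk, iv, GCM_IV_SIZE);
    blk[12] = ctr >> 24;
    blk[13] = ctr >> 16;
    blk[14] = ctr >> 8;
    blk[15] = ctr;
}

int main(void)
{
    uint8_t iv[GCM_IV_SIZE] = { 0 }, blk[AES_BLOCK_SIZE];

    ctr_block(blk, iv, 1);  /* AES(K, .) of this masks the final tag */
    ctr_block(blk, iv, 2);  /* keystream for the first data block    */
    ctr_block(blk, iv, 3);  /* starting point handed to the asm      */
    printf("counter bytes: %02x %02x %02x %02x\n",
           blk[12], blk[13], blk[14], blk[15]);
    return 0;
}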
> +
> +static int gcm_decrypt(struct aead_request *req)
> +{
> +       struct crypto_aead *aead = crypto_aead_reqtfm(req);
> +       struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
> +       unsigned int authsize = crypto_aead_authsize(aead);
> +       struct skcipher_walk walk;
> +       u8 iv[AES_BLOCK_SIZE];
> +       u8 tag[AES_BLOCK_SIZE];
> +       u8 buf[GHASH_BLOCK_SIZE];
> +       u64 dg[2] = {};
> +       int err;
> +
> +       if (req->assoclen)
> +               gcm_calculate_auth_mac(req, dg);
> +
> +       memcpy(iv, req->iv, GCM_IV_SIZE);
> +       put_unaligned_be32(1, iv + GCM_IV_SIZE);
> +
> +       if (likely(may_use_simd())) {
> +               kernel_neon_begin();
> +
> +               pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
> +                                       num_rounds(&ctx->aes_key));
> +               put_unaligned_be32(2, iv + GCM_IV_SIZE);
> +
> +               err = skcipher_walk_aead_decrypt(&walk, req, true);
> +
> +               while (walk.nbytes >= AES_BLOCK_SIZE) {
> +                       int blocks = walk.nbytes / AES_BLOCK_SIZE;
> +
> +                       pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
> +                                         walk.src.virt.addr, &ctx->ghash_key,
> +                                         iv, num_rounds(&ctx->aes_key));
> +
> +                       err = skcipher_walk_done(&walk,
> +                                                walk.nbytes % AES_BLOCK_SIZE);
> +               }
> +               if (walk.nbytes)
> +                       pmull_gcm_encrypt_block(iv, iv, NULL,
> +                                               num_rounds(&ctx->aes_key));
> +
> +               kernel_neon_end();
> +       } else {
> +               crypto_aes_encrypt(&ctx->aes_key, tag, iv);
> +               put_unaligned_be32(2, iv + GCM_IV_SIZE);
> +
> +               err = skcipher_walk_aead_decrypt(&walk, req, true);
> +
> +               while (walk.nbytes >= AES_BLOCK_SIZE) {
> +                       int blocks = walk.nbytes / AES_BLOCK_SIZE;
> +                       u8 *dst = walk.dst.virt.addr;
> +                       u8 *src = walk.src.virt.addr;
> +
> +                       ghash_do_update(blocks, dg, walk.src.virt.addr,
> +                                       &ctx->ghash_key, NULL);
> +
> +                       do {
> +                               crypto_aes_encrypt(&ctx->aes_key, buf, iv);
> +                               if (dst != src)
> +                                       memcpy(dst, src, AES_BLOCK_SIZE);
> +                               crypto_xor(dst, buf, AES_BLOCK_SIZE);
> +                               crypto_inc(iv, AES_BLOCK_SIZE);
> +
> +                               dst += AES_BLOCK_SIZE;
> +                               src += AES_BLOCK_SIZE;
> +                       } while (--blocks > 0);
> +
> +                       err = skcipher_walk_done(&walk,
> +                                                walk.nbytes % AES_BLOCK_SIZE);
> +               }
> +               if (walk.nbytes)
> +                       crypto_aes_encrypt(&ctx->aes_key, iv, iv);
> +       }
> +
> +       /* handle the tail */
> +       if (walk.nbytes) {
> +               memcpy(buf, walk.src.virt.addr, walk.nbytes);
> +               memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
> +               ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
> +
> +               if (walk.dst.virt.addr != walk.src.virt.addr)
> +                       memcpy(walk.dst.virt.addr, walk.src.virt.addr,
> +                              walk.nbytes);
> +               crypto_xor(walk.dst.virt.addr, iv, walk.nbytes);
> +
> +               err = skcipher_walk_done(&walk, 0);
> +       }
> +
> +       if (err)
> +               return err;
> +
> +       gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
> +
> +       /* compare calculated auth tag with the stored one */
> +       scatterwalk_map_and_copy(buf, req->src,
> +                                req->assoclen + req->cryptlen - authsize,
> +                                authsize, 0);
> +
> +       if (crypto_memneq(tag, buf, authsize))
> +               return -EBADMSG;
> +       return 0;
> +}
> +
> +static struct aead_alg gcm_aes_alg = {
> +       .ivsize                 = GCM_IV_SIZE,
> +       .chunksize              = AES_BLOCK_SIZE,
> +       .maxauthsize            = AES_BLOCK_SIZE,
> +       .setkey                 = gcm_setkey,
> +       .setauthsize            = gcm_setauthsize,
> +       .encrypt                = gcm_encrypt,
> +       .decrypt                = gcm_decrypt,
> +
> +       .base.cra_name          = "gcm(aes)",
> +       .base.cra_driver_name   = "gcm-aes-ce",
> +       .base.cra_priority      = 300,
> +       .base.cra_blocksize     = 1,
> +       .base.cra_ctxsize       = sizeof(struct gcm_aes_ctx),
> +       .base.cra_module        = THIS_MODULE,
> +};
> +
> +static int __init ghash_ce_mod_init(void)
> +{
> +       int ret;
> +
> +       ret = crypto_register_shash(&ghash_alg);
> +       if (ret)
> +               return ret;
> +
> +       ret = crypto_register_aead(&gcm_aes_alg);
> +       if (ret)
> +               crypto_unregister_shash(&ghash_alg);
> +       return ret;
>  }
>
>  static void __exit ghash_ce_mod_exit(void)
>  {
> +       crypto_unregister_aead(&gcm_aes_alg);
>         crypto_unregister_shash(&ghash_alg);
>  }
>
> --
> 2.9.3
>
>
> Generic GCM wrapper around AES-CTR and GHASH (using AES and PMULL instructions)
> ===============================================================================
>
> testing speed of gcm(aes) (gcm_base(ctr-aes-ce,ghash-ce)) encryption
> test 0 (128 bit key, 16 byte blocks): 1133407 operations in 1 seconds (18134512 bytes)
> test 1 (128 bit key, 64 byte blocks): 1025997 operations in 1 seconds (65663808 bytes)
> test 2 (128 bit key, 256 byte blocks): 768971 operations in 1 seconds (196856576 bytes)
> test 3 (128 bit key, 512 byte blocks): 577197 operations in 1 seconds (295524864 bytes)
> test 4 (128 bit key, 1024 byte blocks): 390516 operations in 1 seconds (399888384 bytes)
> test 5 (128 bit key, 2048 byte blocks): 237002 operations in 1 seconds (485380096 bytes)
> test 6 (128 bit key, 4096 byte blocks): 132590 operations in 1 seconds (543088640 bytes)
> test 7 (128 bit key, 8192 byte blocks): 69495 operations in 1 seconds (569303040 bytes)
> test 8 (192 bit key, 16 byte blocks): 1108665 operations in 1 seconds (17738640 bytes)
> test 9 (192 bit key, 64 byte blocks): 1054793 operations in 1 seconds (67506752 bytes)
> test 10 (192 bit key, 256 byte blocks): 759134 operations in 1 seconds (194338304 bytes)
> test 11 (192 bit key, 512 byte blocks): 565960 operations in 1 seconds (289771520 bytes)
> test 12 (192 bit key, 1024 byte blocks): 380881 operations in 1 seconds (390022144 bytes)
> test 13 (192 bit key, 2048 byte blocks): 231188 operations in 1 seconds (473473024 bytes)
> test 14 (192 bit key, 4096 byte blocks): 128310 operations in 1 seconds (525557760 bytes)
> test 15 (192 bit key, 8192 byte blocks): 67436 operations in 1 seconds (552435712 bytes)
> test 16 (256 bit key, 16 byte blocks): 1122946 operations in 1 seconds (17967136 bytes)
> test 17 (256 bit key, 64 byte blocks): 1006653 operations in 1 seconds (64425792 bytes)
> test 18 (256 bit key, 256 byte blocks): 744818 operations in 1 seconds (190673408 bytes)
> test 19 (256 bit key, 512 byte blocks): 553923 operations in 1 seconds (283608576 bytes)
> test 20 (256 bit key, 1024 byte blocks): 371402 operations in 1 seconds (380315648 bytes)
> test 21 (256 bit key, 2048 byte blocks): 223312 operations in 1 seconds (457342976 bytes)
> test 22 (256 bit key, 4096 byte blocks): 123945 operations in 1 seconds (507678720 bytes)
> test 23 (256 bit key, 8192 byte blocks): 64935 operations in 1 seconds (531947520 bytes)
>
> Native GCM module with block level interleave of AES-CTR and GHASH
> ==================================================================
>
> testing speed of gcm(aes) (gcm-aes-ce) encryption
> test 0 (128 bit key, 16 byte blocks): 1860711 operations in 1 seconds (29771376 bytes)
> test 1 (128 bit key, 64 byte blocks): 1573017 operations in 1 seconds (100673088 bytes)
> test 2 (128 bit key, 256 byte blocks): 1136989 operations in 1 seconds (291069184 bytes)
> test 3 (128 bit key, 512 byte blocks): 840846 operations in 1 seconds (430513152 bytes)
> test 4 (128 bit key, 1024 byte blocks): 548205 operations in 1 seconds (561361920 bytes)
> test 5 (128 bit key, 2048 byte blocks): 328413 operations in 1 seconds (672589824 bytes)
> test 6 (128 bit key, 4096 byte blocks): 181673 operations in 1 seconds (744132608 bytes)
> test 7 (128 bit key, 8192 byte blocks): 94986 operations in 1 seconds (778125312 bytes)
> test 8 (192 bit key, 16 byte blocks): 1837762 operations in 1 seconds (29404192 bytes)
> test 9 (192 bit key, 64 byte blocks): 1537458 operations in 1 seconds (98397312 bytes)
> test 10 (192 bit key, 256 byte blocks): 1087589 operations in 1 seconds (278422784 bytes)
> test 11 (192 bit key, 512 byte blocks): 807194 operations in 1 seconds (413283328 bytes)
> test 12 (192 bit key, 1024 byte blocks): 524966 operations in 1 seconds (537565184 bytes)
> test 13 (192 bit key, 2048 byte blocks): 312338 operations in 1 seconds (639668224 bytes)
> test 14 (192 bit key, 4096 byte blocks): 173324 operations in 1 seconds (709935104 bytes)
> test 15 (192 bit key, 8192 byte blocks): 90857 operations in 1 seconds (744300544 bytes)
> test 16 (256 bit key, 16 byte blocks): 1798971 operations in 1 seconds (28783536 bytes)
> test 17 (256 bit key, 64 byte blocks): 1497989 operations in 1 seconds (95871296 bytes)
> test 18 (256 bit key, 256 byte blocks): 1058926 operations in 1 seconds (271085056 bytes)
> test 19 (256 bit key, 512 byte blocks): 775609 operations in 1 seconds (397111808 bytes)
> test 20 (256 bit key, 1024 byte blocks): 492267 operations in 1 seconds (504081408 bytes)
> test 21 (256 bit key, 2048 byte blocks): 294868 operations in 1 seconds (603889664 bytes)
> test 22 (256 bit key, 4096 byte blocks): 161802 operations in 1 seconds (662740992 bytes)
> test 23 (256 bit key, 8192 byte blocks): 84664 operations in 1 seconds (693567488 bytes)
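
As a cross-check of the cycles-per-byte figures quoted in the commit log,
assuming a single core at the 2 GHz clock of this system, the 8192 byte
results above work out to roughly

    2e9 / 569303040 ~= 3.5 cycles per byte   (gcm_base(ctr-aes-ce,ghash-ce))
    2e9 / 778125312 ~= 2.6 cycles per byte   (gcm-aes-ce)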


