[PATCH v2 2/5] crypto: stm32 - Support for STM32 CRC32 crypto module

Fabien DESSENNE fabien.dessenne at st.com
Fri Mar 24 02:56:04 PDT 2017


On 24/03/17 05:28, PrasannaKumar Muralidharan wrote:
> On 21 March 2017 at 20:43, Fabien Dessenne <fabien.dessenne at st.com> wrote:
>> This module registers a CRC32 ("Ethernet") and a CRC32C (Castagnoli)
>> algorithm that make use of the STMicroelectronics STM32 crypto hardware.
>>
>> Theses algorithms are compatible with the little-endian generic ones.
>> Both algorithms use ~0 as default seed (key).
>> With CRC32C the output is xored with ~0.
>>
>> Using TCRYPT CRC32C speed test, this shows up to 900% speedup compared
>> to the crc32c-generic algorithm.
> Comparing with crc3c-generic alogrithm does not sound like a good
> metric for someone who has to decide between hw crypto or not.
> Wouldn't it be better if the comparison is between crc32 using NEON
> with hw crypto module? It will help in choosing between hw crypto or
> arch optimised crc routiene.

The STM32 microcontrollers are based on ARM Cortex-M7 (or older core) 
that do not have NEON support.
Moreover, the purpose of this introduction is not to provide with a 
(full) benchmark. It just gives a hint of the HW performance.

>> Signed-off-by: Fabien Dessenne <fabien.dessenne at st.com>
>> ---
>>   drivers/crypto/Kconfig             |   2 +
>>   drivers/crypto/Makefile            |   1 +
>>   drivers/crypto/stm32/Kconfig       |   8 +
>>   drivers/crypto/stm32/Makefile      |   2 +
>>   drivers/crypto/stm32/stm32_crc32.c | 324 +++++++++++++++++++++++++++++++++++++
>>   5 files changed, 337 insertions(+)
>>   create mode 100644 drivers/crypto/stm32/Kconfig
>>   create mode 100644 drivers/crypto/stm32/Makefile
>>   create mode 100644 drivers/crypto/stm32/stm32_crc32.c
>>
>> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
>> index 473d312..922b323 100644
>> --- a/drivers/crypto/Kconfig
>> +++ b/drivers/crypto/Kconfig
>> @@ -619,4 +619,6 @@ config CRYPTO_DEV_BCM_SPU
>>            Secure Processing Unit (SPU). The SPU driver registers ablkcipher,
>>            ahash, and aead algorithms with the kernel cryptographic API.
>>
>> +source "drivers/crypto/stm32/Kconfig"
>> +
>>   endif # CRYPTO_HW
>> diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
>> index 7396094..95bf2f9 100644
>> --- a/drivers/crypto/Makefile
>> +++ b/drivers/crypto/Makefile
>> @@ -30,6 +30,7 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
>>   obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
>>   obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
>>   obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
>> +obj-$(CONFIG_CRYPTO_DEV_STM32) += stm32/
>>   obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
>>   obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
>>   obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
>> diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
>> new file mode 100644
>> index 0000000..792335b
>> --- /dev/null
>> +++ b/drivers/crypto/stm32/Kconfig
>> @@ -0,0 +1,8 @@
>> +config CRYPTO_DEV_STM32
>> +       tristate "Support for STM32 crypto accelerators"
>> +       depends on ARCH_STM32
>> +       select CRYPTO_HASH
>> +       help
>> +          This enables support for the CRC32 hw accelerator which can be found
>> +         on STMicroelectronis STM32 SOC.
>> +
>> diff --git a/drivers/crypto/stm32/Makefile b/drivers/crypto/stm32/Makefile
>> new file mode 100644
>> index 0000000..73b4c6e
>> --- /dev/null
>> +++ b/drivers/crypto/stm32/Makefile
>> @@ -0,0 +1,2 @@
>> +obj-$(CONFIG_CRYPTO_DEV_STM32) += stm32_cryp.o
>> +stm32_cryp-objs := stm32_crc32.o
>> diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c
>> new file mode 100644
>> index 0000000..7652822
>> --- /dev/null
>> +++ b/drivers/crypto/stm32/stm32_crc32.c
>> @@ -0,0 +1,324 @@
>> +/*
>> + * Copyright (C) STMicroelectronics SA 2017
>> + * Author: Fabien Dessenne <fabien.dessenne at st.com>
>> + * License terms:  GNU General Public License (GPL), version 2
>> + */
>> +
>> +#include <linux/bitrev.h>
>> +#include <linux/clk.h>
>> +#include <linux/module.h>
>> +#include <linux/platform_device.h>
>> +
>> +#include <crypto/internal/hash.h>
>> +
>> +#include <asm/unaligned.h>
>> +
>> +#define DRIVER_NAME             "stm32-crc32"
>> +#define CHKSUM_DIGEST_SIZE      4
>> +#define CHKSUM_BLOCK_SIZE       1
>> +
>> +/* Registers */
>> +#define CRC_DR                  0x00000000
>> +#define CRC_CR                  0x00000008
>> +#define CRC_INIT                0x00000010
>> +#define CRC_POL                 0x00000014
>> +
>> +/* Registers values */
>> +#define CRC_CR_RESET            BIT(0)
>> +#define CRC_CR_REVERSE          (BIT(7) | BIT(6) | BIT(5))
>> +#define CRC_INIT_DEFAULT        0xFFFFFFFF
>> +
>> +/* Polynomial reversed */
>> +#define POLY_CRC32              0xEDB88320
>> +#define POLY_CRC32C             0x82F63B78
>> +
>> +struct stm32_crc {
>> +       struct list_head list;
>> +       struct device    *dev;
>> +       void __iomem     *regs;
>> +       struct clk       *clk;
>> +       u8               pending_data[sizeof(u32)];
>> +       size_t           nb_pending_bytes;
>> +};
>> +
>> +struct stm32_crc_list {
>> +       struct list_head dev_list;
>> +       spinlock_t       lock; /* protect dev_list */
>> +};
>> +
>> +static struct stm32_crc_list crc_list = {
>> +       .dev_list = LIST_HEAD_INIT(crc_list.dev_list),
>> +       .lock     = __SPIN_LOCK_UNLOCKED(crc_list.lock),
>> +};
>> +
>> +struct stm32_crc_ctx {
>> +       u32 key;
>> +       u32 poly;
>> +};
>> +
>> +struct stm32_crc_desc_ctx {
>> +       u32    partial; /* crc32c: partial in first 4 bytes of that struct */
>> +       struct stm32_crc *crc;
>> +};
>> +
>> +static int stm32_crc32_cra_init(struct crypto_tfm *tfm)
>> +{
>> +       struct stm32_crc_ctx *mctx = crypto_tfm_ctx(tfm);
>> +
>> +       mctx->key = CRC_INIT_DEFAULT;
>> +       mctx->poly = POLY_CRC32;
>> +       return 0;
>> +}
>> +
>> +static int stm32_crc32c_cra_init(struct crypto_tfm *tfm)
>> +{
>> +       struct stm32_crc_ctx *mctx = crypto_tfm_ctx(tfm);
>> +
>> +       mctx->key = CRC_INIT_DEFAULT;
>> +       mctx->poly = POLY_CRC32C;
>> +       return 0;
>> +}
>> +
>> +static int stm32_crc_setkey(struct crypto_shash *tfm, const u8 *key,
>> +                           unsigned int keylen)
>> +{
>> +       struct stm32_crc_ctx *mctx = crypto_shash_ctx(tfm);
>> +
>> +       if (keylen != sizeof(u32)) {
>> +               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
>> +               return -EINVAL;
>> +       }
>> +
>> +       mctx->key = get_unaligned_le32(key);
>> +       return 0;
>> +}
>> +
>> +static int stm32_crc_init(struct shash_desc *desc)
>> +{
>> +       struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
>> +       struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
>> +       struct stm32_crc *crc;
>> +
>> +       spin_lock_bh(&crc_list.lock);
>> +       list_for_each_entry(crc, &crc_list.dev_list, list) {
>> +               ctx->crc = crc;
>> +               break;
>> +       }
>> +       spin_unlock_bh(&crc_list.lock);
>> +
>> +       /* Reset, set key, poly and configure in bit reverse mode */
>> +       writel(bitrev32(mctx->key), ctx->crc->regs + CRC_INIT);
>> +       writel(bitrev32(mctx->poly), ctx->crc->regs + CRC_POL);
>> +       writel(CRC_CR_RESET | CRC_CR_REVERSE, ctx->crc->regs + CRC_CR);
>> +
>> +       /* Store partial result */
>> +       ctx->partial = readl(ctx->crc->regs + CRC_DR);
>> +       ctx->crc->nb_pending_bytes = 0;
>> +
>> +       return 0;
>> +}
>> +
>> +static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
>> +                           unsigned int length)
>> +{
>> +       struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
>> +       struct stm32_crc *crc = ctx->crc;
>> +       u32 *d32;
>> +       unsigned int i;
>> +
>> +       if (unlikely(crc->nb_pending_bytes)) {
>> +               while (crc->nb_pending_bytes != sizeof(u32) && length) {
>> +                       /* Fill in pending data */
>> +                       crc->pending_data[crc->nb_pending_bytes++] = *(d8++);
>> +                       length--;
>> +               }
>> +
>> +               if (crc->nb_pending_bytes == sizeof(u32)) {
>> +                       /* Process completed pending data */
>> +                       writel(*(u32 *)crc->pending_data, crc->regs + CRC_DR);
>> +                       crc->nb_pending_bytes = 0;
>> +               }
>> +       }
>> +
>> +       d32 = (u32 *)d8;
>> +       for (i = 0; i < length >> 2; i++)
>> +               /* Process 32 bits data */
>> +               writel(*(d32++), crc->regs + CRC_DR);
>> +
>> +       /* Store partial result */
>> +       ctx->partial = readl(crc->regs + CRC_DR);
>> +
>> +       /* Check for pending data (non 32 bits) */
>> +       length &= 3;
>> +       if (likely(!length))
>> +               return 0;
>> +
>> +       if ((crc->nb_pending_bytes + length) >= sizeof(u32)) {
>> +               /* Shall not happen */
>> +               dev_err(crc->dev, "Pending data overflow\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       d8 = (const u8 *)d32;
>> +       for (i = 0; i < length; i++)
>> +               /* Store pending data */
>> +               crc->pending_data[crc->nb_pending_bytes++] = *(d8++);
>> +
>> +       return 0;
>> +}
>> +
>> +static int stm32_crc_final(struct shash_desc *desc, u8 *out)
>> +{
>> +       struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
>> +       struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
>> +
>> +       /* Send computed CRC */
>> +       put_unaligned_le32(mctx->poly == POLY_CRC32C ?
>> +                          ~ctx->partial : ctx->partial, out);
>> +
>> +       return 0;
>> +}
>> +
>> +static int stm32_crc_finup(struct shash_desc *desc, const u8 *data,
>> +                          unsigned int length, u8 *out)
>> +{
>> +       return stm32_crc_update(desc, data, length) ?:
>> +              stm32_crc_final(desc, out);
>> +}
>> +
>> +static int stm32_crc_digest(struct shash_desc *desc, const u8 *data,
>> +                           unsigned int length, u8 *out)
>> +{
>> +       return stm32_crc_init(desc) ?: stm32_crc_finup(desc, data, length, out);
>> +}
>> +
>> +static struct shash_alg algs[] = {
>> +       /* CRC-32 */
>> +       {
>> +               .setkey         = stm32_crc_setkey,
>> +               .init           = stm32_crc_init,
>> +               .update         = stm32_crc_update,
>> +               .final          = stm32_crc_final,
>> +               .finup          = stm32_crc_finup,
>> +               .digest         = stm32_crc_digest,
>> +               .descsize       = sizeof(struct stm32_crc_desc_ctx),
>> +               .digestsize     = CHKSUM_DIGEST_SIZE,
>> +               .base           = {
>> +                       .cra_name               = "crc32",
>> +                       .cra_driver_name        = DRIVER_NAME,
>> +                       .cra_priority           = 200,
>> +                       .cra_blocksize          = CHKSUM_BLOCK_SIZE,
>> +                       .cra_alignmask          = 3,
>> +                       .cra_ctxsize            = sizeof(struct stm32_crc_ctx),
>> +                       .cra_module             = THIS_MODULE,
>> +                       .cra_init               = stm32_crc32_cra_init,
>> +               }
>> +       },
>> +       /* CRC-32Castagnoli */
>> +       {
>> +               .setkey         = stm32_crc_setkey,
>> +               .init           = stm32_crc_init,
>> +               .update         = stm32_crc_update,
>> +               .final          = stm32_crc_final,
>> +               .finup          = stm32_crc_finup,
>> +               .digest         = stm32_crc_digest,
>> +               .descsize       = sizeof(struct stm32_crc_desc_ctx),
>> +               .digestsize     = CHKSUM_DIGEST_SIZE,
>> +               .base           = {
>> +                       .cra_name               = "crc32c",
>> +                       .cra_driver_name        = DRIVER_NAME,
>> +                       .cra_priority           = 200,
>> +                       .cra_blocksize          = CHKSUM_BLOCK_SIZE,
>> +                       .cra_alignmask          = 3,
>> +                       .cra_ctxsize            = sizeof(struct stm32_crc_ctx),
>> +                       .cra_module             = THIS_MODULE,
>> +                       .cra_init               = stm32_crc32c_cra_init,
>> +               }
>> +       }
>> +};
> Will be better if the priority is based on benchmark result. ARM arch
> optimised crc32 also defines its priority as 200. Choose a higher
> priority if the hw crypto is either faster or consumes less power.
>
> Regards,
> PrasannaKumar


More information about the linux-arm-kernel mailing list