[PATCH] crypto: arm64/crc32 - merge CRC32 and PMULL instruction based drivers

Matthias Brugger mbrugger at suse.com
Thu Feb 2 08:47:12 PST 2017



On 02/01/2017 04:35 PM, Ard Biesheuvel wrote:
> The PMULL based CRC32 implementation already contains code based on the
> separate, optional CRC32 instructions to fall back on when operating on
> small quantities of data. We can expose these routines directly on systems
> that lack the 64x64 PMULL instructions but do implement the CRC32 ones,
> which makes the driver that is based solely on those CRC32 instructions
> redundant. So remove it.
>
> Note that this aligns arm64 with ARM, whose accelerated CRC32 driver
> also combines the CRC32 extension based and the PMULL based versions.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
> ---
>
> This is a meaningful patch by itself, imho, but it also fixes the issue reported
> by Matthias, where his v4.8 based GCC does not understand the -mcpu=generic+crc
> command line option, resulting in failed builds.
>

This patch fixes the issue, thanks.
Tested-by: Matthias Brugger <mbrugger at suse.com>
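
FWIW, an easy way to double-check which implementation ends up being
used on a given system is to look at /proc/crypto after the module is
loaded: each registered algorithm is listed there with its driver name
and priority.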

>  arch/arm64/configs/defconfig      |   1 -
>  arch/arm64/crypto/Kconfig         |   9 +-
>  arch/arm64/crypto/Makefile        |   4 -
>  arch/arm64/crypto/crc32-arm64.c   | 290 --------------------
>  arch/arm64/crypto/crc32-ce-glue.c |  49 +++-
>  5 files changed, 41 insertions(+), 312 deletions(-)
>
> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
> index 33b744d54739..6fc6f5a2a6e5 100644
> --- a/arch/arm64/configs/defconfig
> +++ b/arch/arm64/configs/defconfig
> @@ -516,4 +516,3 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y
>  CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
>  CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
>  # CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
> -CONFIG_CRYPTO_CRC32_ARM64=y
> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
> index bed7feddfeed..d92293747d63 100644
> --- a/arch/arm64/crypto/Kconfig
> +++ b/arch/arm64/crypto/Kconfig
> @@ -37,8 +37,8 @@ config CRYPTO_CRCT10DIF_ARM64_CE
>  	select CRYPTO_HASH
>
>  config CRYPTO_CRC32_ARM64_CE
> -	tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
> -	depends on KERNEL_MODE_NEON && CRC32
> +	tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
> +	depends on CRC32
>  	select CRYPTO_HASH
>
>  config CRYPTO_AES_ARM64
> @@ -71,11 +71,6 @@ config CRYPTO_AES_ARM64_NEON_BLK
>  	select CRYPTO_AES
>  	select CRYPTO_SIMD
>
> -config CRYPTO_CRC32_ARM64
> -	tristate "CRC32 and CRC32C using optional ARMv8 instructions"
> -	depends on ARM64
> -	select CRYPTO_HASH
> -
>  config CRYPTO_CHACHA20_NEON
>  	tristate "NEON accelerated ChaCha20 symmetric cipher"
>  	depends on KERNEL_MODE_NEON
> diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
> index d1ae1b9cbe70..b5edc5918c28 100644
> --- a/arch/arm64/crypto/Makefile
> +++ b/arch/arm64/crypto/Makefile
> @@ -55,10 +55,6 @@ AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
>
>  CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
>
> -obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o
> -
> -CFLAGS_crc32-arm64.o	:= -mcpu=generic+crc
> -
>  $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
>  	$(call if_changed_rule,cc_o_c)
>
> diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c
> deleted file mode 100644
> index 6a37c3c6b11d..000000000000
> --- a/arch/arm64/crypto/crc32-arm64.c
> +++ /dev/null
> @@ -1,290 +0,0 @@
> -/*
> - * crc32-arm64.c - CRC32 and CRC32C using optional ARMv8 instructions
> - *
> - * Module based on crypto/crc32c_generic.c
> - *
> - * CRC32 loop taken from Ed Nevill's Hadoop CRC patch
> - * http://mail-archives.apache.org/mod_mbox/hadoop-common-dev/201406.mbox/%3C1403687030.3355.19.camel%40localhost.localdomain%3E
> - *
> - * Using inline assembly instead of intrinsics in order to be backwards
> - * compatible with older compilers.
> - *
> - * Copyright (C) 2014 Linaro Ltd <yazen.ghannam at linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - */
> -
> -#include <linux/unaligned/access_ok.h>
> -#include <linux/cpufeature.h>
> -#include <linux/init.h>
> -#include <linux/kernel.h>
> -#include <linux/module.h>
> -#include <linux/string.h>
> -
> -#include <crypto/internal/hash.h>
> -
> -MODULE_AUTHOR("Yazen Ghannam <yazen.ghannam at linaro.org>");
> -MODULE_DESCRIPTION("CRC32 and CRC32C using optional ARMv8 instructions");
> -MODULE_LICENSE("GPL v2");
> -
> -#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -
> -static u32 crc32_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
> -{
> -	s64 length = len;
> -
> -	while ((length -= sizeof(u64)) >= 0) {
> -		CRC32X(crc, get_unaligned_le64(p));
> -		p += sizeof(u64);
> -	}
> -
> -	/* The following is more efficient than the straight loop */
> -	if (length & sizeof(u32)) {
> -		CRC32W(crc, get_unaligned_le32(p));
> -		p += sizeof(u32);
> -	}
> -	if (length & sizeof(u16)) {
> -		CRC32H(crc, get_unaligned_le16(p));
> -		p += sizeof(u16);
> -	}
> -	if (length & sizeof(u8))
> -		CRC32B(crc, *p);
> -
> -	return crc;
> -}
> -
> -static u32 crc32c_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
> -{
> -	s64 length = len;
> -
> -	while ((length -= sizeof(u64)) >= 0) {
> -		CRC32CX(crc, get_unaligned_le64(p));
> -		p += sizeof(u64);
> -	}
> -
> -	/* The following is more efficient than the straight loop */
> -	if (length & sizeof(u32)) {
> -		CRC32CW(crc, get_unaligned_le32(p));
> -		p += sizeof(u32);
> -	}
> -	if (length & sizeof(u16)) {
> -		CRC32CH(crc, get_unaligned_le16(p));
> -		p += sizeof(u16);
> -	}
> -	if (length & sizeof(u8))
> -		CRC32CB(crc, *p);
> -
> -	return crc;
> -}
> -
> -#define CHKSUM_BLOCK_SIZE	1
> -#define CHKSUM_DIGEST_SIZE	4
> -
> -struct chksum_ctx {
> -	u32 key;
> -};
> -
> -struct chksum_desc_ctx {
> -	u32 crc;
> -};
> -
> -static int chksum_init(struct shash_desc *desc)
> -{
> -	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	ctx->crc = mctx->key;
> -
> -	return 0;
> -}
> -
> -/*
> - * Setting the seed allows arbitrary accumulators and flexible XOR policy
> - * If your algorithm starts with ~0, then XOR with ~0 before you set
> - * the seed.
> - */
> -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
> -			 unsigned int keylen)
> -{
> -	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
> -
> -	if (keylen != sizeof(mctx->key)) {
> -		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
> -		return -EINVAL;
> -	}
> -	mctx->key = get_unaligned_le32(key);
> -	return 0;
> -}
> -
> -static int chksum_update(struct shash_desc *desc, const u8 *data,
> -			 unsigned int length)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	ctx->crc = crc32_arm64_le_hw(ctx->crc, data, length);
> -	return 0;
> -}
> -
> -static int chksumc_update(struct shash_desc *desc, const u8 *data,
> -			 unsigned int length)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	ctx->crc = crc32c_arm64_le_hw(ctx->crc, data, length);
> -	return 0;
> -}
> -
> -static int chksum_final(struct shash_desc *desc, u8 *out)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	put_unaligned_le32(ctx->crc, out);
> -	return 0;
> -}
> -
> -static int chksumc_final(struct shash_desc *desc, u8 *out)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	put_unaligned_le32(~ctx->crc, out);
> -	return 0;
> -}
> -
> -static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
> -{
> -	put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
> -	return 0;
> -}
> -
> -static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
> -{
> -	put_unaligned_le32(~crc32c_arm64_le_hw(crc, data, len), out);
> -	return 0;
> -}
> -
> -static int chksum_finup(struct shash_desc *desc, const u8 *data,
> -			unsigned int len, u8 *out)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	return __chksum_finup(ctx->crc, data, len, out);
> -}
> -
> -static int chksumc_finup(struct shash_desc *desc, const u8 *data,
> -			unsigned int len, u8 *out)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	return __chksumc_finup(ctx->crc, data, len, out);
> -}
> -
> -static int chksum_digest(struct shash_desc *desc, const u8 *data,
> -			 unsigned int length, u8 *out)
> -{
> -	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
> -
> -	return __chksum_finup(mctx->key, data, length, out);
> -}
> -
> -static int chksumc_digest(struct shash_desc *desc, const u8 *data,
> -			 unsigned int length, u8 *out)
> -{
> -	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
> -
> -	return __chksumc_finup(mctx->key, data, length, out);
> -}
> -
> -static int crc32_cra_init(struct crypto_tfm *tfm)
> -{
> -	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
> -
> -	mctx->key = 0;
> -	return 0;
> -}
> -
> -static int crc32c_cra_init(struct crypto_tfm *tfm)
> -{
> -	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
> -
> -	mctx->key = ~0;
> -	return 0;
> -}
> -
> -static struct shash_alg crc32_alg = {
> -	.digestsize		=	CHKSUM_DIGEST_SIZE,
> -	.setkey			=	chksum_setkey,
> -	.init			=	chksum_init,
> -	.update			=	chksum_update,
> -	.final			=	chksum_final,
> -	.finup			=	chksum_finup,
> -	.digest			=	chksum_digest,
> -	.descsize		=	sizeof(struct chksum_desc_ctx),
> -	.base			=	{
> -		.cra_name		=	"crc32",
> -		.cra_driver_name	=	"crc32-arm64-hw",
> -		.cra_priority		=	300,
> -		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
> -		.cra_alignmask		=	0,
> -		.cra_ctxsize		=	sizeof(struct chksum_ctx),
> -		.cra_module		=	THIS_MODULE,
> -		.cra_init		=	crc32_cra_init,
> -	}
> -};
> -
> -static struct shash_alg crc32c_alg = {
> -	.digestsize		=	CHKSUM_DIGEST_SIZE,
> -	.setkey			=	chksum_setkey,
> -	.init			=	chksum_init,
> -	.update			=	chksumc_update,
> -	.final			=	chksumc_final,
> -	.finup			=	chksumc_finup,
> -	.digest			=	chksumc_digest,
> -	.descsize		=	sizeof(struct chksum_desc_ctx),
> -	.base			=	{
> -		.cra_name		=	"crc32c",
> -		.cra_driver_name	=	"crc32c-arm64-hw",
> -		.cra_priority		=	300,
> -		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
> -		.cra_alignmask		=	0,
> -		.cra_ctxsize		=	sizeof(struct chksum_ctx),
> -		.cra_module		=	THIS_MODULE,
> -		.cra_init		=	crc32c_cra_init,
> -	}
> -};
> -
> -static int __init crc32_mod_init(void)
> -{
> -	int err;
> -
> -	err = crypto_register_shash(&crc32_alg);
> -
> -	if (err)
> -		return err;
> -
> -	err = crypto_register_shash(&crc32c_alg);
> -
> -	if (err) {
> -		crypto_unregister_shash(&crc32_alg);
> -		return err;
> -	}
> -
> -	return 0;
> -}
> -
> -static void __exit crc32_mod_exit(void)
> -{
> -	crypto_unregister_shash(&crc32_alg);
> -	crypto_unregister_shash(&crc32c_alg);
> -}
> -
> -module_cpu_feature_match(CRC32, crc32_mod_init);
> -module_exit(crc32_mod_exit);
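
As an aside on the file removed above: its header notes that inline asm
was used instead of intrinsics to stay compatible with older compilers.
On a toolchain that does accept the CRC extension (GCC 4.9 or newer
with -march=armv8-a+crc), the same loop could be written with the ACLE
intrinsics from <arm_acle.h>. A minimal little-endian userspace sketch
for illustration (not kernel code; crc32_hw is just a placeholder name):

/*
 * Userspace sketch of the 64-bit-at-a-time CRC32 loop using ACLE
 * intrinsics instead of inline asm. Build with e.g.
 *   gcc -O2 -march=armv8-a+crc
 * Assumes a little-endian target, matching the kernel code above.
 */
#include <arm_acle.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static uint32_t crc32_hw(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len >= sizeof(uint64_t)) {
		uint64_t v;

		memcpy(&v, p, sizeof(v));	/* unaligned-safe load */
		crc = __crc32d(crc, v);		/* CRC32X instruction */
		p += sizeof(v);
		len -= sizeof(v);
	}
	while (len--)
		crc = __crc32b(crc, *p++);	/* CRC32B instruction */

	return crc;
}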
> diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
> index 8594127d5e01..eccb1ae90064 100644
> --- a/arch/arm64/crypto/crc32-ce-glue.c
> +++ b/arch/arm64/crypto/crc32-ce-glue.c
> @@ -72,6 +72,24 @@ static int crc32_pmull_init(struct shash_desc *desc)
>  	return 0;
>  }
>
> +static int crc32_update(struct shash_desc *desc, const u8 *data,
> +			unsigned int length)
> +{
> +	u32 *crc = shash_desc_ctx(desc);
> +
> +	*crc = crc32_armv8_le(*crc, data, length);
> +	return 0;
> +}
> +
> +static int crc32c_update(struct shash_desc *desc, const u8 *data,
> +			 unsigned int length)
> +{
> +	u32 *crc = shash_desc_ctx(desc);
> +
> +	*crc = crc32c_armv8_le(*crc, data, length);
> +	return 0;
> +}
> +
>  static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
>  			 unsigned int length)
>  {
> @@ -156,7 +174,7 @@ static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
>  static struct shash_alg crc32_pmull_algs[] = { {
>  	.setkey			= crc32_pmull_setkey,
>  	.init			= crc32_pmull_init,
> -	.update			= crc32_pmull_update,
> +	.update			= crc32_update,
>  	.final			= crc32_pmull_final,
>  	.descsize		= sizeof(u32),
>  	.digestsize		= sizeof(u32),
> @@ -171,7 +189,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
>  }, {
>  	.setkey			= crc32_pmull_setkey,
>  	.init			= crc32_pmull_init,
> -	.update			= crc32c_pmull_update,
> +	.update			= crc32c_update,
>  	.final			= crc32c_pmull_final,
>  	.descsize		= sizeof(u32),
>  	.digestsize		= sizeof(u32),
> @@ -187,14 +205,20 @@ static struct shash_alg crc32_pmull_algs[] = { {
>
>  static int __init crc32_pmull_mod_init(void)
>  {
> -	if (elf_hwcap & HWCAP_CRC32) {
> -		fallback_crc32 = crc32_armv8_le;
> -		fallback_crc32c = crc32c_armv8_le;
> -	} else {
> -		fallback_crc32 = crc32_le;
> -		fallback_crc32c = __crc32c_le;
> +	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
> +		crc32_pmull_algs[0].update = crc32_pmull_update;
> +		crc32_pmull_algs[1].update = crc32c_pmull_update;
> +
> +		if (elf_hwcap & HWCAP_CRC32) {
> +			fallback_crc32 = crc32_armv8_le;
> +			fallback_crc32c = crc32c_armv8_le;
> +		} else {
> +			fallback_crc32 = crc32_le;
> +			fallback_crc32c = __crc32c_le;
> +		}
> +	} else if (!(elf_hwcap & HWCAP_CRC32)) {
> +		return -ENODEV;
>  	}
> -
>  	return crypto_register_shashes(crc32_pmull_algs,
>  				       ARRAY_SIZE(crc32_pmull_algs));
>  }
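
To summarize the resulting init logic for anyone skimming (my own
summary, not part of the patch; "PMULL" below means HWCAP_PMULL with
CONFIG_KERNEL_MODE_NEON enabled):

  PMULL  CRC32   .update hooks registered   fallback for small inputs
  yes    yes     crc32(c)_pmull_update      crc32(c)_armv8_le
  yes    no      crc32(c)_pmull_update      generic crc32_le / __crc32c_le
  no     yes     crc32(c)_update            n/a (CRC32 instructions only)
  no     no      init fails with -ENODEV

Patching the .update members before crypto_register_shashes() runs
should be safe, since nothing can look up the algorithms before they
are registered.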
> @@ -205,7 +229,12 @@ static void __exit crc32_pmull_mod_exit(void)
>  				  ARRAY_SIZE(crc32_pmull_algs));
>  }
>
> -module_cpu_feature_match(PMULL, crc32_pmull_mod_init);
> +static const struct cpu_feature crc32_cpu_feature[] = {
> +	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
> +};
> +MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
> +
> +module_init(crc32_pmull_mod_init);
>  module_exit(crc32_pmull_mod_exit);
>
>  MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel at linaro.org>");
>
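
One note on the module_cpu_feature_match() -> MODULE_DEVICE_TABLE
change at the end: that helper ties both module autoloading and the
init routine to exactly one hwcap. It expands to roughly the following
(paraphrased from include/linux/cpufeature.h):

#define module_cpu_feature_match(x, __initfunc)			\
static struct cpu_feature const cpu_feature_match_ ## x[] =		\
	{ { .feature = cpu_feature(x) }, { } };				\
MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x);			\
									\
static int __init cpu_feature_match_ ## x ## _init(void)		\
{									\
	if (!cpu_have_feature(cpu_feature(x)))				\
		return -ENODEV;						\
	return __initfunc();						\
}									\
module_init(cpu_feature_match_ ## x ## _init)

Since the merged driver has to load when either CRC32 or PMULL is
present, open-coding a two-entry table and doing the hwcap checks
inside crc32_pmull_mod_init() is the right approach.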