[PATCH 2/2] arm64/xor: use EOR3 instructions when available
Ard Biesheuvel
ardb at kernel.org
Mon Dec 13 05:33:21 PST 2021
On Mon, 13 Dec 2021 at 14:25, Catalin Marinas <catalin.marinas at arm.com> wrote:
>
> Hi Ard,
>
> I trust you on the algorithm but some minor issues below.
>
> On Tue, Nov 09, 2021 at 01:03:36PM +0100, Ard Biesheuvel wrote:
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 6f2d3e31fb54..14354acba5b4 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -2034,6 +2034,9 @@ config SYSVIPC_COMPAT
> > def_bool y
> > depends on COMPAT && SYSVIPC
> >
> > +config CC_HAVE_SHA3
> > + def_bool $(cc-option, -march=armv8.2-a+sha3)
>
> Is it the compiler or the assembler that we need to support this? I
> think it's sufficient to only check the latter.
>
> I'd also move it to the ARMv8.2 section.
>
> > +
> > menu "Power management options"
> >
> > source "kernel/power/Kconfig"
> > diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
> > index ee4795f3e166..0415cb94c781 100644
> > --- a/arch/arm64/lib/xor-neon.c
> > +++ b/arch/arm64/lib/xor-neon.c
> > @@ -172,6 +172,135 @@ void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1,
> > }
> > EXPORT_SYMBOL(xor_arm64_neon_5);
> >
> > +static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
> > +{
> > + uint64x2_t res;
> > +
> > + asm(".arch armv8.2-a+sha3 \n"
> > + "eor3 %0.16b, %1.16b, %2.16b, %3.16b"
> > + : "=w"(res) : "w"(p), "w"(q), "w"(r));
> > + return res;
> > +}
>
> The .arch directive here may confuse the compiler/assembler since it
> overrides any other .arch directive. I think this diff on top would do,
> but I haven't tested it extensively. I can fold it in if you give it a try:
>
I was going to respin this without the static_call changes, since
those are not going to land anytime soon, and for this code, using
static calls doesn't really matter anyway. I'll fold in your diff and
test it as well.
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5adae54c98d8..c5104e8829e5 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1545,6 +1545,12 @@ endmenu
>
> menu "ARMv8.2 architectural features"
>
> +config AS_HAS_ARMV8_2
> + def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
> +
> +config AS_HAS_SHA3
> + def_bool $(as-instr,.arch armv8.2-a+sha3)
> +
> config ARM64_PMEM
> bool "Enable support for persistent memory"
> select ARCH_HAS_PMEM_API
> @@ -2032,9 +2038,6 @@ config SYSVIPC_COMPAT
> def_bool y
> depends on COMPAT && SYSVIPC
>
> -config CC_HAVE_SHA3
> - def_bool $(cc-option, -march=armv8.2-a+sha3)
> -
> menu "Power management options"
>
> source "kernel/power/Kconfig"
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index e8cfc5868aa8..2f1de88651e6 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -58,6 +58,11 @@ stack_protector_prepare: prepare0
> include/generated/asm-offsets.h))
> endif
>
> +ifeq ($(CONFIG_AS_HAS_ARMV8_2), y)
> +# make sure to pass the newest target architecture to -march.
> +asm-arch := armv8.2-a
> +endif
> +
> # Ensure that if the compiler supports branch protection we default it
> # off, this will be overridden if we are using branch protection.
> branch-prot-flags-y += $(call cc-option,-mbranch-protection=none)
> diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
> index 0415cb94c781..2ca823825363 100644
> --- a/arch/arm64/lib/xor-neon.c
> +++ b/arch/arm64/lib/xor-neon.c
> @@ -176,7 +176,7 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
> {
> uint64x2_t res;
>
> - asm(".arch armv8.2-a+sha3 \n"
> + asm(ARM64_ASM_PREAMBLE ".arch_extension sha3\n"
> "eor3 %0.16b, %1.16b, %2.16b, %3.16b"
> : "=w"(res) : "w"(p), "w"(q), "w"(r));
> return res;
> @@ -311,7 +311,7 @@ EXPORT_STATIC_CALL(xor_arm64_5);
>
> static int __init xor_neon_init(void)
> {
> - if (IS_ENABLED(CONFIG_CC_HAVE_SHA3) && cpu_have_named_feature(SHA3)) {
> + if (IS_ENABLED(CONFIG_AS_HAS_SHA3) && cpu_have_named_feature(SHA3)) {
> static_call_update(xor_arm64_3, xor_arm64_eor3_3);
> static_call_update(xor_arm64_4, xor_arm64_eor3_4);
> static_call_update(xor_arm64_5, xor_arm64_eor3_5);
>
> --
> Catalin
More information about the linux-arm-kernel mailing list