[PATCH v2 02/11] crypto: sha256: PBL SHA-256 fast path via pbl/sha256.c
Johannes Schneider
johannes.schneider at leica-geosystems.com
Sat Jul 4 05:26:19 PDT 2026
Hashing MB-scale blobs in the PBL -- e.g. the fw-external SHA-256 verify on
i.MX8M, ~720 KiB of BL32 -- spends hundreds of ms in the generic-C
sha256_transform() even with the D-cache warm.
Add a PBL-local one-shot
void pbl_sha256(const void *buf, size_t len, u8 out[SHA256_DIGEST_SIZE]);
in a new pbl/sha256.c that picks the best transform available: the generic
digest API by default, or the ARMv8 Crypto Extensions asm core
(arch/arm/crypto/sha2-ce-core.S) under CONFIG_PBL_DIGEST_SHA256_ARM64_CE,
driven through the sha256_base_* helpers (the same batched block/finalize
framing sha2-ce-glue.c uses, without the crypto-API and kernel_neon_begin
shims the PBL has no use for). pbl_barebox_verify() calls it.
The ~720 KiB verify drops from ~300 ms (generic-C) to 3-5 ms (batched
crypto-ext) with a warm D-cache.
Signed-off-by: Johannes Schneider <johannes.schneider at leica-geosystems.com>
Assisted-by: Claude Opus 4.8 (1M context) <noreply at anthropic.com>
---
Notes:
v2:
- Move the PBL crypto-ext dispatch out of crypto/sha2.c into a new
pbl/sha256.c with a one-shot pbl_sha256(); crypto/sha2.c is left
untouched (Sascha).
- Build pbl/sha256.o unconditionally (pbl-y), like crypto/sha2.o, so the
ARMv8 CE core always finds its sha256_ce_offsetof_* glue; it no longer
hangs off CONFIG_HAVE_IMAGE_COMPRESSION.
- Gate the crypto-ext path on ID_AA64ISAR0_EL1.SHA2 and fall back to
generic C, matching sha2-ce-glue.c; the CE instructions are optional on
ARMv8 and trap as undefined otherwise (Copilot).
arch/arm/crypto/Makefile | 3 ++
crypto/Kconfig | 12 ++++++
include/crypto/pbl-sha.h | 4 ++
pbl/Makefile | 1 +
pbl/decomp.c | 6 +--
pbl/sha256.c | 85 ++++++++++++++++++++++++++++++++++++++++
6 files changed, 106 insertions(+), 5 deletions(-)
create mode 100644 pbl/sha256.c
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 55b3ac0538..085c01db09 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -15,6 +15,9 @@ sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
obj-$(CONFIG_DIGEST_SHA256_ARM64_CE) += sha2-ce.o
sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
+# Reuse the asm core; the PBL glue lives in pbl/sha256.c.
+pbl-$(CONFIG_PBL_DIGEST_SHA256_ARM64_CE) += sha2-ce-core.o
+
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 528e9a0d22..3dfb316b32 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -107,6 +107,18 @@ config DIGEST_SHA256_ARM64_CE
Architecture: arm64 using:
- ARMv8 Crypto Extensions
+config PBL_DIGEST_SHA256_ARM64_CE
+ bool "SHA-256 in PBL via ARMv8 Crypto Extensions"
+ depends on CPU_V8 && PBL_IMAGE
+ help
+ Use ARMv8 Crypto Extensions (sha256h/sha256h2/sha256su0/sha256su1)
+ for the SHA-256 transform inside the PBL. Roughly 100x faster than
+ the generic-C transform; for callers that hash large blobs (e.g.
+ fw-external SHA-256 verifies) this is the difference between a few
+ ms and hundreds of ms. Cortex-A53 and later cores can provide the
+ Crypto Extensions, but the feature is optional: its presence is
+ checked at runtime and CPUs without it fall back to the generic-C
+ transform.
+
endif
config CRYPTO_PBKDF2
diff --git a/include/crypto/pbl-sha.h b/include/crypto/pbl-sha.h
index 7d323ab479..2508448ab4 100644
--- a/include/crypto/pbl-sha.h
+++ b/include/crypto/pbl-sha.h
@@ -3,6 +3,7 @@
#define __PBL_SHA_H_
+#include <crypto/sha.h>
#include <digest.h>
#include <types.h>
@@ -10,4 +11,7 @@ int sha256_init(struct digest *desc);
int sha256_update(struct digest *desc, const void *data, unsigned long len);
int sha256_final(struct digest *desc, u8 *out);
+/* One-shot SHA-256 that picks the best transform available in the PBL. */
+void pbl_sha256(const void *buf, size_t len, u8 out[SHA256_DIGEST_SIZE]);
+
#endif /* __PBL-SHA_H_ */
diff --git a/pbl/Makefile b/pbl/Makefile
index 45cfbf5fba..4506f192fe 100644
--- a/pbl/Makefile
+++ b/pbl/Makefile
@@ -6,6 +6,7 @@
pbl-y += misc.o
pbl-y += string.o
pbl-y += malloc.o
+pbl-y += sha256.o
pbl-$(CONFIG_HAVE_IMAGE_COMPRESSION) += decomp.o
pbl-$(CONFIG_LIBFDT) += fdt.o
pbl-$(CONFIG_PBL_CONSOLE) += console.o
diff --git a/pbl/decomp.c b/pbl/decomp.c
index 1539a6b67e..2b3c35012f 100644
--- a/pbl/decomp.c
+++ b/pbl/decomp.c
@@ -58,8 +58,6 @@ extern unsigned char sha_sum_end[];
int pbl_barebox_verify(const void *compressed_start, unsigned int len,
const void *hash, unsigned int hash_len)
{
- struct sha256_state sha_state = { 0 };
- struct digest d = { .ctx = &sha_state };
char computed_hash[SHA256_DIGEST_SIZE];
int i;
const char *char_hash = hash;
@@ -67,9 +65,7 @@ int pbl_barebox_verify(const void *compressed_start, unsigned int len,
if (hash_len != SHA256_DIGEST_SIZE)
return -1;
- sha256_init(&d);
- sha256_update(&d, compressed_start, len);
- sha256_final(&d, computed_hash);
+ pbl_sha256(compressed_start, len, computed_hash);
if (IS_ENABLED(CONFIG_DEBUG_LL)) {
puts_ll("CH ");
diff --git a/pbl/sha256.c b/pbl/sha256.c
new file mode 100644
index 0000000000..22af1d6907
--- /dev/null
+++ b/pbl/sha256.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * pbl_sha256() - one-shot SHA-256 for the PBL, picking the best available
+ * transform (ARMv8 Crypto Extensions if present, else generic C).
+ */
+
+#include <common.h>
+#include <crypto/sha.h>
+#include <crypto/pbl-sha.h>
+#include <digest.h>
+
+/*
+ * Hash @len bytes at @buf in one shot and store the digest in @out.
+ *
+ * Goes through the sha256_init()/sha256_update()/sha256_final() entry
+ * points declared in <crypto/pbl-sha.h>, using a zero-initialised
+ * sha256_state on the stack as the digest context. The return values of
+ * those three calls are ignored.
+ */
+static void pbl_sha256_generic(const void *buf, size_t len, u8 *out)
+{
+ struct sha256_state state = { };
+ struct digest d = { .ctx = &state, .length = SHA256_DIGEST_SIZE };
+
+ sha256_init(&d);
+ sha256_update(&d, buf, len);
+ sha256_final(&d, out);
+}
+
+#ifdef CONFIG_PBL_DIGEST_SHA256_ARM64_CE
+
+#include <crypto/sha256_base.h>
+#include <linux/linkage.h>
+#include <linux/kernel.h>
+#include <asm/sysreg.h>
+
+/*
+ * Layout sha2-ce-core.S expects; it reads count/finalize at the offsets below.
+ *
+ * pbl_sha256() passes a pointer to this whole struct as the digest context
+ * and pbl_sha2_ce_block() recovers it from the sst pointer with
+ * container_of(). NOTE(review): presumably the sha256_base_* helpers treat
+ * the context as a struct sha256_state, which only works while sst stays
+ * the first member -- confirm before reordering.
+ */
+struct pbl_sha256_ce_state {
+ struct sha256_state sst;
+ u32 finalize;
+};
+
+/*
+ * Member offsets of struct pbl_sha256_ce_state, exported as non-static data
+ * symbols so that code outside this file (sha2-ce-core.S, which reads
+ * count/finalize at these offsets) can pick them up at link time.
+ */
+const u32 sha256_ce_offsetof_count = offsetof(struct pbl_sha256_ce_state, sst.count);
+const u32 sha256_ce_offsetof_finalize = offsetof(struct pbl_sha256_ce_state, finalize);
+
+/*
+ * Asm transform from arch/arm/crypto/sha2-ce-core.S, which is added to the
+ * PBL build under CONFIG_PBL_DIGEST_SHA256_ARM64_CE. pbl_sha2_ce_block()
+ * treats the return value as the number of blocks still left to process.
+ */
+asmlinkage int sha2_ce_transform(struct pbl_sha256_ce_state *sst,
+ const u8 *src, int blocks);
+
+/*
+ * Block callback handed to sha256_base_do_update()/sha256_base_do_finalize().
+ *
+ * @sst:    the sst member of a struct pbl_sha256_ce_state
+ * @src:    input data, consumed in SHA256_BLOCK_SIZE units
+ * @blocks: number of blocks to process
+ *
+ * Keeps calling sha2_ce_transform() until it reports no blocks remaining,
+ * advancing @src past whatever each call consumed.
+ */
+static void pbl_sha2_ce_block(struct sha256_state *sst, const u8 *src,
+ int blocks)
+{
+ struct pbl_sha256_ce_state *s =
+ container_of(sst, struct pbl_sha256_ce_state, sst);
+
+ /* finalize == 0: C does the padding via sha256_base_do_finalize(). */
+ s->finalize = 0;
+ while (blocks) {
+ /* rem is used as the count of blocks the asm core left unprocessed. */
+ int rem = sha2_ce_transform(s, src, blocks);
+
+ src += (blocks - rem) * SHA256_BLOCK_SIZE;
+ blocks = rem;
+ }
+}
+
+/*
+ * pbl_sha256() - one-shot SHA-256 of a buffer, Crypto Extensions build
+ * @buf: data to hash
+ * @len: number of bytes at @buf
+ * @out: receives the SHA256_DIGEST_SIZE-byte digest
+ *
+ * Hashes through sha2_ce_transform() (via the sha256_base_* helpers) when
+ * the SHA2 field of ID_AA64ISAR0_EL1 is non-zero, and through
+ * pbl_sha256_generic() otherwise.
+ *
+ * NOTE(review): nothing in this function enables FP/SIMD register access;
+ * presumably the PBL already runs with it enabled wherever this is called
+ * -- confirm.
+ */
+void pbl_sha256(const void *buf, size_t len, u8 out[SHA256_DIGEST_SIZE])
+{
+ /*
+ * Not zeroed here. NOTE(review): this relies on sha256_base_init()
+ * setting up everything in s.sst that is read later; s.finalize is
+ * written by pbl_sha2_ce_block() before each transform -- confirm.
+ */
+ struct pbl_sha256_ce_state s;
+ struct digest d = { .ctx = &s, .length = SHA256_DIGEST_SIZE };
+
+ /*
+ * The Crypto Extensions are optional on ARMv8 and sha256h & co. trap
+ * as undefined without them, so gate on ID_AA64ISAR0_EL1.SHA2 like
+ * sha2-ce-glue.c does at registration and fall back to generic C.
+ */
+ if (!(read_sysreg(ID_AA64ISAR0_EL1) & ID_AA64ISAR0_EL1_SHA2_MASK)) {
+ pbl_sha256_generic(buf, len, out);
+ return;
+ }
+
+ sha256_base_init(&d);
+ sha256_base_do_update(&d, buf, len, pbl_sha2_ce_block);
+ sha256_base_do_finalize(&d, pbl_sha2_ce_block);
+ sha256_base_finish(&d, out);
+}
+
+#else /* generic C transform via crypto/sha2.c */
+
+/*
+ * pbl_sha256() - one-shot SHA-256 of a buffer, generic build
+ * @buf: data to hash
+ * @len: number of bytes at @buf
+ * @out: receives the SHA256_DIGEST_SIZE-byte digest
+ *
+ * Built when CONFIG_PBL_DIGEST_SHA256_ARM64_CE is not set; simply forwards
+ * to pbl_sha256_generic().
+ */
+void pbl_sha256(const void *buf, size_t len, u8 out[SHA256_DIGEST_SIZE])
+{
+ pbl_sha256_generic(buf, len, out);
+}
+
+#endif
--
2.43.0
More information about the barebox
mailing list