[PATCH v2 02/11] crypto: sha256: PBL SHA-256 fast path via pbl/sha256.c

Johannes Schneider johannes.schneider at leica-geosystems.com
Sat Jul 4 05:26:19 PDT 2026


Hashing MB-scale blobs in the PBL -- e.g. the fw-external SHA-256 verify on
i.MX8M, ~720 KiB of BL32 -- spends hundreds of ms in the generic-C
sha256_transform() even with the D-cache warm.

Add a PBL-local one-shot

    void pbl_sha256(const void *buf, size_t len, u8 out[SHA256_DIGEST_SIZE]);

in a new pbl/sha256.c that picks the best transform available: the generic
digest API by default, or the ARMv8 Crypto Extensions asm core
(arch/arm/crypto/sha2-ce-core.S) under CONFIG_PBL_DIGEST_SHA256_ARM64_CE,
driven through the sha256_base_* helpers (the same batched block/finalize
framing sha2-ce-glue.c uses, without the crypto-API and kernel_neon_begin
shims the PBL has no use for). The crypto-ext path is gated at runtime on
ID_AA64ISAR0_EL1.SHA2 and falls back to the generic-C transform when the
CPU lacks the extension. pbl_barebox_verify() now calls pbl_sha256().

The ~720 KiB verify drops from ~300 ms (generic-C) to 3-5 ms (batched
crypto-ext) with a warm D-cache.

Signed-off-by: Johannes Schneider <johannes.schneider at leica-geosystems.com>
Assisted-by: Claude Opus 4.8 (1M context) <noreply at anthropic.com>
---

Notes:
    v2:
    - Move the PBL crypto-ext dispatch out of crypto/sha2.c into a new
      pbl/sha256.c with a one-shot pbl_sha256(); crypto/sha2.c is left
      untouched (Sascha).
    
    - Build pbl/sha256.o unconditionally (pbl-y), like crypto/sha2.o, so the
      ARMv8 CE core always finds its sha256_ce_offsetof_* glue; it no longer
      hangs off CONFIG_HAVE_IMAGE_COMPRESSION.
    
    - Gate the crypto-ext path on ID_AA64ISAR0_EL1.SHA2 and fall back to
      generic C, matching sha2-ce-glue.c; the CE instructions are optional on
      ARMv8 and trap as undefined otherwise (Copilot).

 arch/arm/crypto/Makefile |  3 ++
 crypto/Kconfig           | 12 ++++++
 include/crypto/pbl-sha.h |  4 ++
 pbl/Makefile             |  1 +
 pbl/decomp.c             |  6 +--
 pbl/sha256.c             | 85 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 106 insertions(+), 5 deletions(-)
 create mode 100644 pbl/sha256.c

diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 55b3ac0538..085c01db09 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -15,6 +15,9 @@ sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
 obj-$(CONFIG_DIGEST_SHA256_ARM64_CE) += sha2-ce.o
 sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
 
+# Reuse the asm core; the PBL glue lives in pbl/sha256.c.
+pbl-$(CONFIG_PBL_DIGEST_SHA256_ARM64_CE) += sha2-ce-core.o
+
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 528e9a0d22..3dfb316b32 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -107,6 +107,18 @@ config DIGEST_SHA256_ARM64_CE
 	  Architecture: arm64 using:
 	  - ARMv8 Crypto Extensions
 
+config PBL_DIGEST_SHA256_ARM64_CE
+	bool "SHA-256 in PBL via ARMv8 Crypto Extensions"
+	depends on CPU_V8 && PBL_IMAGE
+	help
+	  Use ARMv8 Crypto Extensions (sha256h/sha256h2/sha256su0/sha256su1)
+	  for the SHA-256 transform inside the PBL. Roughly 100x faster than
+	  the generic-C transform; for callers that hash large blobs (e.g.
+	  fw-external SHA-256 verifies) this is the difference between a few
+	  ms and hundreds. The Crypto Extensions are optional on Cortex-A53
+	  and later; cores without them fall back to generic C at runtime.
+
+
 endif
 
 config CRYPTO_PBKDF2
diff --git a/include/crypto/pbl-sha.h b/include/crypto/pbl-sha.h
index 7d323ab479..2508448ab4 100644
--- a/include/crypto/pbl-sha.h
+++ b/include/crypto/pbl-sha.h
@@ -3,6 +3,7 @@
 
 #define __PBL_SHA_H_
 
+#include <crypto/sha.h>
 #include <digest.h>
 #include <types.h>
 
@@ -10,4 +11,7 @@ int sha256_init(struct digest *desc);
 int sha256_update(struct digest *desc, const void *data, unsigned long len);
 int sha256_final(struct digest *desc, u8 *out);
 
+/* One-shot SHA-256 that picks the best transform available in the PBL. */
+void pbl_sha256(const void *buf, size_t len, u8 out[SHA256_DIGEST_SIZE]);
+
 #endif /* __PBL-SHA_H_ */
diff --git a/pbl/Makefile b/pbl/Makefile
index 45cfbf5fba..4506f192fe 100644
--- a/pbl/Makefile
+++ b/pbl/Makefile
@@ -6,6 +6,7 @@
 pbl-y += misc.o
 pbl-y += string.o
 pbl-y += malloc.o
+pbl-y += sha256.o
 pbl-$(CONFIG_HAVE_IMAGE_COMPRESSION) += decomp.o
 pbl-$(CONFIG_LIBFDT) += fdt.o
 pbl-$(CONFIG_PBL_CONSOLE) += console.o
diff --git a/pbl/decomp.c b/pbl/decomp.c
index 1539a6b67e..2b3c35012f 100644
--- a/pbl/decomp.c
+++ b/pbl/decomp.c
@@ -58,8 +58,6 @@ extern unsigned char sha_sum_end[];
 int pbl_barebox_verify(const void *compressed_start, unsigned int len,
 		       const void *hash, unsigned int hash_len)
 {
-	struct sha256_state sha_state = { 0 };
-	struct digest d = { .ctx = &sha_state };
 	char computed_hash[SHA256_DIGEST_SIZE];
 	int i;
 	const char *char_hash = hash;
@@ -67,9 +65,7 @@ int pbl_barebox_verify(const void *compressed_start, unsigned int len,
 	if (hash_len != SHA256_DIGEST_SIZE)
 		return -1;
 
-	sha256_init(&d);
-	sha256_update(&d, compressed_start, len);
-	sha256_final(&d, computed_hash);
+	pbl_sha256(compressed_start, len, computed_hash);
 	if (IS_ENABLED(CONFIG_DEBUG_LL)) {
 		puts_ll("CH ");
 
diff --git a/pbl/sha256.c b/pbl/sha256.c
new file mode 100644
index 0000000000..22af1d6907
--- /dev/null
+++ b/pbl/sha256.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * pbl_sha256() - one-shot SHA-256 for the PBL, picking the best available
+ * transform (ARMv8 Crypto Extensions if present, else generic C).
+ */
+
+#include <common.h>
+#include <crypto/sha.h>
+#include <crypto/pbl-sha.h>
+#include <digest.h>
+
+static void pbl_sha256_generic(const void *buf, size_t len, u8 *out)
+{
+	struct sha256_state state = { };
+	struct digest d = { .ctx = &state, .length = SHA256_DIGEST_SIZE };
+
+	sha256_init(&d);
+	sha256_update(&d, buf, len);
+	sha256_final(&d, out);
+}
+
+#ifdef CONFIG_PBL_DIGEST_SHA256_ARM64_CE
+
+#include <crypto/sha256_base.h>
+#include <linux/linkage.h>
+#include <linux/kernel.h>
+#include <asm/sysreg.h>
+
+/* Layout sha2-ce-core.S expects; it reads count/finalize at the offsets below. */
+struct pbl_sha256_ce_state {
+	struct sha256_state	sst;
+	u32			finalize;
+};
+
+const u32 sha256_ce_offsetof_count    = offsetof(struct pbl_sha256_ce_state, sst.count);
+const u32 sha256_ce_offsetof_finalize = offsetof(struct pbl_sha256_ce_state, finalize);
+
+asmlinkage int sha2_ce_transform(struct pbl_sha256_ce_state *sst,
+				 const u8 *src, int blocks);
+
+static void pbl_sha2_ce_block(struct sha256_state *sst, const u8 *src,
+			      int blocks)
+{
+	struct pbl_sha256_ce_state *s =
+		container_of(sst, struct pbl_sha256_ce_state, sst);
+
+	/* finalize == 0: C does the padding via sha256_base_do_finalize(). */
+	s->finalize = 0;
+	while (blocks) {
+		int rem = sha2_ce_transform(s, src, blocks);
+
+		src += (blocks - rem) * SHA256_BLOCK_SIZE;
+		blocks = rem;
+	}
+}
+
+void pbl_sha256(const void *buf, size_t len, u8 out[SHA256_DIGEST_SIZE])
+{
+	struct pbl_sha256_ce_state s;
+	struct digest d = { .ctx = &s, .length = SHA256_DIGEST_SIZE };
+
+	/*
+	 * The Crypto Extensions are optional on ARMv8 and sha256h & co. trap
+	 * as undefined without them, so gate on ID_AA64ISAR0_EL1.SHA2 like
+	 * sha2-ce-glue.c does at registration and fall back to generic C.
+	 */
+	if (!(read_sysreg(ID_AA64ISAR0_EL1) & ID_AA64ISAR0_EL1_SHA2_MASK)) {
+		pbl_sha256_generic(buf, len, out);
+		return;
+	}
+
+	sha256_base_init(&d);
+	sha256_base_do_update(&d, buf, len, pbl_sha2_ce_block);
+	sha256_base_do_finalize(&d, pbl_sha2_ce_block);
+	sha256_base_finish(&d, out);
+}
+
+#else /* generic C transform via crypto/sha2.c */
+
+void pbl_sha256(const void *buf, size_t len, u8 out[SHA256_DIGEST_SIZE])
+{
+	pbl_sha256_generic(buf, len, out);
+}
+
+#endif
-- 
2.43.0




More information about the barebox mailing list