@@ -171,4 +171,14 @@ config ARMV8_SECURE_BASE
endif
+menuconfig ARMV8_CRYPTO
+ bool "ARM64 Accelerated Cryptographic Algorithms"
+
+if ARMV8_CRYPTO
+
+config ARMV8_CE_SHA1
+ bool "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+ default y
+endif
+
endif
@@ -44,3 +44,4 @@ obj-$(CONFIG_TARGET_HIKEY) += hisilicon/
obj-$(CONFIG_ARMV8_PSCI) += psci.o
obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
obj-$(CONFIG_XEN) += xen/
+obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o
new file mode 100644
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * sha1_ce_core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2022 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+ #include <config.h>
+ #include <linux/linkage.h>
+ #include <asm/system.h>
+ #include <asm/macro.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ k0 .req v0
+ k1 .req v1
+ k2 .req v2
+ k3 .req v3
+
+ t0 .req v4
+ t1 .req v5
+
+ dga .req q6
+ dgav .req v6
+ dgb .req s7
+ dgbv .req v7
+
+ dg0q .req q12
+ dg0s .req s12
+ dg0v .req v12
+ dg1s .req s13
+ dg1v .req v13
+ dg2s .req s14
+
+ .macro add_only, op, ev, rc, s0, dg1
+ .ifc \ev, ev
+ add t1.4s, v\s0\().4s, \rc\().4s
+ sha1h dg2s, dg0s
+ .ifnb \dg1
+ sha1\op dg0q, \dg1, t0.4s
+ .else
+ sha1\op dg0q, dg1s, t0.4s
+ .endif
+ .else
+ .ifnb \s0
+ add t0.4s, v\s0\().4s, \rc\().4s
+ .endif
+ sha1h dg1s, dg0s
+ sha1\op dg0q, dg2s, t1.4s
+ .endif
+ .endm
+
+ .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
+ sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
+ add_only \op, \ev, \rc, \s1, \dg1
+ sha1su1 v\s0\().4s, v\s3\().4s
+ .endm
+
+ .macro loadrc, k, val, tmp
+ movz \tmp, :abs_g0_nc:\val
+ movk \tmp, :abs_g1:\val
+ dup \k, \tmp
+ .endm
+
+ /*
+ * void sha1_armv8_ce_process(uint32_t state[5], uint8_t const *src,
+ * uint32_t blocks)
+ */
+ENTRY(sha1_armv8_ce_process)
+ /* load round constants */
+ loadrc k0.4s, 0x5a827999, w6
+ loadrc k1.4s, 0x6ed9eba1, w6
+ loadrc k2.4s, 0x8f1bbcdc, w6
+ loadrc k3.4s, 0xca62c1d6, w6
+
+ /* load state (4+1 digest states) */
+ ld1 {dgav.4s}, [x0]
+ ldr dgb, [x0, #16]
+
+ /* load input (64 bytes into v8->v11 16B vectors) */
+0: ld1 {v8.4s-v11.4s}, [x1], #64
+ sub w2, w2, #1
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ rev32 v8.16b, v8.16b
+ rev32 v9.16b, v9.16b
+ rev32 v10.16b, v10.16b
+ rev32 v11.16b, v11.16b
+#endif
+
+1: add t0.4s, v8.4s, k0.4s
+ mov dg0v.16b, dgav.16b
+
+ add_update c, ev, k0, 8, 9, 10, 11, dgb
+ add_update c, od, k0, 9, 10, 11, 8
+ add_update c, ev, k0, 10, 11, 8, 9
+ add_update c, od, k0, 11, 8, 9, 10
+ add_update c, ev, k1, 8, 9, 10, 11
+
+ add_update p, od, k1, 9, 10, 11, 8
+ add_update p, ev, k1, 10, 11, 8, 9
+ add_update p, od, k1, 11, 8, 9, 10
+ add_update p, ev, k1, 8, 9, 10, 11
+ add_update p, od, k2, 9, 10, 11, 8
+
+ add_update m, ev, k2, 10, 11, 8, 9
+ add_update m, od, k2, 11, 8, 9, 10
+ add_update m, ev, k2, 8, 9, 10, 11
+ add_update m, od, k2, 9, 10, 11, 8
+ add_update m, ev, k3, 10, 11, 8, 9
+
+ add_update p, od, k3, 11, 8, 9, 10
+ add_only p, ev, k3, 9
+ add_only p, od, k3, 10
+ add_only p, ev, k3, 11
+ add_only p, od
+
+ /* update state */
+ add dgbv.2s, dgbv.2s, dg1v.2s
+ add dgav.4s, dgav.4s, dg0v.4s
+
+ /* loop on next block? */
+ cbz w2, 2f
+ b 0b
+
+ /* store new state */
+2: st1 {dgav.4s}, [x0]
+ str dgb, [x0, #16]
+ mov w0, w2
+ ret
+ENDPROC(sha1_armv8_ce_process)
new file mode 100644
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sha1_ce_glue.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2022 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+#include <common.h>
+#include <u-boot/sha1.h>
+
+extern void sha1_armv8_ce_process(uint32_t state[5], uint8_t const *src,
+ uint32_t blocks);
+
+void sha1_process(sha1_context *ctx, const unsigned char *data,
+ unsigned int blocks)
+{
+ if (!blocks)
+ return;
+
+ sha1_armv8_ce_process(ctx->state, data, blocks);
+}
This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs that have support for the SHA-1 part of the ARM v8 Crypto Extensions. It greatly improves sha-1 based operations, about 10x faster on iMX8M evk board. ~12ms vs ~165ms for a 20MiB kernel sha-1 verification. asm implementation is a simplified version of the Linux version (from Ard Biesheuvel). Signed-off-by: Loic Poulain <loic.poulain@linaro.org> --- arch/arm/cpu/armv8/Kconfig | 10 +++ arch/arm/cpu/armv8/Makefile | 1 + arch/arm/cpu/armv8/sha1_ce_core.S | 132 ++++++++++++++++++++++++++++++++++++++ arch/arm/cpu/armv8/sha1_ce_glue.c | 21 ++++++ 4 files changed, 164 insertions(+) create mode 100644 arch/arm/cpu/armv8/sha1_ce_core.S create mode 100644 arch/arm/cpu/armv8/sha1_ce_glue.c