@@ -128,6 +128,7 @@ config CRYPTO_CRC32_ARM_CE
config CRYPTO_CHACHA20_NEON
tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
select CRYPTO_BLKCIPHER
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
config CRYPTO_NHPOLY1305_NEON
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
@@ -11,6 +11,7 @@
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -29,6 +30,8 @@ asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
const u32 *state, int nrounds);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
+
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
@@ -55,6 +58,42 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
}
}
+void hchacha_block(const u32 *state, u32 *stream, int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
+ !static_branch_likely(&use_neon) || !crypto_simd_usable()) {
+ hchacha_block_arm(state, stream, nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, stream, nrounds);
+ kernel_neon_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block);
+
+void chacha_init(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init);
+
+void chacha_crypt(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
+ !static_branch_likely(&use_neon) ||
+ bytes <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) {
+ chacha_doarm(dst, src, bytes, state, nrounds);
+ state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
+ return;
+ }
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, bytes, nrounds);
+ kernel_neon_end();
+}
+EXPORT_SYMBOL(chacha_crypt);
+
static int chacha_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
@@ -108,7 +147,7 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = round_down(nbytes, walk.stride);
- if (!do_neon) {
+ if (!static_branch_likely(&use_neon) || !do_neon) {
chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
nbytes, state, ctx->nrounds);
state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
@@ -160,7 +199,7 @@ static int xchacha_neon(struct skcipher_request *req)
chacha_init_generic(state, ctx->key, req->iv);
- if (!crypto_simd_usable()) {
+ if (!static_branch_likely(&use_neon) || !crypto_simd_usable()) {
hchacha_block_arm(state, subctx.key, ctx->nrounds);
} else {
kernel_neon_begin();
@@ -309,6 +348,8 @@ static int __init chacha_simd_mod_init(void)
for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
neon_algs[i].base.cra_priority = 0;
break;
+ default:
+ static_branch_enable(&use_neon);
}
err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
Expose the accelerated NEON ChaCha routine directly as a symbol export so that users of the ChaCha library API can use it directly. Given that calls into the library API will always go through the routines in this module if it is enabled, switch to static keys to select the optimal implementation available (which may be none at all, in which case we defer to the generic implementation for all invocations). Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> --- arch/arm/crypto/Kconfig | 1 + arch/arm/crypto/chacha-glue.c | 45 +++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) -- 2.20.1