@@ -10,6 +10,7 @@
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/simd.h>
@@ -21,7 +22,8 @@ asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
-static bool poly1305_use_avx2 __ro_after_init;
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
static void poly1305_simd_mult(u32 *a, const u32 *b)
{
@@ -64,7 +66,7 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
}
if (IS_ENABLED(CONFIG_AS_AVX2) &&
- poly1305_use_avx2 &&
+ static_branch_likely(&poly1305_use_avx2) &&
srclen >= POLY1305_BLOCK_SIZE * 4) {
if (unlikely(dctx->rset < 4)) {
if (dctx->rset < 2) {
@@ -103,10 +105,9 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
return srclen;
}
-static int poly1305_simd_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
+void poly1305_update(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int srclen)
{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
bool do_simd = (srclen > 288) && crypto_simd_usable();
unsigned int bytes;
@@ -118,7 +119,8 @@ static int poly1305_simd_update(struct shash_desc *desc,
dctx->buflen += bytes;
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- if (likely(do_simd)) {
+ if (static_branch_likely(&poly1305_use_simd) &&
+ likely(do_simd)) {
kernel_fpu_begin();
poly1305_simd_blocks(dctx, dctx->buf,
POLY1305_BLOCK_SIZE);
@@ -132,7 +134,8 @@ static int poly1305_simd_update(struct shash_desc *desc,
}
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- if (likely(do_simd)) {
+ if (static_branch_likely(&poly1305_use_simd) &&
+ likely(do_simd)) {
kernel_fpu_begin();
bytes = poly1305_simd_blocks(dctx, src, srclen);
kernel_fpu_end();
@@ -148,6 +151,7 @@ static int poly1305_simd_update(struct shash_desc *desc,
memcpy(dctx->buf, src, srclen);
}
}
+EXPORT_SYMBOL(poly1305_update);
static int crypto_poly1305_init(struct shash_desc *desc)
{
@@ -172,6 +176,27 @@ static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
return 0;
}
+static int poly1305_simd_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ poly1305_update(dctx, src, srclen);
+ return 0;
+}
+
+void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key)
+{
+ poly1305_init_generic(desc, key);
+}
+EXPORT_SYMBOL(poly1305_init);
+
+void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest)
+{
+ poly1305_final_generic(desc, digest);
+}
+EXPORT_SYMBOL(poly1305_final);
+
static struct shash_alg alg = {
.digestsize = POLY1305_DIGEST_SIZE,
.init = crypto_poly1305_init,
@@ -190,15 +215,15 @@ static struct shash_alg alg = {
static int __init poly1305_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2))
- return -ENODEV;
-
- poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
- boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
- alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
- if (poly1305_use_avx2)
- alg.descsize += 10 * sizeof(u32);
+ return 0;
+
+ static_branch_enable(&poly1305_use_simd);
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+ static_branch_enable(&poly1305_use_avx2);
return crypto_register_shash(&alg);
}
@@ -687,6 +687,7 @@ config CRYPTO_ARCH_HAVE_LIB_POLY1305
config CRYPTO_LIB_POLY1305_RSIZE
int
+ default 4 if X86_64
default 1
config CRYPTO_LIB_POLY1305
@@ -711,6 +712,7 @@ config CRYPTO_POLY1305_X86_64
tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
depends on X86 && 64BIT
select CRYPTO_LIB_POLY1305_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
help
Poly1305 authenticator algorithm, RFC7539.
Implement the init/update/final Poly1305 library routines in the accelerated SIMD driver for x86 so they are accessible to users of the Poly1305 library interface as well. Given that calls into the library API will always go through the routines in this module if it is enabled, switch to static keys to select the optimal implementation available (which may be none at all, in which case we defer to the generic implementation for all invocations). Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> --- arch/x86/crypto/poly1305_glue.c | 57 ++++++++++++++------ crypto/Kconfig | 2 + 2 files changed, 43 insertions(+), 16 deletions(-) -- 2.20.1