[resend,08/18] crypto: arm64/aes-ce-cipher - match round key endianness with generic code

Message ID	20170724102820.16534-9-ard.biesheuvel@linaro.org
State	Accepted
Commit	f402e3115e20b345bd6fbfcf463a506d958c7bf6
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: best guess record for domain of linux-crypto-owner@vger.kernel.org designates 209.132.180.67 as permitted sender) client-ip=209.132.180.67; From: Ard Biesheuvel <ard.biesheuvel@linaro.org> To: linux-crypto@vger.kernel.org, linux-arm-kernel@lists.infradead.org Cc: herbert@gondor.apana.org.au, dave.martin@arm.com, Ard Biesheuvel <ard.biesheuvel@linaro.org> Subject: [PATCH resend 08/18] crypto: arm64/aes-ce-cipher - match round key endianness with generic code Date: Mon, 24 Jul 2017 11:28:10 +0100 Message-Id: <20170724102820.16534-9-ard.biesheuvel@linaro.org> In-Reply-To: <20170724102820.16534-1-ard.biesheuvel@linaro.org> References: <20170724102820.16534-1-ard.biesheuvel@linaro.org> Sender: linux-crypto-owner@vger.kernel.org Precedence: bulk
Series	crypto: ARM/arm64 roundup for v4.14 \| expand [resend,00/18] crypto: ARM/arm64 roundup for v4.14 [resend,01/18] crypto/algapi - use separate dst and src operands for __crypto_xor() [resend,02/18] crypto/algapi - make crypto_xor() take separate dst and src arguments [resend,03/18] crypto: arm64/ghash-ce - add non-SIMD scalar fallback [resend,04/18] crypto: arm64/crct10dif - add non-SIMD generic fallback [resend,05/18] crypto: arm64/crc32 - add non-SIMD scalar fallback [resend,06/18] crypto: arm64/sha1-ce - add non-SIMD generic fallback [resend,07/18] crypto: arm64/sha2-ce - add non-SIMD scalar fallback [resend,08/18] crypto: arm64/aes-ce-cipher - match round key endianness with generic code [resend,09/18] crypto: arm64/aes-ce-cipher: add non-SIMD generic fallback [resend,10/18] crypto: arm64/aes-ce-ccm: add non-SIMD generic fallback [resend,11/18] crypto: arm64/aes-blk - add a non-SIMD fallback for synchronous CTR [resend,12/18] crypto: arm64/chacha20 - take may_use_simd() into account [resend,13/18] crypto: arm64/aes-bs - implement non-SIMD fallback for AES-CTR [resend,14/18] crypto: arm64/gcm - implement native driver using v8 Crypto Extensions [resend,15/18] crypto: arm/ghash - add NEON accelerated fallback for vmull.p64 [resend,16/18] crypto: arm64/ghash - add NEON accelerated fallback for 64-bit PMULL [resend,17/18] crypto: arm/aes - avoid expanded lookup tables in the final round [resend,18/18] crypto: arm64/aes - avoid expanded lookup tables in the final round

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 3363560c79b7..e3a375c4cb83 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -1,7 +1,7 @@ /* * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions * - * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data) beq 8f /* out of input? */ cbnz w8, 0b eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.16b}, [x4] /* load first round key */ +1: ld1 {v3.4s}, [x4] /* load first round key */ prfm pldl1strm, [x1] cmp w5, #12 /* which key size? */ add x6, x4, #16 @@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data) mov v5.16b, v3.16b b 4f 2: mov v4.16b, v3.16b - ld1 {v5.16b}, [x6], #16 /* load 2nd round key */ + ld1 {v5.4s}, [x6], #16 /* load 2nd round key */ 3: aese v0.16b, v4.16b aesmc v0.16b, v0.16b -4: ld1 {v3.16b}, [x6], #16 /* load next round key */ +4: ld1 {v3.4s}, [x6], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b -5: ld1 {v4.16b}, [x6], #16 /* load next round key */ +5: ld1 {v4.4s}, [x6], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b - ld1 {v5.16b}, [x6], #16 /* load next round key */ + ld1 {v5.4s}, [x6], #16 /* load next round key */ bpl 3b aese v0.16b, v4.16b subs w2, w2, #16 /* last data? */ @@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data) * u32 rounds); */ ENTRY(ce_aes_ccm_final) - ld1 {v3.16b}, [x2], #16 /* load first round key */ + ld1 {v3.4s}, [x2], #16 /* load first round key */ ld1 {v0.16b}, [x0] /* load mac */ cmp w3, #12 /* which key size? */ sub w3, w3, #2 /* modified # of rounds */ @@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final) mov v5.16b, v3.16b b 2f 0: mov v4.16b, v3.16b -1: ld1 {v5.16b}, [x2], #16 /* load next round key */ +1: ld1 {v5.4s}, [x2], #16 /* load next round key */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -2: ld1 {v3.16b}, [x2], #16 /* load next round key */ +2: ld1 {v3.4s}, [x2], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -3: ld1 {v4.16b}, [x2], #16 /* load next round key */ +3: ld1 {v4.4s}, [x2], #16 /* load next round key */ subs w3, w3, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b @@ -137,31 +137,31 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ cmp w4, #12 /* which key size? */ sub w7, w4, #2 /* get modified # of rounds */ ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.16b}, [x3] /* load first round key */ + ld1 {v3.4s}, [x3] /* load first round key */ add x10, x3, #16 bmi 1f bne 4f mov v5.16b, v3.16b b 3f 1: mov v4.16b, v3.16b - ld1 {v5.16b}, [x10], #16 /* load 2nd round key */ + ld1 {v5.4s}, [x10], #16 /* load 2nd round key */ 2: /* inner loop: 3 rounds, 2x interleaved */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -3: ld1 {v3.16b}, [x10], #16 /* load next round key */ +3: ld1 {v3.4s}, [x10], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -4: ld1 {v4.16b}, [x10], #16 /* load next round key */ +4: ld1 {v4.4s}, [x10], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b aese v1.16b, v3.16b aesmc v1.16b, v1.16b - ld1 {v5.16b}, [x10], #16 /* load next round key */ + ld1 {v5.4s}, [x10], #16 /* load next round key */ bpl 2b aese v0.16b, v4.16b aese v1.16b, v4.16b diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index 50d9fe11d0c8..a0a0e5e3a8b5 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -1,7 +1,7 @@ /* * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions * - * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,6 +9,7 @@ */ #include <asm/neon.h> +#include <asm/unaligned.h> #include <crypto/aes.h> #include <linux/cpufeature.h> #include <linux/crypto.h> @@ -47,24 +48,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.16b}, [%[key]], #16 ;" + " ld1 {v1.4s}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.16b}, [%[key]], #16 ;" + " ld1 {v3.4s}, [%[key]], #16 ;" "1: aese v0.16b, v2.16b ;" " aesmc v0.16b, v0.16b ;" - "2: ld1 {v1.16b}, [%[key]], #16 ;" + "2: ld1 {v1.4s}, [%[key]], #16 ;" " aese v0.16b, v3.16b ;" " aesmc v0.16b, v0.16b ;" - "3: ld1 {v2.16b}, [%[key]], #16 ;" + "3: ld1 {v2.4s}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aese v0.16b, v1.16b ;" " aesmc v0.16b, v0.16b ;" - " ld1 {v3.16b}, [%[key]], #16 ;" + " ld1 {v3.4s}, [%[key]], #16 ;" " bpl 1b ;" " aese v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -92,24 +93,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.16b}, [%[key]], #16 ;" + " ld1 {v1.4s}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.16b}, [%[key]], #16 ;" + " ld1 {v3.4s}, [%[key]], #16 ;" "1: aesd v0.16b, v2.16b ;" " aesimc v0.16b, v0.16b ;" - "2: ld1 {v1.16b}, [%[key]], #16 ;" + "2: ld1 {v1.4s}, [%[key]], #16 ;" " aesd v0.16b, v3.16b ;" " aesimc v0.16b, v0.16b ;" - "3: ld1 {v2.16b}, [%[key]], #16 ;" + "3: ld1 {v2.4s}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aesd v0.16b, v1.16b ;" " aesimc v0.16b, v0.16b ;" - " ld1 {v3.16b}, [%[key]], #16 ;" + " ld1 {v3.4s}, [%[key]], #16 ;" " bpl 1b ;" " aesd v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -165,20 +166,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, key_len != AES_KEYSIZE_256) return -EINVAL; - memcpy(ctx->key_enc, in_key, key_len); ctx->key_length = key_len; + for (i = 0; i < kwords; i++) + ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32)); kernel_neon_begin_partial(2); for (i = 0; i < sizeof(rcon); i++) { u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; -#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; -#else - rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^ - rki[0]; -#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; @@ -210,9 +207,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, key_dec[0] = key_enc[j]; for (i = 1, j--; j > 0; i++, j--) - __asm__("ld1 {v0.16b}, %[in] ;" + __asm__("ld1 {v0.4s}, %[in] ;" "aesimc v1.16b, v0.16b ;" - "st1 {v1.16b}, %[out] ;" + "st1 {v1.4s}, %[out] ;" : [out] "=Q"(key_dec[i]) : [in] "Q"(key_enc[j]) diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index b46093d567e5..50330f5c3adc 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -2,7 +2,7 @@ * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with * Crypto Extensions * - * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,11 +22,11 @@ cmp \rounds, #12 blo 2222f /* 128 bits */ beq 1111f /* 192 bits */ - ld1 {v17.16b-v18.16b}, [\rk], #32 -1111: ld1 {v19.16b-v20.16b}, [\rk], #32 -2222: ld1 {v21.16b-v24.16b}, [\rk], #64 - ld1 {v25.16b-v28.16b}, [\rk], #64 - ld1 {v29.16b-v31.16b}, [\rk] + ld1 {v17.4s-v18.4s}, [\rk], #32 +1111: ld1 {v19.4s-v20.4s}, [\rk], #32 +2222: ld1 {v21.4s-v24.4s}, [\rk], #64 + ld1 {v25.4s-v28.4s}, [\rk], #64 + ld1 {v29.4s-v31.4s}, [\rk] .endm /* prepare for encryption with key in rk[] */

[resend,08/18] crypto: arm64/aes-ce-cipher - match round key endianness with generic code

Commit Message

Patch