From patchwork Thu Feb  2 11:38:55 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Ard Biesheuvel <ard.biesheuvel@linaro.org>
X-Patchwork-Id: 93097
Delivered-To: patch@linaro.org
Received: by 10.140.20.99 with SMTP id 90csp55570qgi;
 Thu, 2 Feb 2017 03:39:07 -0800 (PST)
X-Received: by 10.98.74.84 with SMTP id x81mr9783297pfa.172.1486035547893;
 Thu, 02 Feb 2017 03:39:07 -0800 (PST)
Return-Path: <linux-crypto-owner@vger.kernel.org>
Received: from vger.kernel.org (vger.kernel.org. [209.132.180.67])
 by mx.google.com with ESMTP id
 g9si22034275plk.185.2017.02.02.03.39.07; 
 Thu, 02 Feb 2017 03:39:07 -0800 (PST)
Received-SPF: pass (google.com: best guess record for domain of
 linux-crypto-owner@vger.kernel.org designates 209.132.180.67
 as permitted sender) client-ip=209.132.180.67; 
Authentication-Results: mx.google.com; dkim=pass header.i=@linaro.org;
 spf=pass (google.com: best guess record for domain of
 linux-crypto-owner@vger.kernel.org designates 209.132.180.67
 as permitted sender)
 smtp.mailfrom=linux-crypto-owner@vger.kernel.org; 
 dmarc=pass (p=NONE sp=NONE dis=NONE) header.from=linaro.org
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 id S1751232AbdBBLjG (ORCPT <rfc822;victor.chong@linaro.org>
 + 1 other); Thu, 2 Feb 2017 06:39:06 -0500
Received: from mail-wm0-f53.google.com ([74.125.82.53]:38297 "EHLO
 mail-wm0-f53.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 with ESMTP id S1751019AbdBBLjF (ORCPT
 <rfc822;linux-crypto@vger.kernel.org>);
 Thu, 2 Feb 2017 06:39:05 -0500
Received: by mail-wm0-f53.google.com with SMTP id r141so84925555wmg.1
 for <linux-crypto@vger.kernel.org>;
 Thu, 02 Feb 2017 03:39:05 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google; 
 h=from:to:cc:subject:date:message-id;
 bh=YVU3edjpz5GxptZUk030444pcSpN8P6ww8AA1+ti6v0=;
 b=Z2RQe25o3FQFMqESK0LCezyfmO2vi8I3Dn2uKH40BtC1xAHbp1zhH46DLX8TuKhj2w
 wVxZwk1UH48DvZ8h766aE32obo7X4BMtDAnJxTb7DOCAc525+PaSqQfu+JuoIi/BB8xI
 XAEofiHMPVEg/258ob31HN8izxxwaMcAkc6uo=
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 d=1e100.net; s=20161025;
 h=x-gm-message-state:from:to:cc:subject:date:message-id;
 bh=YVU3edjpz5GxptZUk030444pcSpN8P6ww8AA1+ti6v0=;
 b=YW+eknmnhU6zzJRPTryH/NEOAdcjXNst9oDWHccUpS69liLKnnEKsHR31IU1zbELH7
 yAdRuIr1tdmNBbXe1+te5ktcKXpCZoKeeCGuYXp9tOOkuaeMb/fRI1CHYvFAZXRqb7bc
 B3N5mTVyMnyZjFz80G3U61K9qnHJupMaAt+hrBs2UnrHgBv+H39Kh21n9L48joLy80xH
 X45vlJm40lV/3Zn9MX1uaMIzNsbxxUsjpMLEYyvbOMW0Ljpa/74vpxoVbOCG2ZY+W1/g
 I/84uUBkcOGtp/8g7zWjjvVzMI3kmDi2gQKVM7T/JKZpulpXjjIJRSFv/PDpHl61chNA
 vMkg==
X-Gm-Message-State: AIkVDXJZJQCgf87CzKm3agtPwYHBo5h+yUgXtC7FcLcjzZBanyHsejC6PVvAOKfguMk/3zu+
X-Received: by 10.28.137.203 with SMTP id l194mr22424980wmd.63.1486035544253; 
 Thu, 02 Feb 2017 03:39:04 -0800 (PST)
Received: from localhost.localdomain ([105.130.17.13])
 by smtp.gmail.com with ESMTPSA id
 40sm39603114wry.22.2017.02.02.03.39.00
 (version=TLS1_2 cipher=ECDHE-RSA-AES128-SHA bits=128/128);
 Thu, 02 Feb 2017 03:39:01 -0800 (PST)
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
To: linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au
Cc: linux-arm-kernel@lists.infradead.org,
 Ard Biesheuvel <ard.biesheuvel@linaro.org>
Subject: [PATCH 1/2] crypto: arm64/aes - don't use IV buffer to return final
 keystream block
Date: Thu,  2 Feb 2017 11:38:55 +0000
Message-Id: <1486035536-895-1-git-send-email-ard.biesheuvel@linaro.org>
X-Mailer: git-send-email 2.7.4
Sender: linux-crypto-owner@vger.kernel.org
Precedence: bulk
List-ID: <linux-crypto.vger.kernel.org>
X-Mailing-List: linux-crypto@vger.kernel.org

The arm64 bit sliced AES core code uses the IV buffer to pass the final
keystream block back to the glue code if the input is not a multiple of
the block size, so that the asm code does not have to deal with anything
except 16 byte blocks. This is done under the assumption that the outgoing
IV is meaningless anyway in this case, given that chaining is no longer
possible under these circumstances.

However, as it turns out, the CCM driver does expect the IV to retain
a value that is equal to the original IV except for the counter value,
and even interprets byte zero as a length indicator, which may result
in memory corruption if the IV is overwritten with something else.

So use a separate buffer to return the final keystream block.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
Note that this patch includes the fix

crypto: arm64/aes-neon-bs - honour iv_out requirement in CTR mode

which I sent out earlier.

 arch/arm64/crypto/aes-neonbs-core.S | 37 ++++++++++++--------
 arch/arm64/crypto/aes-neonbs-glue.c |  9 ++---
 2 files changed, 28 insertions(+), 18 deletions(-)

-- 
2.7.4

diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index 8d0cdaa2768d..ca0472500433 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -853,13 +853,15 @@ ENDPROC(aesbs_xts_decrypt)
 
 	/*
 	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-	 *		     int rounds, int blocks, u8 iv[], bool final)
+	 *		     int rounds, int blocks, u8 iv[], u8 final[])
 	 */
 ENTRY(aesbs_ctr_encrypt)
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
-	add		x4, x4, x6		// do one extra block if final
+	cmp		x6, #0
+	cset		x10, ne
+	add		x4, x4, x10		// do one extra block if final
 
 	ldp		x7, x8, [x5]
 	ld1		{v0.16b}, [x5]
@@ -874,19 +876,26 @@ CPU_LE(	rev		x8, x8		)
 	csel		x4, x4, xzr, pl
 	csel		x9, x9, xzr, le
 
+	tbnz		x9, #1, 0f
 	next_ctr	v1
+	tbnz		x9, #2, 0f
 	next_ctr	v2
+	tbnz		x9, #3, 0f
 	next_ctr	v3
+	tbnz		x9, #4, 0f
 	next_ctr	v4
+	tbnz		x9, #5, 0f
 	next_ctr	v5
+	tbnz		x9, #6, 0f
 	next_ctr	v6
+	tbnz		x9, #7, 0f
 	next_ctr	v7
 
 0:	mov		bskey, x2
 	mov		rounds, x3
 	bl		aesbs_encrypt8
 
-	lsr		x9, x9, x6		// disregard the extra block
+	lsr		x9, x9, x10		// disregard the extra block
 	tbnz		x9, #0, 0f
 
 	ld1		{v8.16b}, [x1], #16
@@ -928,36 +937,36 @@ CPU_LE(	rev		x8, x8		)
 	eor		v5.16b, v5.16b, v15.16b
 	st1		{v5.16b}, [x0], #16
 
-	next_ctr	v0
+8:	next_ctr	v0
 	cbnz		x4, 99b
 
 0:	st1		{v0.16b}, [x5]
-8:	ldp		x29, x30, [sp], #16
+	ldp		x29, x30, [sp], #16
 	ret
 
 	/*
-	 * If we are handling the tail of the input (x6 == 1), return the
-	 * final keystream block back to the caller via the IV buffer.
+	 * If we are handling the tail of the input (x6 != NULL), return the
+	 * final keystream block back to the caller.
 	 */
 1:	cbz		x6, 8b
-	st1		{v1.16b}, [x5]
+	st1		{v1.16b}, [x6]
 	b		8b
 2:	cbz		x6, 8b
-	st1		{v4.16b}, [x5]
+	st1		{v4.16b}, [x6]
 	b		8b
 3:	cbz		x6, 8b
-	st1		{v6.16b}, [x5]
+	st1		{v6.16b}, [x6]
 	b		8b
 4:	cbz		x6, 8b
-	st1		{v3.16b}, [x5]
+	st1		{v3.16b}, [x6]
 	b		8b
 5:	cbz		x6, 8b
-	st1		{v7.16b}, [x5]
+	st1		{v7.16b}, [x6]
 	b		8b
 6:	cbz		x6, 8b
-	st1		{v2.16b}, [x5]
+	st1		{v2.16b}, [x6]
 	b		8b
 7:	cbz		x6, 8b
-	st1		{v5.16b}, [x5]
+	st1		{v5.16b}, [x6]
 	b		8b
 ENDPROC(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index 863e436ecf89..db2501d93550 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -34,7 +34,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]);
 
 asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				  int rounds, int blocks, u8 iv[], bool final);
+				  int rounds, int blocks, u8 iv[], u8 final[]);
 
 asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]);
@@ -201,6 +201,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
+	u8 buf[AES_BLOCK_SIZE];
 	int err;
 
 	err = skcipher_walk_virt(&walk, req, true);
@@ -208,12 +209,12 @@ static int ctr_encrypt(struct skcipher_request *req)
 	kernel_neon_begin();
 	while (walk.nbytes > 0) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
-		bool final = (walk.total % AES_BLOCK_SIZE) != 0;
+		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
 
 		if (walk.nbytes < walk.total) {
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
-			final = false;
+			final = NULL;
 		}
 
 		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
@@ -225,7 +226,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
 			if (dst != src)
 				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-			crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE);
+			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
 
 			err = skcipher_walk_done(&walk, 0);
 			break;