[v5,5/5] crypto: arm64/aes-ccm - avoid by-ref argument for ce_aes_ccm_auth_data

Message ID: 20210521102053.66609-6-ardb@kernel.org
State: Accepted
Commit: 898387e40cf538b7d1605e05d456699fe418a77f
Series: running kernel mode SIMD with softirqs disabled

Commit Message

Ard Biesheuvel May 21, 2021, 10:20 a.m. UTC
With the SIMD code path removed, we can clean up the CCM auth-only path
a bit further, by passing the 'macp' input buffer pointer by value,
rather than by reference, and taking the output value from the
function's return value.

This way, the compiler is no longer forced to allocate macp on the
stack. This is not expected to make any difference in practice, it just
makes for slightly cleaner code.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/aes-ce-ccm-core.S | 23 ++++++++++----------
 arch/arm64/crypto/aes-ce-ccm-glue.c | 17 +++++----------
 2 files changed, 17 insertions(+), 23 deletions(-)
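
At the C level, the change boils down to the calling-convention difference
sketched below (illustrative prototypes using standard C types, not the kernel
declarations verbatim; the auth_data_old/auth_data_new names are hypothetical):

#include <stdint.h>

/*
 * Before: macp is an in/out parameter, so every caller needs an
 * addressable stack slot whose address it can pass in.
 */
void auth_data_old(uint8_t mac[], const uint8_t in[], uint32_t abytes,
		   uint32_t *macp, const uint32_t rk[], uint32_t rounds);

/*
 * After: macp goes in by value and the updated count comes back as the
 * return value, so it can stay in a register on both sides of the call.
 */
uint32_t auth_data_new(uint8_t mac[], const uint8_t in[], uint32_t abytes,
		       uint32_t macp, const uint32_t rk[], uint32_t rounds);

/*
 * Caller side: the '&macp' argument becomes a plain assignment:
 *   old: auth_data_old(mac, p, n, &macp, rk, rounds);
 *   new: macp = auth_data_new(mac, p, n, macp, rk, rounds);
 */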

Comments

Eric Biggers May 24, 2021, 9:53 p.m. UTC | #1
On Fri, May 21, 2021 at 12:20:53PM +0200, Ard Biesheuvel wrote:
> With the SIMD code path removed, we can clean up the CCM auth-only path
> a bit further, by passing the 'macp' input buffer pointer by value,
> rather than by reference, and taking the output value from the
> function's return value.
> 
> This way, the compiler is no longer forced to allocate macp on the
> stack. This is not expected to make any difference in practice, it just
> makes for slightly cleaner code.
> 
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  arch/arm64/crypto/aes-ce-ccm-core.S | 23 ++++++++++----------
>  arch/arm64/crypto/aes-ce-ccm-glue.c | 17 +++++----------
>  2 files changed, 17 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
> index 8adff299fcd3..b03f7f71f893 100644
> --- a/arch/arm64/crypto/aes-ce-ccm-core.S
> +++ b/arch/arm64/crypto/aes-ce-ccm-core.S
> @@ -12,22 +12,21 @@
>  	.arch	armv8-a+crypto
>  
>  	/*
> -	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
> -	 *			     u32 *macp, u8 const rk[], u32 rounds);
> +	 * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
> +	 *			    u32 macp, u8 const rk[], u32 rounds);

How is this different from 'u8 mac[]' which is already one of the parameters?

- Eric
Ard Biesheuvel May 26, 2021, 9:14 a.m. UTC | #2
On Mon, 24 May 2021 at 23:53, Eric Biggers <ebiggers@kernel.org> wrote:
>
> On Fri, May 21, 2021 at 12:20:53PM +0200, Ard Biesheuvel wrote:
> > With the SIMD code path removed, we can clean up the CCM auth-only path
> > a bit further, by passing the 'macp' input buffer pointer by value,
> > rather than by reference, and taking the output value from the
> > function's return value.
> >
> > This way, the compiler is no longer forced to allocate macp on the
> > stack. This is not expected to make any difference in practice, it just
> > makes for slightly cleaner code.
> >
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> >  arch/arm64/crypto/aes-ce-ccm-core.S | 23 ++++++++++----------
> >  arch/arm64/crypto/aes-ce-ccm-glue.c | 17 +++++----------
> >  2 files changed, 17 insertions(+), 23 deletions(-)
> >
> > diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
> > index 8adff299fcd3..b03f7f71f893 100644
> > --- a/arch/arm64/crypto/aes-ce-ccm-core.S
> > +++ b/arch/arm64/crypto/aes-ce-ccm-core.S
> > @@ -12,22 +12,21 @@
> >       .arch   armv8-a+crypto
> >
> >       /*
> > -      * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
> > -      *                           u32 *macp, u8 const rk[], u32 rounds);
> > +      * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
> > +      *                          u32 macp, u8 const rk[], u32 rounds);
> >
> How is this different from 'u8 mac[]' which is already one of the parameters?
>

mac[] is the combined digest/input buffer, and macp is the index into
it that keeps track of how much new input has been accumulated. I.e.,
instead of using a separate buffer of the same size and accumulating
bytes until we can perform the XOR + AES transformation, the partial
input is accumulated into mac[] directly using XOR.
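
For illustration, the scheme can be modelled in C roughly as follows (a
conceptual sketch of what the assembler implements, not the kernel code;
aes_encrypt_block() is a hypothetical stand-in for the AES rounds):

#include <stdint.h>

/* Hypothetical stand-in for one AES encryption of the 16-byte state. */
void aes_encrypt_block(uint8_t state[16], const uint32_t rk[], uint32_t rounds);

uint32_t auth_data_model(uint8_t mac[16], const uint8_t *in, uint32_t abytes,
			 uint32_t macp, const uint32_t rk[], uint32_t rounds)
{
	while (abytes--) {
		mac[macp++] ^= *in++;		/* XOR input straight into mac[] */
		if (macp == 16) {		/* full block accumulated */
			aes_encrypt_block(mac, rk, rounds);	/* CBC-MAC step */
			macp = 0;
		}
	}
	return macp;	/* leftover byte count, returned by value */
}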

Patch

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 8adff299fcd3..b03f7f71f893 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -12,22 +12,21 @@
 	.arch	armv8-a+crypto
 
 	/*
-	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
-	 *			     u32 *macp, u8 const rk[], u32 rounds);
+	 * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+	 *			    u32 macp, u8 const rk[], u32 rounds);
 	 */
 SYM_FUNC_START(ce_aes_ccm_auth_data)
-	ldr	w8, [x3]			/* leftover from prev round? */
 	ld1	{v0.16b}, [x0]			/* load mac */
-	cbz	w8, 1f
-	sub	w8, w8, #16
+	cbz	w3, 1f
+	sub	w3, w3, #16
 	eor	v1.16b, v1.16b, v1.16b
 0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
 	subs	w2, w2, #1
-	add	w8, w8, #1
+	add	w3, w3, #1
 	ins	v1.b[0], w7
 	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
 	beq	8f				/* out of input? */
-	cbnz	w8, 0b
+	cbnz	w3, 0b
 	eor	v0.16b, v0.16b, v1.16b
 1:	ld1	{v3.4s}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
@@ -62,7 +61,7 @@ SYM_FUNC_START(ce_aes_ccm_auth_data)
 	beq	10f
 	adds	w2, w2, #16
 	beq	10f
-	mov	w8, w2
+	mov	w3, w2
 7:	ldrb	w7, [x1], #1
 	umov	w6, v0.b[0]
 	eor	w6, w6, w7
@@ -71,15 +70,15 @@ SYM_FUNC_START(ce_aes_ccm_auth_data)
 	beq	10f
 	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
 	b	7b
-8:	cbz	w8, 91f
-	mov	w7, w8
-	add	w8, w8, #16
+8:	cbz	w3, 91f
+	mov	w7, w3
+	add	w3, w3, #16
 9:	ext	v1.16b, v1.16b, v1.16b, #1
 	adds	w7, w7, #1
 	bne	9b
 91:	eor	v0.16b, v0.16b, v1.16b
 	st1	{v0.16b}, [x0]
-10:	str	w8, [x3]
+10:	mov	w0, w3
 	ret
 SYM_FUNC_END(ce_aes_ccm_auth_data)
 
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 8effd18429ac..d4e87b4e348f 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -29,8 +29,8 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
 	return 6 + ctx->key_length / 4;
 }
 
-asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
-				     u32 *macp, u32 const rk[], u32 rounds);
+asmlinkage u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+				    u32 macp, u32 const rk[], u32 rounds);
 
 asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
 				   u32 const rk[], u32 rounds, u8 mac[],
@@ -96,13 +96,6 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
 	return 0;
 }
 
-static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
-			   u32 abytes, u32 *macp)
-{
-	ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
-			     num_rounds(key));
-}
-
 static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -122,7 +115,8 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 		ltag.len = 6;
 	}
 
-	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp);
+	macp = ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, macp,
+				    ctx->key_enc, num_rounds(ctx));
 	scatterwalk_start(&walk, req->src);
 
 	do {
@@ -134,7 +128,8 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 			n = scatterwalk_clamp(&walk, len);
 		}
 		p = scatterwalk_map(&walk);
-		ccm_update_mac(ctx, mac, p, n, &macp);
+		macp = ce_aes_ccm_auth_data(mac, p, n, macp, ctx->key_enc,
+					    num_rounds(ctx));
 		len -= n;
 
 		scatterwalk_unmap(p);