
[v2,12/15] crypto: arm64/sm4 - add CE implementation for ESSIV mode

Message ID 20221018071006.5717-13-tianjia.zhang@linux.alibaba.com
State New
Series Optimizing SM3 and SM4 algorithms using arm64 NEON/CE instructions

Commit Message

tianjia.zhang Oct. 18, 2022, 7:10 a.m. UTC
This patch adds a CE-optimized assembly implementation for ESSIV mode.
The assembly part is implemented by reusing the CBC mode code.
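
At the C level the construction reduces to the rough sketch below;
sm4_encrypt_block() and sm4_cbc_crypt() are illustrative helper names
only, not symbols introduced by this patch:

	/*
	 * ESSIV on top of CBC, as the assembly realizes it: encrypt the
	 * sector IV once with a key derived from SM3(user key), then run
	 * plain CBC with the user key.
	 */
	static void sm4_essiv_cbc_sketch(const struct sm4_ctx *key1, /* expanded user key      */
					 const struct sm4_ctx *key2, /* expanded SM3(user key) */
					 u8 *dst, const u8 *src,
					 u8 iv[SM4_BLOCK_SIZE],      /* plain sector IV        */
					 unsigned int nblocks, bool enc)
	{
		/* ESSIV step: IV' = E_key2(IV), done once per request */
		sm4_encrypt_block(key2, iv, iv);

		/*
		 * Ordinary CBC with the user key; the assembly reuses the
		 * existing CBC code by branching to .Lcbc_enc_loop_4x /
		 * .Lcbc_dec_loop_8x after the IV encryption above.
		 */
		sm4_cbc_crypt(key1, dst, src, iv, nblocks, enc);
	}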

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
---
 arch/arm64/crypto/sm4-ce-core.S |  42 +++++++++++
 arch/arm64/crypto/sm4-ce-glue.c | 128 ++++++++++++++++++++++++++++++++
 2 files changed, 170 insertions(+)

Comments

Eric Biggers Oct. 20, 2022, 3:58 a.m. UTC | #1
On Tue, Oct 18, 2022 at 03:10:03PM +0800, Tianjia Zhang wrote:
> This patch adds a CE-optimized assembly implementation for ESSIV mode.
> The assembly part is implemented by reusing the CBC mode code.
> 
> Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>

Is there still a use case for CBC-ESSIV mode these days, now that everyone is
using XTS instead?

- Eric
tianjia.zhang Oct. 21, 2022, 2:47 a.m. UTC | #2
Hi Eric,

On 10/20/22 11:58 AM, Eric Biggers wrote:
> On Tue, Oct 18, 2022 at 03:10:03PM +0800, Tianjia Zhang wrote:
>> This patch adds a CE-optimized assembly implementation for ESSIV mode.
>> The assembly part is implemented by reusing the CBC mode code.
>>
>> Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
> 
> Is there still a use case for CBC-ESSIV mode these days, now that everyone is
> using XTS instead?
> 
> - Eric

XTS is already the mainstream choice, but CBC-ESSIV is still an optional
fallback algorithm, especially in the block crypto and fscrypt subsystems.
I'm currently working on adding SM4 support to those subsystems.
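
Once registered, the mode is reachable through the normal skcipher API
under the cra_name added by this patch. A minimal allocation sketch (the
wrapper function and its error handling are only illustrative):

	#include <crypto/skcipher.h>
	#include <crypto/sm4.h>

	static int sm4_essiv_cbc_example(const u8 *key)
	{
		struct crypto_skcipher *tfm;
		int err;

		/* cra_name registered by this patch */
		tfm = crypto_alloc_skcipher("essiv(cbc(sm4),sm3)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		/* the setkey in this patch only accepts SM4_KEY_SIZE (16-byte) keys */
		err = crypto_skcipher_setkey(tfm, key, SM4_KEY_SIZE);

		crypto_free_skcipher(tfm);
		return err;
	}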

Cheers,
Tianjia
tianjia.zhang Oct. 25, 2022, 7:40 a.m. UTC | #3
Hi Eric,

On 10/25/22 1:20 PM, Eric Biggers wrote:
> On Fri, Oct 21, 2022 at 10:47:14AM +0800, Tianjia Zhang wrote:
>> Hi Eric,
>>
>> On 10/20/22 11:58 AM, Eric Biggers wrote:
>>> On Tue, Oct 18, 2022 at 03:10:03PM +0800, Tianjia Zhang wrote:
>>>> This patch adds a CE-optimized assembly implementation for ESSIV mode.
>>>> The assembly part is implemented by reusing the CBC mode code.
>>>>
>>>> Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
>>>
>>> Is there still a use case for CBC-ESSIV mode these days, now that everyone is
>>> using XTS instead?
>>>
>>> - Eric
>>
>> XTS is already the mainstream choice, but CBC-ESSIV is still an optional
>> fallback algorithm, especially in the block crypto and fscrypt subsystems.
>> I'm currently working on adding SM4 support to those subsystems.
>>
> 
> The only reason that AES-CBC-ESSIV support was added to fs/crypto/ was because
> someone had a low-power embedded device with a hardware crypto accelerator that
> only supported AES-CBC.
> 
> Nothing like that is relevant here, as this is just a software implementation.
> 
> Please just don't include ESSIV.  There's no need to implement random useless
> algorithms.  It could always be added later if a use case actually arises.
> 
> - Eric

Thanks for this information. I will remove the ESSIV code in the next
version of this series.

Best regards,
Tianjia

Patch

diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index ddd15ec09d38..6b923c3209a0 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -154,6 +154,26 @@  SYM_FUNC_START(sm4_ce_crypt)
 	ret;
 SYM_FUNC_END(sm4_ce_crypt)
 
+.align 3
+SYM_FUNC_START(sm4_ce_essiv_cbc_enc)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: iv (big endian, 128 bit)
+	 *   w4: nblocks
+	 *   x5: round key array for IV
+	 */
+	ld1		{RIV.16b}, [x3]
+
+	SM4_PREPARE(x5)
+
+	SM4_CRYPT_BLK(RIV)
+
+	SM4_PREPARE(x0)
+
+	b		.Lcbc_enc_loop_4x
+
 .align 3
 SYM_FUNC_START(sm4_ce_cbc_enc)
 	/* input:
@@ -208,6 +228,27 @@  SYM_FUNC_START(sm4_ce_cbc_enc)
 
 	ret
 SYM_FUNC_END(sm4_ce_cbc_enc)
+SYM_FUNC_END(sm4_ce_essiv_cbc_enc)
+
+.align 3
+SYM_FUNC_START(sm4_ce_essiv_cbc_dec)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: iv (big endian, 128 bit)
+	 *   w4: nblocks
+	 *   x5: round key array for IV
+	 */
+	ld1		{RIV.16b}, [x3]
+
+	SM4_PREPARE(x5)
+
+	SM4_CRYPT_BLK(RIV)
+
+	SM4_PREPARE(x0)
+
+	b		.Lcbc_dec_loop_8x
 
 .align 3
 SYM_FUNC_START(sm4_ce_cbc_dec)
@@ -306,6 +347,7 @@  SYM_FUNC_START(sm4_ce_cbc_dec)
 
 	ret
 SYM_FUNC_END(sm4_ce_cbc_dec)
+SYM_FUNC_END(sm4_ce_essiv_cbc_dec)
 
 .align 3
 SYM_FUNC_START(sm4_ce_cbc_cts_enc)
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 8222766f712a..6267ec1cfac0 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -19,6 +19,8 @@ 
 #include <crypto/scatterwalk.h>
 #include <crypto/xts.h>
 #include <crypto/sm4.h>
+#include <crypto/sm3.h>
+#include <crypto/hash.h>
 
 #define BYTES2BLKS(nbytes)	((nbytes) >> 4)
 
@@ -35,6 +37,12 @@  asmlinkage void sm4_ce_cbc_cts_enc(const u32 *rkey, u8 *dst, const u8 *src,
 				   u8 *iv, unsigned int nbytes);
 asmlinkage void sm4_ce_cbc_cts_dec(const u32 *rkey, u8 *dst, const u8 *src,
 				   u8 *iv, unsigned int nbytes);
+asmlinkage void sm4_ce_essiv_cbc_enc(const u32 *rkey1, u8 *dst, const u8 *src,
+				     u8 *iv, unsigned int nblocks,
+				     const u32 *rkey2_enc);
+asmlinkage void sm4_ce_essiv_cbc_dec(const u32 *rkey1, u8 *dst, const u8 *src,
+				     u8 *iv, unsigned int nblocks,
+				     const u32 *rkey2_enc);
 asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src,
 			       u8 *iv, unsigned int nblks);
 asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
@@ -58,6 +66,12 @@  struct sm4_xts_ctx {
 	struct sm4_ctx key2;
 };
 
+struct sm4_essiv_cbc_ctx {
+	struct sm4_ctx key1;
+	struct sm4_ctx key2;
+	struct crypto_shash *hash;
+};
+
 static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		      unsigned int key_len)
 {
@@ -96,6 +110,27 @@  static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	return 0;
 }
 
+static int sm4_essiv_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key,
+				unsigned int key_len)
+{
+	struct sm4_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u8 __aligned(8) digest[SM3_DIGEST_SIZE];
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	crypto_shash_tfm_digest(ctx->hash, key, key_len, digest);
+
+	kernel_neon_begin();
+	sm4_ce_expand_key(key, ctx->key1.rkey_enc,
+			  ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+	sm4_ce_expand_key(digest, ctx->key2.rkey_enc,
+			  ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+	kernel_neon_end();
+
+	return 0;
+}
+
 static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
 {
 	struct skcipher_walk walk;
@@ -497,6 +532,81 @@  static int sm4_xts_decrypt(struct skcipher_request *req)
 	return sm4_xts_crypt(req, false);
 }
 
+static int sm4_essiv_cbc_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct sm4_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->hash = crypto_alloc_shash("sm3", 0, 0);
+
+	return PTR_ERR_OR_ZERO(ctx->hash);
+}
+
+static void sm4_essiv_cbc_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct sm4_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_shash(ctx->hash);
+}
+
+static int sm4_essiv_cbc_crypt(struct skcipher_request *req, bool encrypt)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct sm4_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nblocks;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	if ((nblocks = walk.nbytes / SM4_BLOCK_SIZE) > 0) {
+		kernel_neon_begin();
+
+		if (encrypt)
+			sm4_ce_essiv_cbc_enc(ctx->key1.rkey_enc,
+					     walk.dst.virt.addr,
+					     walk.src.virt.addr, walk.iv,
+					     nblocks, ctx->key2.rkey_enc);
+		else
+			sm4_ce_essiv_cbc_dec(ctx->key1.rkey_dec,
+					     walk.dst.virt.addr,
+					     walk.src.virt.addr, walk.iv,
+					     nblocks, ctx->key2.rkey_enc);
+
+		kernel_neon_end();
+
+		err = skcipher_walk_done(&walk, walk.nbytes % SM4_BLOCK_SIZE);
+		if (err)
+			return err;
+	}
+
+	while ((nblocks = walk.nbytes / SM4_BLOCK_SIZE) > 0) {
+		kernel_neon_begin();
+
+		if (encrypt)
+			sm4_ce_cbc_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+				       walk.src.virt.addr, walk.iv, nblocks);
+		else
+			sm4_ce_cbc_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+				       walk.src.virt.addr, walk.iv, nblocks);
+
+		kernel_neon_end();
+
+		err = skcipher_walk_done(&walk, walk.nbytes % SM4_BLOCK_SIZE);
+	}
+
+	return err;
+}
+
+static int sm4_essiv_cbc_encrypt(struct skcipher_request *req)
+{
+	return sm4_essiv_cbc_crypt(req, true);
+}
+
+static int sm4_essiv_cbc_decrypt(struct skcipher_request *req)
+{
+	return sm4_essiv_cbc_crypt(req, false);
+}
+
 static struct skcipher_alg sm4_algs[] = {
 	{
 		.base = {
@@ -591,6 +701,23 @@  static struct skcipher_alg sm4_algs[] = {
 		.setkey		= sm4_xts_setkey,
 		.encrypt	= sm4_xts_encrypt,
 		.decrypt	= sm4_xts_decrypt,
+	}, {
+		.base = {
+			.cra_name		= "essiv(cbc(sm4),sm3)",
+			.cra_driver_name	= "essiv-cbc-sm4-sm3-ce",
+			.cra_priority		= 400 + 1,
+			.cra_blocksize		= SM4_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct sm4_essiv_cbc_ctx),
+			.cra_module		= THIS_MODULE,
+		},
+		.min_keysize	= SM4_KEY_SIZE,
+		.max_keysize	= SM4_KEY_SIZE,
+		.ivsize		= SM4_BLOCK_SIZE,
+		.setkey		= sm4_essiv_cbc_setkey,
+		.encrypt	= sm4_essiv_cbc_encrypt,
+		.decrypt	= sm4_essiv_cbc_decrypt,
+		.init		= sm4_essiv_cbc_init_tfm,
+		.exit		= sm4_essiv_cbc_exit_tfm,
 	}
 };
 
@@ -616,5 +743,6 @@  MODULE_ALIAS_CRYPTO("cfb(sm4)");
 MODULE_ALIAS_CRYPTO("ctr(sm4)");
 MODULE_ALIAS_CRYPTO("cts(cbc(sm4))");
 MODULE_ALIAS_CRYPTO("xts(sm4)");
+MODULE_ALIAS_CRYPTO("essiv(cbc(sm4),sm3)");
 MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
 MODULE_LICENSE("GPL v2");