@@ -62,11 +62,6 @@
*Visit http://software.intel.com/en-us/articles/
*and refer to improving-the-performance-of-the-secure-hash-algorithm-1/
*
- *Updates 20-byte SHA-1 record at start of 'state', from 'input', for
- *even number of 'blocks' consecutive 64-byte blocks.
- *
- *extern "C" void sha1_transform_avx2(
- * struct sha1_state *state, const u8* input, int blocks );
*/
#include <linux/linkage.h>
@@ -629,13 +624,22 @@ _loop3:
_end:
.endm
-/*
- * macro implements SHA-1 function's body for several 64-byte blocks
- * param: function's name
- */
-.macro SHA1_VECTOR_ASM name
- SYM_FUNC_START(\name)
+.text
+
+/**
+ * sha1_transform_avx2 - Calculate SHA1 hash using the x86 AVX2 feature set
+ * @digest: address of current 20-byte hash value (%rdi, CTX macro)
+ * @data: address of data (%rsi, BUF macro);
+ * data size must be a multiple of 64 bytes
+ * @blocks: number of 64-byte blocks (%rdx, CNT macro)
+ *
+ * This function supports 64-bit CPUs.
+ *
+ * Return: none
+ * Prototype: asmlinkage void sha1_transform_avx2(u32 *digest, const u8 *data, int blocks)
+ */
+SYM_FUNC_START(sha1_transform_avx2)
push %rbx
push %r12
push %r13
@@ -675,9 +679,7 @@ _loop3:
pop %rbx
RET
-
- SYM_FUNC_END(\name)
-.endm
+SYM_FUNC_END(sha1_transform_avx2)
.section .rodata
@@ -706,6 +708,4 @@ BSWAP_SHUFB_CTL:
.long 0x04050607
.long 0x08090a0b
.long 0x0c0d0e0f
-.text
-SHA1_VECTOR_ASM sha1_transform_avx2
@@ -71,9 +71,16 @@
#define MSG3 %xmm6
#define SHUF_MASK %xmm7
+.text
-/*
- * Intel SHA Extensions optimized implementation of a SHA-1 update function
+/**
+ * sha1_transform_ni - Calculate SHA1 hash using the x86 SHA-NI feature set
+ * @digest: address of current 20-byte hash value (%rdi, DIGEST_PTR macro)
+ * @data: address of data (%rsi, DATA_PTR macro);
+ * data size must be a multiple of 64 bytes
+ * @blocks: number of 64-byte blocks (%rdx, NUM_BLKS macro)
+ *
+ * This function supports 64-bit CPUs.
*
* The function takes a pointer to the current hash values, a pointer to the
* input data, and a number of 64 byte blocks to process. Once all blocks have
@@ -85,15 +92,10 @@
* The indented lines in the loop are instructions related to rounds processing.
* The non-indented lines are instructions related to the message schedule.
*
- * void sha1_ni_transform(uint32_t *digest, const void *data,
- uint32_t numBlocks)
- * digest : pointer to digest
- * data: pointer to input data
- * numBlocks: Number of blocks to process
+ * Return: none
+ * Prototype: asmlinkage void sha1_transform_ni(u32 *digest, const u8 *data, int blocks)
*/
-.text
-.align 32
-SYM_FUNC_START(sha1_ni_transform)
+SYM_FUNC_START(sha1_transform_ni)
push %rbp
mov %rsp, %rbp
sub $FRAME_SIZE, %rsp
@@ -450,20 +450,24 @@ BSWAP_SHUFB_CTL:
.long 0x0c0d0e0f
-.section .text
-
W_PRECALC_SSSE3
.macro xmm_mov a, b
movdqu \a,\b
.endm
-/*
- * SSSE3 optimized implementation:
+.text
+
+/**
+ * sha1_transform_ssse3 - Calculate SHA1 hash using the x86 SSSE3 feature set
+ * @digest: address of current 20-byte hash value (%rdi, CTX macro)
+ * @data: address of data (%rsi, BUF macro);
+ * data size must be a multiple of 64 bytes
+ * @blocks: number of 64-byte blocks (%rdx, CNT macro)
*
- * extern "C" void sha1_transform_ssse3(struct sha1_state *state,
- * const u8 *data, int blocks);
+ * This function supports 64-bit CPUs.
*
- * Note that struct sha1_state is assumed to begin with u32 state[5].
+ * Return: none
+ * Prototype: asmlinkage void sha1_transform_ssse3(u32 *digest, const u8 *data, int blocks)
*/
SHA1_VECTOR_ASM sha1_transform_ssse3
@@ -545,9 +549,16 @@ W_PRECALC_AVX
vmovdqu \a,\b
.endm
-
-/* AVX optimized implementation:
- * extern "C" void sha1_transform_avx(struct sha1_state *state,
- * const u8 *data, int blocks);
+/**
+ * sha1_transform_avx - Calculate SHA1 hash using the x86 AVX feature set
+ * @digest: address of current 20-byte hash value (%rdi, CTX macro)
+ * @data: address of data (%rsi, BUF macro);
+ * data size must be a multiple of 64 bytes
+ * @blocks: number of 64-byte blocks (%rdx, CNT macro)
+ *
+ * This function supports 64-bit CPUs.
+ *
+ * Return: none
+ * Prototype: asmlinkage void sha1_transform_avx(u32 *digest, const u8 *data, int blocks)
*/
SHA1_VECTOR_ASM sha1_transform_avx
@@ -94,9 +94,9 @@ SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
BYTE_FLIP_MASK = %xmm13
-NUM_BLKS = %rdx # 3rd arg
-INP = %rsi # 2nd arg
CTX = %rdi # 1st arg
+INP = %rsi # 2nd arg
+NUM_BLKS = %rdx # 3rd arg
SRND = %rsi # clobbers INP
c = %ecx
@@ -339,15 +339,21 @@ a = TMP_
ROTATE_ARGS
.endm
-########################################################################
-## void sha256_transform_avx(state sha256_state *state, const u8 *data, int blocks)
-## arg 1 : pointer to state
-## arg 2 : pointer to input data
-## arg 3 : Num blocks
-########################################################################
.text
+
+/**
+ * sha256_transform_avx - Calculate SHA256 hash using the x86 AVX feature set
+ * @digest: address of current 32-byte hash value (%rdi, CTX macro)
+ * @data: address of data (%rsi, INP macro);
+ * data size must be a multiple of 64 bytes
+ * @blocks: number of 64-byte blocks (%rdx, NUM_BLKS macro)
+ *
+ * This function supports 64-bit CPUs.
+ *
+ * Return: none
+ * Prototype: asmlinkage void sha256_transform_avx(u32 *digest, const u8 *data, int blocks)
+ */
SYM_FUNC_START(sha256_transform_avx)
-.align 32
pushq %rbx
pushq %r12
pushq %r13
@@ -89,9 +89,9 @@ BYTE_FLIP_MASK = %ymm13
X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK
-NUM_BLKS = %rdx # 3rd arg
-INP = %rsi # 2nd arg
CTX = %rdi # 1st arg
+INP = %rsi # 2nd arg
+NUM_BLKS = %rdx # 3rd arg
c = %ecx
d = %r8d
e = %edx # clobbers NUM_BLKS
@@ -516,15 +516,22 @@ STACK_SIZE = _CTX + _CTX_SIZE
.endm
-########################################################################
-## void sha256_transform_rorx(struct sha256_state *state, const u8 *data, int blocks)
-## arg 1 : pointer to state
-## arg 2 : pointer to input data
-## arg 3 : Num blocks
-########################################################################
.text
+
+/**
+ * sha256_transform_rorx - Calculate SHA256 hash using the x86 AVX2 feature set
+ * including the RORX (rotate right logical without affecting flags) instruction
+ * @digest: address of current 32-byte hash value (%rdi, CTX macro)
+ * @data: address of data (%rsi, INP macro);
+ * data size must be a multiple of 64 bytes
+ * @blocks: number of 64-byte blocks (%rdx, NUM_BLKS macro)
+ *
+ * This function supports 64-bit CPUs.
+ *
+ * Return: none
+ * Prototype: asmlinkage void sha256_transform_rorx(u32 *digest, const u8 *data, int blocks)
+ */
SYM_FUNC_START(sha256_transform_rorx)
-.align 32
pushq %rbx
pushq %r12
pushq %r13
@@ -87,9 +87,9 @@ SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
BYTE_FLIP_MASK = %xmm12
-NUM_BLKS = %rdx # 3rd arg
-INP = %rsi # 2nd arg
CTX = %rdi # 1st arg
+INP = %rsi # 2nd arg
+NUM_BLKS = %rdx # 3rd arg
SRND = %rsi # clobbers INP
c = %ecx
@@ -346,17 +346,21 @@ a = TMP_
ROTATE_ARGS
.endm
-########################################################################
-## void sha256_transform_ssse3(struct sha256_state *state, const u8 *data,
-## int blocks);
-## arg 1 : pointer to state
-## (struct sha256_state is assumed to begin with u32 state[8])
-## arg 2 : pointer to input data
-## arg 3 : Num blocks
-########################################################################
.text
+
+/**
+ * sha256_transform_ssse3 - Calculate SHA256 hash using the x86 SSSE3 feature set
+ * @digest: address of current 32-byte hash value (%rdi, CTX macro)
+ * @data: address of data (%rsi, INP macro);
+ * data size must be a multiple of 64 bytes
+ * @blocks: number of 64-byte blocks (%rdx, NUM_BLKS macro)
+ *
+ * This function supports 64-bit CPUs.
+ *
+ * Return: none
+ * Prototype: asmlinkage void sha256_transform_ssse3(u32 *digest, const u8 *data, int blocks)
+ */
SYM_FUNC_START(sha256_transform_ssse3)
-.align 32
pushq %rbx
pushq %r12
pushq %r13
@@ -75,8 +75,16 @@
#define ABEF_SAVE %xmm9
#define CDGH_SAVE %xmm10
-/*
- * Intel SHA Extensions optimized implementation of a SHA-256 update function
+.text
+
+/**
+ * sha256_transform_ni - Calculate SHA256 hash using the x86 SHA-NI feature set
+ * @digest: address of current 32-byte hash value (%rdi, DIGEST_PTR macro)
+ * @data: address of data (%rsi, DATA_PTR macro);
+ * data size must be a multiple of 64 bytes
+ * @blocks: number of 64-byte blocks (%rdx, NUM_BLKS macro)
+ *
+ * This function supports 64-bit CPUs.
*
* The function takes a pointer to the current hash values, a pointer to the
* input data, and a number of 64 byte blocks to process. Once all blocks have
@@ -88,17 +96,10 @@
* The indented lines in the loop are instructions related to rounds processing.
* The non-indented lines are instructions related to the message schedule.
*
- * void sha256_ni_transform(uint32_t *digest, const void *data,
- uint32_t numBlocks);
- * digest : pointer to digest
- * data: pointer to input data
- * numBlocks: Number of blocks to process
+ * Return: none
+ * Prototype: asmlinkage void sha256_transform_ni(u32 *digest, const u8 *data, int blocks)
*/
-
-.text
-.align 32
-SYM_FUNC_START(sha256_ni_transform)
-
+SYM_FUNC_START(sha256_transform_ni)
shl $6, NUM_BLKS /* convert to bytes */
jz .Ldone_hash
add DATA_PTR, NUM_BLKS /* pointer to end of data */
@@ -49,15 +49,10 @@
#include <linux/linkage.h>
-.text
-
# Virtual Registers
-# ARG1
-digest = %rdi
-# ARG2
-msg = %rsi
-# ARG3
-msglen = %rdx
+digest = %rdi # ARG1
+msg = %rsi # ARG2
+msglen = %rdx # ARG3
T1 = %rcx
T2 = %r8
a_64 = %r9
@@ -265,14 +260,20 @@ frame_size = frame_WK + WK_SIZE
RotateState
.endm
-########################################################################
-# void sha512_transform_avx(sha512_state *state, const u8 *data, int blocks)
-# Purpose: Updates the SHA512 digest stored at "state" with the message
-# stored in "data".
-# The size of the message pointed to by "data" must be an integer multiple
-# of SHA512 message blocks.
-# "blocks" is the message length in SHA512 blocks
-########################################################################
+.text
+
+/**
+ * sha512_transform_avx - Calculate SHA512 hash using the x86 AVX feature set
+ * @digest: address of current 64-byte hash value (%rdi, digest macro)
+ * @data: address of data (%rsi, msg macro);
+ * data size must be a multiple of 128 bytes
+ * @blocks: number of 128-byte blocks (%rdx, msglen macro)
+ *
+ * This function supports 64-bit CPUs.
+ *
+ * Return: none
+ * Prototype: asmlinkage void sha512_transform_avx(u64 *digest, const u8 *data, int blocks)
+ */
SYM_FUNC_START(sha512_transform_avx)
test msglen, msglen
je nowork
@@ -51,8 +51,6 @@
#include <linux/linkage.h>
-.text
-
# Virtual Registers
Y_0 = %ymm4
Y_1 = %ymm5
@@ -68,13 +66,10 @@ XFER = YTMP0
BYTE_FLIP_MASK = %ymm9
-# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
-CTX1 = %rdi
+CTX1 = %rdi # 1st arg, which is saved to the stack and accessed later via %r12
CTX2 = %r12
-# 2nd arg
-INP = %rsi
-# 3rd arg
-NUM_BLKS = %rdx
+INP = %rsi # 2nd arg
+NUM_BLKS = %rdx # 3rd arg
c = %rcx
d = %r8
@@ -557,14 +552,21 @@ frame_size = frame_CTX + CTX_SIZE
.endm
-########################################################################
-# void sha512_transform_rorx(sha512_state *state, const u8 *data, int blocks)
-# Purpose: Updates the SHA512 digest stored at "state" with the message
-# stored in "data".
-# The size of the message pointed to by "data" must be an integer multiple
-# of SHA512 message blocks.
-# "blocks" is the message length in SHA512 blocks
-########################################################################
+.text
+
+/**
+ * sha512_transform_rorx - Calculate SHA512 hash using the x86 AVX2 feature set
+ * including the RORX (rotate right logical without affecting flags) instruction
+ * @digest: address of current 64-byte hash value (%rdi, CTX1 macro)
+ * @data: address of data (%rsi, INP macro);
+ * data size must be a multiple of 128 bytes
+ * @blocks: number of 128-byte blocks (%rdx, NUM_BLKS macro)
+ *
+ * This function supports 64-bit CPUs.
+ *
+ * Return: none
+ * Prototype: asmlinkage void sha512_transform_rorx(u64 *digest, const u8 *data, int blocks)
+ */
SYM_FUNC_START(sha512_transform_rorx)
# Save GPRs
push %rbx
@@ -49,15 +49,10 @@
#include <linux/linkage.h>
-.text
-
# Virtual Registers
-# ARG1
-digest = %rdi
-# ARG2
-msg = %rsi
-# ARG3
-msglen = %rdx
+digest = %rdi # ARG1
+msg = %rsi # ARG2
+msglen = %rdx # ARG3
T1 = %rcx
T2 = %r8
a_64 = %r9
@@ -264,18 +259,21 @@ frame_size = frame_WK + WK_SIZE
RotateState
.endm
-########################################################################
-## void sha512_transform_ssse3(struct sha512_state *state, const u8 *data,
-## int blocks);
-# (struct sha512_state is assumed to begin with u64 state[8])
-# Purpose: Updates the SHA512 digest stored at "state" with the message
-# stored in "data".
-# The size of the message pointed to by "data" must be an integer multiple
-# of SHA512 message blocks.
-# "blocks" is the message length in SHA512 blocks.
-########################################################################
-SYM_FUNC_START(sha512_transform_ssse3)
+.text
+/**
+ * sha512_transform_ssse3 - Calculate SHA512 hash using the x86 SSSE3 feature set
+ * @digest: address of current 64-byte hash value (%rdi, digest macro)
+ * @data: address of data (%rsi, msg macro);
+ * data size must be a multiple of 128 bytes
+ * @blocks: number of 128-byte blocks (%rdx, msglen macro)
+ *
+ * This function supports 64-bit CPUs.
+ *
+ * Return: none
+ * Prototype: asmlinkage void sha512_transform_ssse3(u64 *digest, const u8 *data, int blocks)
+ */
+SYM_FUNC_START(sha512_transform_ssse3)
test msglen, msglen
je nowork
Add kernel-doc comments for assembly language functions exported to C glue code. Remove .align directives that are overridden by SYM_FUNC_START (which includes .align 4). Signed-off-by: Robert Elliott <elliott@hpe.com> --- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 32 +++++++++++------------ arch/x86/crypto/sha1_ni_asm.S | 22 +++++++++------- arch/x86/crypto/sha1_ssse3_asm.S | 33 +++++++++++++++-------- arch/x86/crypto/sha256-avx-asm.S | 24 ++++++++++------- arch/x86/crypto/sha256-avx2-asm.S | 25 +++++++++++------- arch/x86/crypto/sha256-ssse3-asm.S | 26 +++++++++++-------- arch/x86/crypto/sha256_ni_asm.S | 25 +++++++++--------- arch/x86/crypto/sha512-avx-asm.S | 33 +++++++++++------------ arch/x86/crypto/sha512-avx2-asm.S | 34 ++++++++++++------------ arch/x86/crypto/sha512-ssse3-asm.S | 36 ++++++++++++-------------- 10 files changed, 161 insertions(+), 129 deletions(-)