@@ -15,6 +15,7 @@
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "crypto/aes.h"
+#include "crypto/aes-round.h"
#include "crypto/sm4.h"
#include "vec_internal.h"
@@ -45,6 +46,8 @@ static void clear_tail_16(void *vd, uint32_t desc)
clear_tail(vd, opr_sz, max_sz);
}
+static const AESState aes_zero = { };  /* zero round key for aesenc_SB_SR_AK */
+
static void do_crypto_aese(uint64_t *rd, uint64_t *rn, uint64_t *rm,
const uint8_t *sbox, const uint8_t *shift)
{
@@ -70,7 +73,26 @@ void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
intptr_t i, opr_sz = simd_oprsz(desc);
for (i = 0; i < opr_sz; i += 16) {
- do_crypto_aese(vd + i, vn + i, vm + i, AES_sbox, AES_shifts);
+ AESState *ad = (AESState *)(vd + i);
+ AESState *st = (AESState *)(vn + i);
+ AESState *rk = (AESState *)(vm + i);
+ AESState t;
+
+ /*
+ * On big-endian hosts our uint64_t halves are swapped relative to the
+ * API's layout. The Arm AddRoundKey comes first, while the API's comes
+ * last: perform the xor here, and pass an all-zero key to the API.
+ */
+ if (HOST_BIG_ENDIAN) {
+ t.d[0] = st->d[1] ^ rk->d[1];
+ t.d[1] = st->d[0] ^ rk->d[0];
+ aesenc_SB_SR_AK(&t, &t, &aes_zero, false);
+ ad->d[0] = t.d[1];
+ ad->d[1] = t.d[0];
+ } else {
+ t.v = st->v ^ rk->v;
+ aesenc_SB_SR_AK(ad, &t, &aes_zero, false);
+ }
}
clear_tail(vd, opr_sz, simd_maxsz(desc));
}