--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -516,7 +516,10 @@ DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -24,11 +24,11 @@ union CRYPTO_STATE {
};
#ifdef HOST_WORDS_BIGENDIAN
-#define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8])
-#define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2])
+#define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8])
+#define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2])
#else
-#define CR_ST_BYTE(state, i) (state.bytes[i])
-#define CR_ST_WORD(state, i) (state.words[i])
+#define CR_ST_BYTE(state, i) ((state).bytes[i])
+#define CR_ST_WORD(state, i) ((state).words[i])
#endif
/*
@@ -258,49 +258,74 @@ static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
return (x & y) | ((x | y) & z);
}
-void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
+void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t d0, d1;
+
+ d0 = d[1] ^ d[0] ^ m[0];
+ d1 = n[0] ^ d[1] ^ m[1];
+ d[0] = d0;
+ d[1] = d1;
+
+ clear_tail_16(vd, desc);
+}
+
+static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
+ uint64_t *rm, uint32_t desc,
+ uint32_t (*fn)(union CRYPTO_STATE *d))
{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ int i;
- if (op == 3) { /* sha1su0 */
- d.l[0] ^= d.l[1] ^ m.l[0];
- d.l[1] ^= n.l[0] ^ m.l[1];
- } else {
- int i;
+ for (i = 0; i < 4; i++) {
+ uint32_t t = fn(&d);
- for (i = 0; i < 4; i++) {
- uint32_t t;
+ t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
+ + CR_ST_WORD(m, i);
- switch (op) {
- case 0: /* sha1c */
- t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
- break;
- case 1: /* sha1p */
- t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
- break;
- case 2: /* sha1m */
- t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
- break;
- default:
- g_assert_not_reached();
- }
- t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
- + CR_ST_WORD(m, i);
-
- CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
- CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
- CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
- CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
- CR_ST_WORD(d, 0) = t;
- }
+ CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
+ CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
+ CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
+ CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
+ CR_ST_WORD(d, 0) = t;
}
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(rd, desc);
+}
+
+static uint32_t do_sha1c(union CRYPTO_STATE *d)
+{
+ return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
+}
+
+void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
+}
+
+static uint32_t do_sha1p(union CRYPTO_STATE *d)
+{
+ return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
+}
+
+void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
+}
+
+static uint32_t do_sha1m(union CRYPTO_STATE *d)
+{
+ return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
+}
+
+void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
}
void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14527,10 +14527,19 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
switch (opcode) {
case 0: /* SHA1C */
+ genfn = gen_helper_crypto_sha1c;
+ feature = dc_isar_feature(aa64_sha1, s);
+ break;
case 1: /* SHA1P */
+ genfn = gen_helper_crypto_sha1p;
+ feature = dc_isar_feature(aa64_sha1, s);
+ break;
case 2: /* SHA1M */
+ genfn = gen_helper_crypto_sha1m;
+ feature = dc_isar_feature(aa64_sha1, s);
+ break;
case 3: /* SHA1SU0 */
- genfn = NULL;
+ genfn = gen_helper_crypto_sha1su0;
feature = dc_isar_feature(aa64_sha1, s);
break;
case 4: /* SHA256H */
@@ -14558,23 +14567,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
- if (genfn) {
- gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
- } else {
- TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
- TCGv_ptr tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- TCGv_ptr tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- TCGv_ptr tcg_rm_ptr = vec_full_reg_ptr(s, rm);
-
- gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
- tcg_rm_ptr, tcg_opcode);
-
- tcg_temp_free_i32(tcg_opcode);
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_ptr(tcg_rm_ptr);
- }
+ gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
}
/* Crypto two-reg SHA
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5305,7 +5305,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int vec_size;
uint32_t imm;
TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
- TCGv_ptr ptr1, ptr2, ptr3;
+ TCGv_ptr ptr1;
TCGv_i64 tmp64;
/* FIXME: this access check should not take precedence over UNDEF
@@ -5353,38 +5353,46 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
*/
if (!q) {
return 1;
- }
- if (!u) { /* SHA-1 */
- if (!dc_isar_feature(aa32_sha1, s)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- tmp4 = tcg_const_i32(size);
- gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
- tcg_temp_free_i32(tmp4);
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_ptr(ptr3);
- } else { /* SHA-256 */
+ } else {
gen_helper_gvec_3 *fn;
- if (!dc_isar_feature(aa32_sha2, s)) {
- return 1;
- }
- switch (size) {
- case 0:
- fn = gen_helper_crypto_sha256h;
- break;
- case 1:
- fn = gen_helper_crypto_sha256h2;
- break;
- case 2:
- fn = gen_helper_crypto_sha256su1;
- break;
- default:
- return 1;
+ if (!u) { /* SHA-1 */
+ if (!dc_isar_feature(aa32_sha1, s)) {
+ return 1;
+ }
+ switch (size) {
+ case 0:
+ fn = gen_helper_crypto_sha1c;
+ break;
+ case 1:
+ fn = gen_helper_crypto_sha1p;
+ break;
+ case 2:
+ fn = gen_helper_crypto_sha1m;
+ break;
+ case 3:
+ fn = gen_helper_crypto_sha1su0;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else { /* SHA-256 */
+ if (!dc_isar_feature(aa32_sha2, s)) {
+ return 1;
+ }
+ switch (size) {
+ case 0:
+ fn = gen_helper_crypto_sha256h;
+ break;
+ case 1:
+ fn = gen_helper_crypto_sha256h2;
+ break;
+ case 2:
+ fn = gen_helper_crypto_sha256su1;
+ break;
+ default:
+ return 1;
+ }
}
tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16, 0, fn);
}
Rather than passing an opcode to a helper, fully decode the
operation at translate time.  Use clear_tail_16 to zap the balance
of the SVE register with the AdvSIMD write.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper.h        |  5 +-
 target/arm/crypto_helper.c | 99 ++++++++++++++++++++++++--------------
 target/arm/translate-a64.c | 29 +++++------
 target/arm/translate.c     | 70 +++++++++++++++------------
 4 files changed, 116 insertions(+), 87 deletions(-)

-- 
2.20.1
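
For readers without the rest of crypto_helper.c at hand, the following
standalone C sketch illustrates the two ideas the message above relies
on: resolving the SHA-1 round function once at translate time instead
of switching on an opcode inside the helper, and clearing the tail of
a wider SVE register after the 16-byte AdvSIMD write.  The
simd_oprsz()/simd_maxsz() packing, the 32-byte register, and the
dispatch table here are simplified assumptions for the demo and do not
match QEMU's real simd_desc() encoding; cho, par, and maj mirror the
round functions in crypto_helper.c (maj appears verbatim in the hunk
above).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical descriptor packing: low half = operation size in
 * bytes, high half = full ("SVE") register size in bytes.  This is
 * illustrative only, not QEMU's actual encoding. */
static uint32_t simd_oprsz(uint32_t desc) { return desc & 0xffff; }
static uint32_t simd_maxsz(uint32_t desc) { return desc >> 16; }

/* Zero every byte of the destination register beyond the 16-byte
 * AdvSIMD result, as the patch's clear_tail_16 calls are meant to. */
static void clear_tail_16(void *vd, uint32_t desc)
{
    uint32_t opr = simd_oprsz(desc), max = simd_maxsz(desc);
    memset((char *)vd + opr, 0, max - opr);
}

/* SHA-1 round functions per FIPS 180-4: Ch, Parity, Maj. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & (y ^ z)) ^ z;
}

static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    return x ^ y ^ z;
}

static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & y) | ((x | y) & z);
}

typedef uint32_t round_fn(uint32_t, uint32_t, uint32_t);

int main(void)
{
    /* "Translate time": pick the round function once, from the
     * opcode, rather than re-dispatching on every helper call. */
    static round_fn *const table[] = { cho, par, maj };
    unsigned opcode = 2;                /* SHA1M -> maj */
    round_fn *fn = table[opcode];

    uint8_t reg[32];                    /* a 32-byte "SVE" register */
    memset(reg, 0xff, sizeof(reg));
    uint32_t desc = (32u << 16) | 16u;  /* oprsz = 16, maxsz = 32 */
    clear_tail_16(reg, desc);

    printf("round fn(1,2,3) = %u, first tail byte = %u\n",
           fn(1, 2, 3), reg[16]);
    return 0;
}

With the round function chosen at decode time, each runtime helper
reduces to one indirect call plus the tail clear, which is exactly the
shape of the crypto_sha1_3reg refactor in the hunks above.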