Message ID | 20180122172643.29742-3-ard.biesheuvel@linaro.org |
---|---|
State | Superseded |
Series | target-arm: add SHA-3, SM3 and SHA512 instruction support |
On 22 January 2018 at 17:26, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> This implements emulation of the new SHA-3 instructions that have
> been added as an optional extension to the ARMv8 Crypto Extensions
> in ARM v8.2.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  target/arm/cpu.h           |   1 +
>  target/arm/translate-a64.c | 148 +++++++++++++++++++-
>  2 files changed, 145 insertions(+), 4 deletions(-)
[...]
> +    if (op0 < 2) {
> +        TCG_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];

Apologies, there's a typo here: TCGv_i64 not TCG_i64

Let me know if I need to resend.

[...]
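For reference, a minimal sketch of the declaration in question as submitted and with the typo corrected; this only illustrates the fix, it is not a further change to the patch:

    /* as submitted (does not compile): */
    TCG_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];

    /* with the typo fixed: */
    TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];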
On 23 January 2018 at 20:09, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 22 January 2018 at 17:26, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
>> This implements emulation of the new SHA-3 instructions that have
>> been added as an optional extension to the ARMv8 Crypto Extensions
>> in ARM v8.2.
>>
>> +    if (op0 < 2) {
>> +        TCG_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
>
> Apologies, there's a typo here: TCGv_i64 not TCG_i64
>
> Let me know if I need to resend.

No, if that's the only issue I'll just fix it up locally.

>> +        tcg_temp_free(tcg_op1);
>> +        tcg_temp_free(tcg_op2);
>> +        tcg_temp_free(tcg_op3);
>> +        tcg_temp_free(tcg_res[0]);
>> +        tcg_temp_free(tcg_res[1]);

These I think should all be tcg_temp_free_i64(). The unsuffixed version is
for "TCGv", which is a value of size target_ulong. As it happens that's
always i64 for code in translate-a64.c, so it's only a stylistic difference,
but the rest of the code explicitly uses i64.

I'll fix this locally too.

thanks
-- PMM
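Concretely, a minimal sketch of what that cleanup would look like with the explicit _i64 suffix, assuming the temporaries remain TCGv_i64 as in the patch:

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_op3);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);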
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 32a18510e70b..d0b19e0cbc88 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1373,6 +1373,7 @@ enum arm_features {
     ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */
     ARM_FEATURE_SVE, /* has Scalable Vector Extension */
     ARM_FEATURE_V8_SHA512, /* implements SHA512 part of v8 Crypto Extensions */
+    ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 888f5a39a283..10f2e518f303 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -11162,9 +11162,10 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
             feature = ARM_FEATURE_V8_SHA512;
             genfn = gen_helper_crypto_sha512su1;
             break;
-        default:
-            unallocated_encoding(s);
-            return;
+        case 3: /* RAX1 */
+            feature = ARM_FEATURE_V8_SHA3;
+            genfn = NULL;
+            break;
         }
     } else {
         unallocated_encoding(s);
@@ -11193,7 +11194,28 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
         tcg_temp_free_ptr(tcg_rn_ptr);
         tcg_temp_free_ptr(tcg_rm_ptr);
     } else {
-        g_assert_not_reached();
+        TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
+        int pass;
+
+        tcg_op1 = tcg_temp_new_i64();
+        tcg_op2 = tcg_temp_new_i64();
+        tcg_res[0] = tcg_temp_new_i64();
+        tcg_res[1] = tcg_temp_new_i64();
+
+        for (pass = 0; pass < 2; pass++) {
+            read_vec_element(s, tcg_op1, rn, pass, MO_64);
+            read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+            tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
+            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
+        }
+        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
+        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+
+        tcg_temp_free(tcg_op1);
+        tcg_temp_free(tcg_op2);
+        tcg_temp_free(tcg_res[0]);
+        tcg_temp_free(tcg_res[1]);
     }
 }
 
@@ -11240,6 +11262,122 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
     tcg_temp_free_ptr(tcg_rn_ptr);
 }
 
+/* Crypto four-register
+ *  31               23 22 21 20  16 15  14  10 9    5 4    0
+ * +-------------------+-----+------+---+------+------+------+
+ * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
+ * +-------------------+-----+------+---+------+------+------+
+ */
+static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
+{
+    int op0 = extract32(insn, 21, 2);
+    int rm = extract32(insn, 16, 5);
+    int ra = extract32(insn, 10, 5);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    int feature;
+
+    switch (op0) {
+    case 0: /* EOR3 */
+    case 1: /* BCAX */
+        feature = ARM_FEATURE_V8_SHA3;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (!arm_dc_feature(s, feature)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
+    if (op0 < 2) {
+        TCG_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
+        int pass;
+
+        tcg_op1 = tcg_temp_new_i64();
+        tcg_op2 = tcg_temp_new_i64();
+        tcg_op3 = tcg_temp_new_i64();
+        tcg_res[0] = tcg_temp_new_i64();
+        tcg_res[1] = tcg_temp_new_i64();
+
+        for (pass = 0; pass < 2; pass++) {
+            read_vec_element(s, tcg_op1, rn, pass, MO_64);
+            read_vec_element(s, tcg_op2, rm, pass, MO_64);
+            read_vec_element(s, tcg_op3, ra, pass, MO_64);
+
+            if (op0 == 0) {
+                /* EOR3 */
+                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
+            } else {
+                /* BCAX */
+                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
+            }
+            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
+        }
+        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
+        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+
+        tcg_temp_free(tcg_op1);
+        tcg_temp_free(tcg_op2);
+        tcg_temp_free(tcg_op3);
+        tcg_temp_free(tcg_res[0]);
+        tcg_temp_free(tcg_res[1]);
+    } else {
+        g_assert_not_reached();
+    }
+}
+
+/* Crypto XAR
+ *  31                   21 20  16 15    10 9    5 4    0
+ * +-----------------------+------+--------+------+------+
+ * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
+ * +-----------------------+------+--------+------+------+
+ */
+static void disas_crypto_xar(DisasContext *s, uint32_t insn)
+{
+    int rm = extract32(insn, 16, 5);
+    int imm6 = extract32(insn, 10, 6);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
+    int pass;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
+    tcg_op1 = tcg_temp_new_i64();
+    tcg_op2 = tcg_temp_new_i64();
+    tcg_res[0] = tcg_temp_new_i64();
+    tcg_res[1] = tcg_temp_new_i64();
+
+    for (pass = 0; pass < 2; pass++) {
+        read_vec_element(s, tcg_op1, rn, pass, MO_64);
+        read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+        tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
+        tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
+    }
+    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
+    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+
+    tcg_temp_free(tcg_op1);
+    tcg_temp_free(tcg_op2);
+    tcg_temp_free(tcg_res[0]);
+    tcg_temp_free(tcg_res[1]);
+}
+
 /* C3.6 Data processing - SIMD, inc Crypto
  *
  * As the decode gets a little complex we are using a table based
@@ -11271,6 +11409,8 @@ static const AArch64DecodeTable data_proc_simd[] = {
     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
+    { 0xce000000, 0xff808000, disas_crypto_four_reg },
+    { 0xce800000, 0xffe00000, disas_crypto_xar },
     { 0x00000000, 0x00000000, NULL }
 };
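To make the per-lane semantics implemented by the TCG sequences above easier to follow, here is a small, standalone C sketch of the same four operations on plain 64-bit values. The helper names and test values are illustrative only and are not part of the patch:

    #include <stdint.h>
    #include <stdio.h>
    #include <inttypes.h>

    /* Rotate a 64-bit value left by n, 0 < n < 64. */
    static uint64_t rol64(uint64_t x, unsigned n)
    {
        return (x << n) | (x >> (64 - n));
    }

    /* RAX1: per 64-bit lane, Vd = Vn ^ rol(Vm, 1). */
    static uint64_t rax1(uint64_t n, uint64_t m)
    {
        return n ^ rol64(m, 1);
    }

    /* EOR3: three-way XOR, Vd = Vn ^ Vm ^ Va. */
    static uint64_t eor3(uint64_t n, uint64_t m, uint64_t a)
    {
        return n ^ m ^ a;
    }

    /* BCAX: bit clear and XOR, Vd = Vn ^ (Vm & ~Va). */
    static uint64_t bcax(uint64_t n, uint64_t m, uint64_t a)
    {
        return n ^ (m & ~a);
    }

    /* XAR: per 64-bit lane, Vd = ror(Vn ^ Vm, imm6). */
    static uint64_t xar(uint64_t n, uint64_t m, unsigned imm6)
    {
        uint64_t t = n ^ m;

        imm6 &= 63;
        return imm6 ? (t >> imm6) | (t << (64 - imm6)) : t;
    }

    int main(void)
    {
        uint64_t n = 0x0123456789abcdefULL;
        uint64_t m = 0xfedcba9876543210ULL;
        uint64_t a = 0xffff0000ffff0000ULL;

        printf("RAX1: %016" PRIx64 "\n", rax1(n, m));
        printf("EOR3: %016" PRIx64 "\n", eor3(n, m, a));
        printf("BCAX: %016" PRIx64 "\n", bcax(n, m, a));
        printf("XAR:  %016" PRIx64 "\n", xar(n, m, 23));
        return 0;
    }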
This implements emulation of the new SHA-3 instructions that have
been added as an optional extension to the ARMv8 Crypto Extensions
in ARM v8.2.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 target/arm/cpu.h           |   1 +
 target/arm/translate-a64.c | 148 +++++++++++++++++++-
 2 files changed, 145 insertions(+), 4 deletions(-)

--
2.11.0