Message ID | 20220308015358.188499-8-richard.henderson@linaro.org
---|---
State | New
Series | s390x/tcg: Implement Vector-Enhancements Facility 2
On 08.03.22 02:53, Richard Henderson wrote:
> From: David Miller <dmiller423@gmail.com>
>
> Signed-off-by: David Miller <dmiller423@gmail.com>
> Message-Id: <20220307020327.3003-6-dmiller423@gmail.com>
> [rth: Split out elements (plural) from element (scalar)
>       Use tcg little-endian memory ops, plus hswap and wswap.]
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/s390x/tcg/translate_vx.c.inc | 101 ++++++++++++++++++++++++++++
>  target/s390x/tcg/insn-data.def      |   4 ++
>  2 files changed, 105 insertions(+)
>
> diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
> index ac807122a3..9a82401d71 100644
> --- a/target/s390x/tcg/translate_vx.c.inc
> +++ b/target/s390x/tcg/translate_vx.c.inc
> @@ -457,6 +457,56 @@ static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
>      return DISAS_NEXT;
>  }
>
> +static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
> +{
> +    const uint8_t es = get_field(s, m3);
> +    TCGv_i64 t0, t1, tt;
> +
> +    if (es < ES_16 || es > ES_128) {
> +        gen_program_exception(s, PGM_SPECIFICATION);
> +        return DISAS_NORETURN;
> +    }
> +
> +    t0 = tcg_temp_new_i64();
> +    t1 = tcg_temp_new_i64();
> +
> +    /* Begin with byte reversed doublewords... */
> +    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
> +    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> +    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
> +

Would it make sense to just special-case ES_128, by loading them into
the proper t0/t1 right away?

    if (es == ES_128) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
        goto write;
    }

    /* Begin with byte reversed doublewords... */
    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);

    /*
     * For 16 and 32-bit elements, the doubleword bswap also reversed
     * the order of the elements. Perform a larger order swap to put
     * them back into place.
     */
    switch (es) {
    ...
    }

write:
    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);

> +    /*
> +     * For 16 and 32-bit elements, the doubleword bswap also reversed
> +     * the order of the elements. Perform a larger order swap to put
> +     * them back into place. For the 128-bit "element", finish the
> +     * bswap by swapping the doublewords.
> +     */
> +    switch (es) {
> +    case ES_16:
> +        tcg_gen_hswap_i64(t0, t0);
> +        tcg_gen_hswap_i64(t1, t1);
> +        break;
> +    case ES_32:
> +        tcg_gen_wswap_i64(t0, t0);
> +        tcg_gen_wswap_i64(t1, t1);
> +        break;
> +    case ES_64:
> +        break;
> +    case ES_128:
> +        tt = t0, t0 = t1, t1 = tt;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
> +    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
> +
> +    tcg_temp_free(t0);
> +    tcg_temp_free(t1);
> +    return DISAS_NEXT;
> +}
> +
>  static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
>  {
>      const uint8_t es = s->insn->data;
> @@ -998,6 +1048,57 @@ static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
>      return DISAS_NEXT;
>  }
>
> +static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
> +{
> +    const uint8_t es = get_field(s, m3);
> +    TCGv_i64 t0, t1, tt;
> +
> +    if (es < ES_16 || es > ES_128) {
> +        gen_program_exception(s, PGM_SPECIFICATION);
> +        return DISAS_NORETURN;
> +    }
> +
> +    /* Probe write access before actually modifying memory */
> +    gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
> +
> +    t0 = tcg_temp_new_i64();
> +    t1 = tcg_temp_new_i64();
> +    read_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
> +    read_vec_element_i64(t1, get_field(s, v1), 1, ES_64);

Ditto, eventually just special-case on MO_128 directly.

> +
> +    /*
> +     * For 16 and 32-bit elements, the doubleword bswap below will
> +     * reverse the order of the elements. Perform a larger order
> +     * swap to put them back into place. For the 128-bit "element",
> +     * finish the bswap by swapping the doublewords.
> +     */
> +    switch (es) {
> +    case MO_16:
> +        tcg_gen_hswap_i64(t0, t0);
> +        tcg_gen_hswap_i64(t1, t1);
> +        break;
> +    case MO_32:
> +        tcg_gen_wswap_i64(t0, t0);
> +        tcg_gen_wswap_i64(t1, t1);
> +        break;
> +    case MO_64:
> +        break;
> +    case MO_128:
> +        tt = t0, t0 = t1, t1 = tt;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
> +    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> +    tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
> +
> +    tcg_temp_free(t0);
> +    tcg_temp_free(t1);
> +    return DISAS_NEXT;
> +}
> +
>  static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
>  {
>      const uint8_t es = s->insn->data;
> diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
> index b524541a7d..ee6e1dc9e5 100644
> --- a/target/s390x/tcg/insn-data.def
> +++ b/target/s390x/tcg/insn-data.def
> @@ -1027,6 +1027,8 @@
>      F(0xe756, VLR,     VRR_a, V,   0, 0, 0, 0, vlr, 0, IF_VEC)
>  /* VECTOR LOAD AND REPLICATE */
>      F(0xe705, VLREP,   VRX,   V,   la2, 0, 0, 0, vlrep, 0, IF_VEC)
> +/* VECTOR LOAD BYTE REVERSED ELEMENTS */
> +    F(0xe606, VLBR,    VRX,   VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC)
>  /* VECTOR LOAD ELEMENT */
>      E(0xe700, VLEB,    VRX,   V,   la2, 0, 0, 0, vle, 0, ES_8, IF_VEC)
>      E(0xe701, VLEH,    VRX,   V,   la2, 0, 0, 0, vle, 0, ES_16, IF_VEC)
> @@ -1079,6 +1081,8 @@
>      F(0xe75f, VSEG,    VRR_a, V,   0, 0, 0, 0, vseg, 0, IF_VEC)
>  /* VECTOR STORE */
>      F(0xe70e, VST,     VRX,   V,   la2, 0, 0, 0, vst, 0, IF_VEC)
> +/* VECTOR STORE BYTE REVERSED ELEMENTS */
> +    F(0xe60e, VSTBR,   VRX,   VE2, la2, 0, 0, 0, vstbr, 0, IF_VEC)
>  /* VECTOR STORE ELEMENT */
>      E(0xe708, VSTEB,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_8, IF_VEC)
>      E(0xe709, VSTEH,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
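[Editor's note: the hswap identity the patch relies on is easy to check on
the host. Below is a minimal, self-contained C model of the ES_16 load path;
this is hypothetical test code, not part of the patch or of QEMU, and
ld64_be(), bswap64() and hswap64() are local stand-ins for the guest's
big-endian load and the TCG swap ops. On the big-endian s390x guest, a
doubleword loaded with MO_LEUQ is exactly bswap64() of the normally loaded
value, and the following halfword swap leaves each 16-bit element
byte-reversed but in its original position.]

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Normal big-endian doubleword load, as the s390x guest sees memory. */
    static uint64_t ld64_be(const uint8_t *p)
    {
        uint64_t v = 0;
        for (int i = 0; i < 8; i++) {
            v = (v << 8) | p[i];
        }
        return v;
    }

    /* Full byte reversal; bswap64(ld64_be(p)) models a MO_LEUQ load. */
    static uint64_t bswap64(uint64_t v)
    {
        uint64_t r = 0;
        for (int i = 0; i < 8; i++) {
            r = (r << 8) | (v & 0xff);
            v >>= 8;
        }
        return r;
    }

    /* Reverse the order of the four halfwords, like tcg_gen_hswap_i64(). */
    static uint64_t hswap64(uint64_t v)
    {
        return (v << 48) | ((v & 0xffff0000ull) << 16) |
               ((v >> 16) & 0xffff0000ull) | (v >> 48);
    }

    int main(void)
    {
        const uint8_t mem[8] = { 0x00, 0x11, 0x22, 0x33,
                                 0x44, 0x55, 0x66, 0x77 };

        /* The patch's ES_16 path: little-endian load, then halfword swap. */
        uint64_t t0 = hswap64(bswap64(ld64_be(mem)));

        /* Architectural VLBRH: each halfword byte-reversed, order kept. */
        uint64_t expect = 0;
        for (int i = 0; i < 8; i += 2) {
            expect = (expect << 16) | (uint64_t)((mem[i + 1] << 8) | mem[i]);
        }

        assert(t0 == expect);                  /* 0x1100332255447766 */
        printf("ok: 0x%016llx\n", (unsigned long long)t0);
        return 0;
    }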
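[Editor's note: the ES_128 special case suggested in the review rests on a
similar identity: two MO_LEUQ doubleword loads written to opposite halves of
the destination amount to one full 16-byte reversal. A self-contained sketch
of that check, under the same assumptions and with the same hypothetical
helper names as the model above:]

    #include <assert.h>
    #include <stdint.h>

    /* Big-endian doubleword load; bswap64(ld64_be(p)) models MO_LEUQ. */
    static uint64_t ld64_be(const uint8_t *p)
    {
        uint64_t v = 0;
        for (int i = 0; i < 8; i++) {
            v = (v << 8) | p[i];
        }
        return v;
    }

    static uint64_t bswap64(uint64_t v)
    {
        uint64_t r = 0;
        for (int i = 0; i < 8; i++) {
            r = (r << 8) | (v & 0xff);
            v >>= 8;
        }
        return r;
    }

    int main(void)
    {
        uint8_t mem[16], rev[16];

        for (int i = 0; i < 16; i++) {
            mem[i] = (uint8_t)(i * 0x11);      /* arbitrary test pattern */
        }

        /* Architectural VLBRQ: the 16 bytes reversed end to end. */
        for (int i = 0; i < 16; i++) {
            rev[i] = mem[15 - i];
        }

        /* Suggested special case: LE-load the second doubleword into the
         * high half of the result, the first into the low half. */
        uint64_t hi = bswap64(ld64_be(mem + 8));
        uint64_t lo = bswap64(ld64_be(mem));

        assert(hi == ld64_be(rev));
        assert(lo == ld64_be(rev + 8));
        return 0;
    }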