Message ID | 20220308015358.188499-4-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | s390x/tcg: Implement Vector-Enhancements Facility 2 | expand |
On 08.03.22 02:53, Richard Henderson wrote: > From: David Miller <dmiller423@gmail.com> > > Signed-off-by: David Miller <dmiller423@gmail.com> > Message-Id: <20220307020327.3003-3-dmiller423@gmail.com> > [rth: Rewrite helpers; fix validation of m6.] > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > > The substring search was incorrect, in that it didn't properly > restart the search when a match failed. Split the helper into > multiple, so that the memory accesses can be optimized. > --- > target/s390x/helper.h | 6 ++ > target/s390x/tcg/translate.c | 3 +- > target/s390x/tcg/vec_string_helper.c | 101 +++++++++++++++++++++++++++ > target/s390x/tcg/translate_vx.c.inc | 26 +++++++ > target/s390x/tcg/insn-data.def | 2 + > 5 files changed, 137 insertions(+), 1 deletion(-) > > diff --git a/target/s390x/helper.h b/target/s390x/helper.h > index 7cbcbd7f0b..7412130883 100644 > --- a/target/s390x/helper.h > +++ b/target/s390x/helper.h > @@ -246,6 +246,12 @@ DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32) > DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32) > DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32) > DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_8, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_16, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_32, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_zs8, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_zs16, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_zs32, void, ptr, cptr, cptr, cptr, env, i32) > > /* === Vector Floating-Point Instructions */ > DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) > diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c > index 904b51542f..d9ac29573d 100644 > --- a/target/s390x/tcg/translate.c > +++ b/target/s390x/tcg/translate.c > @@ -6222,7 +6222,8 @@ enum DisasInsnEnum { > #define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */ > #define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION > #define FAC_V S390_FEAT_VECTOR /* vector facility */ > -#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */ > +#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */ > +#define FAC_VE2 S390_FEAT_VECTOR_ENH2 /* vector enhancements facility 2 */ > #define FAC_MIE2 S390_FEAT_MISC_INSTRUCTION_EXT2 /* miscellaneous-instruction-extensions facility 2 */ > #define FAC_MIE3 S390_FEAT_MISC_INSTRUCTION_EXT3 /* miscellaneous-instruction-extensions facility 3 */ > > diff --git a/target/s390x/tcg/vec_string_helper.c b/target/s390x/tcg/vec_string_helper.c > index ac315eb095..6c0476ecc1 100644 > --- a/target/s390x/tcg/vec_string_helper.c > +++ b/target/s390x/tcg/vec_string_helper.c > @@ -471,3 +471,104 @@ void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \ > DEF_VSTRC_CC_RT_HELPER(8) > DEF_VSTRC_CC_RT_HELPER(16) > DEF_VSTRC_CC_RT_HELPER(32) > + > +static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, > + const S390Vector *v4, uint8_t es, bool zs) > +{ > + int substr_elen, substr_0, str_elen, i, j, k, cc; > + int nelem = 16 >> es; > + bool eos = false; > + > + substr_elen = s390_vec_read_element8(v4, 7) >> es; > + > + /* If ZS, bound substr length by min(nelem, strlen(v3)). */ > + if (zs) { > + int i; You can drop this "int i;" > + for (i = 0; i < nelem; i++) { > + if (s390_vec_read_element(v3, i, es) == 0) { > + break; > + } > + } > + if (i < substr_elen) { > + substr_elen = i; > + } Maybe combine both, I guess there is no need to search beyond substr_elen. substr_elen = MIN(substr_elen, nelem); for (i = 0; i < substr_elen; i++) { if (s390_vec_read_element(v3, i, es) == 0) { substr_elen = i; break; } } We should do the MIN(substr_elen, nelem) maybe right when reading it from v4. > + } > + > + if (substr_elen == 0) { > + cc = 2; /* full match for degenerate case of empty substr */ > + k = 0; > + goto done; > + } > + > + /* If ZS, look for eos in the searched string. */ > + if (zs) { > + for (k = 0; k < nelem; k++) { > + if (s390_vec_read_element(v2, k, es) == 0) { > + eos = true; > + break; > + } > + } I guess we could move that into the main search loop and avoid parsing the string twice. Not sure what's better. > + str_elen = k; > + } else { > + str_elen = nelem; > + } > + > + substr_0 = s390_vec_read_element(v3, 0, es); > + > + for (k = 0; ; k++) { > + for (; k < str_elen; k++) { > + if (s390_vec_read_element(v2, k, es) == substr_0) { > + break; > + } > + } > + > + /* If we reached the end of the string, no match. */ > + if (k == str_elen) { > + cc = eos; /* no match (with or without zero char) */ > + goto done; > + } > + > + /* If the substring is only one char, match. */ > + if (substr_elen == 1) { > + cc = 2; /* full match */ > + goto done; > + } > + > + /* If the match begins at the last char, we have a partial match. */ > + if (k == str_elen - 1) { > + cc = 3; /* partial match */ > + goto done; > + } > + > + i = MIN(nelem, k + substr_elen); > + for (j = k + 1; j < i; j++) { > + uint32_t e2 = s390_vec_read_element(v2, j, es); > + uint32_t e3 = s390_vec_read_element(v3, j - k, es); > + if (e2 != e3) { > + break; > + } > + } > + if (j == i) { > + /* Matched up until "end". */ > + cc = i - k == substr_elen ? 2 : 3; /* full or partial match */ > + goto done; > + } > + } > + > + done: > + s390_vec_write_element64(v1, 0, k << es); > + s390_vec_write_element64(v1, 1, 0); > + return cc; > +} > + > +#define DEF_VSTRS_HELPER(BITS) \ > +void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2, \ > + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \ > + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); } \ > +void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2, \ > + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \ > + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); } > + > +DEF_VSTRS_HELPER(8) > +DEF_VSTRS_HELPER(16) > +DEF_VSTRS_HELPER(32) > diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc > index ea28e40d4f..d514e8b218 100644 > --- a/target/s390x/tcg/translate_vx.c.inc > +++ b/target/s390x/tcg/translate_vx.c.inc > @@ -2497,6 +2497,32 @@ static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o) > return DISAS_NEXT; > } > > +static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o) > +{ > + typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr, > + TCGv_ptr, TCGv_ptr, TCGv_i32); > + static const helper_vstrs fns[3][2] = { > + { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 }, > + { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 }, > + { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 }, > + }; > + Superfluous empty line. > + const uint8_t m5 = get_field(s, m5); Could so a s/m5/es/ , as we do it in other handlers. > + const uint8_t m6 = get_field(s, m6); > + bool zs = m6 & 2; I remember we wanted to use extract32() for such bit-tests, at least we do it in most of the other handlers :) const bool zs = extract32(m6, 1, 1); ? > + > + if (m5 > ES_32 || m6 & ~2) { > + gen_program_exception(s, PGM_SPECIFICATION); > + return DISAS_NORETURN; > + } > +
On 3/21/22 03:31, David Hildenbrand wrote: >> + for (i = 0; i < nelem; i++) { >> + if (s390_vec_read_element(v3, i, es) == 0) { >> + break; >> + } >> + } >> + if (i < substr_elen) { >> + substr_elen = i; >> + } > > Maybe combine both, I guess there is no need to search beyond substr_elen. > > substr_elen = MIN(substr_elen, nelem); > for (i = 0; i < substr_elen; i++) { > if (s390_vec_read_element(v3, i, es) == 0) { > substr_elen = i; > break; > } > } Yep. > We should do the MIN(substr_elen, nelem) maybe right when reading it > from v4. No, v4 does not get bounded until zs is set. >> + /* If ZS, look for eos in the searched string. */ >> + if (zs) { >> + for (k = 0; k < nelem; k++) { >> + if (s390_vec_read_element(v2, k, es) == 0) { >> + eos = true; >> + break; >> + } >> + } > > I guess we could move that into the main search loop and avoid parsing > the string twice. Not sure what's better. I'd leave it here, so that we only do the strlen once. There's no obvious place within the the search loop that wouldn't wind up doing the strlen more than once. r~
I came to much the same conclusion On Tue, Mar 22, 2022 at 10:42 AM Richard Henderson <richard.henderson@linaro.org> wrote: > > On 3/21/22 03:31, David Hildenbrand wrote: > >> + for (i = 0; i < nelem; i++) { > >> + if (s390_vec_read_element(v3, i, es) == 0) { > >> + break; > >> + } > >> + } > >> + if (i < substr_elen) { > >> + substr_elen = i; > >> + } > > > > Maybe combine both, I guess there is no need to search beyond substr_elen. > > > > substr_elen = MIN(substr_elen, nelem); > > for (i = 0; i < substr_elen; i++) { > > if (s390_vec_read_element(v3, i, es) == 0) { > > substr_elen = i; > > break; > > } > > } > > Yep. > > > We should do the MIN(substr_elen, nelem) maybe right when reading it > > from v4. > > No, v4 does not get bounded until zs is set. > > >> + /* If ZS, look for eos in the searched string. */ > >> + if (zs) { > >> + for (k = 0; k < nelem; k++) { > >> + if (s390_vec_read_element(v2, k, es) == 0) { > >> + eos = true; > >> + break; > >> + } > >> + } > > > > I guess we could move that into the main search loop and avoid parsing > > the string twice. Not sure what's better. > > I'd leave it here, so that we only do the strlen once. There's no obvious place within > the the search loop that wouldn't wind up doing the strlen more than once. > > > r~
diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 7cbcbd7f0b..7412130883 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -246,6 +246,12 @@ DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32) DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32) DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32) DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_8, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_16, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_32, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_zs8, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_zs16, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_zs32, void, ptr, cptr, cptr, cptr, env, i32) /* === Vector Floating-Point Instructions */ DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 904b51542f..d9ac29573d 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -6222,7 +6222,8 @@ enum DisasInsnEnum { #define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */ #define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION #define FAC_V S390_FEAT_VECTOR /* vector facility */ -#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */ +#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */ +#define FAC_VE2 S390_FEAT_VECTOR_ENH2 /* vector enhancements facility 2 */ #define FAC_MIE2 S390_FEAT_MISC_INSTRUCTION_EXT2 /* miscellaneous-instruction-extensions facility 2 */ #define FAC_MIE3 S390_FEAT_MISC_INSTRUCTION_EXT3 /* miscellaneous-instruction-extensions facility 3 */ diff --git a/target/s390x/tcg/vec_string_helper.c b/target/s390x/tcg/vec_string_helper.c index ac315eb095..6c0476ecc1 100644 --- a/target/s390x/tcg/vec_string_helper.c +++ b/target/s390x/tcg/vec_string_helper.c @@ -471,3 +471,104 @@ void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \ DEF_VSTRC_CC_RT_HELPER(8) DEF_VSTRC_CC_RT_HELPER(16) DEF_VSTRC_CC_RT_HELPER(32) + +static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + const S390Vector *v4, uint8_t es, bool zs) +{ + int substr_elen, substr_0, str_elen, i, j, k, cc; + int nelem = 16 >> es; + bool eos = false; + + substr_elen = s390_vec_read_element8(v4, 7) >> es; + + /* If ZS, bound substr length by min(nelem, strlen(v3)). */ + if (zs) { + int i; + for (i = 0; i < nelem; i++) { + if (s390_vec_read_element(v3, i, es) == 0) { + break; + } + } + if (i < substr_elen) { + substr_elen = i; + } + } + + if (substr_elen == 0) { + cc = 2; /* full match for degenerate case of empty substr */ + k = 0; + goto done; + } + + /* If ZS, look for eos in the searched string. */ + if (zs) { + for (k = 0; k < nelem; k++) { + if (s390_vec_read_element(v2, k, es) == 0) { + eos = true; + break; + } + } + str_elen = k; + } else { + str_elen = nelem; + } + + substr_0 = s390_vec_read_element(v3, 0, es); + + for (k = 0; ; k++) { + for (; k < str_elen; k++) { + if (s390_vec_read_element(v2, k, es) == substr_0) { + break; + } + } + + /* If we reached the end of the string, no match. */ + if (k == str_elen) { + cc = eos; /* no match (with or without zero char) */ + goto done; + } + + /* If the substring is only one char, match. */ + if (substr_elen == 1) { + cc = 2; /* full match */ + goto done; + } + + /* If the match begins at the last char, we have a partial match. */ + if (k == str_elen - 1) { + cc = 3; /* partial match */ + goto done; + } + + i = MIN(nelem, k + substr_elen); + for (j = k + 1; j < i; j++) { + uint32_t e2 = s390_vec_read_element(v2, j, es); + uint32_t e3 = s390_vec_read_element(v3, j - k, es); + if (e2 != e3) { + break; + } + } + if (j == i) { + /* Matched up until "end". */ + cc = i - k == substr_elen ? 2 : 3; /* full or partial match */ + goto done; + } + } + + done: + s390_vec_write_element64(v1, 0, k << es); + s390_vec_write_element64(v1, 1, 0); + return cc; +} + +#define DEF_VSTRS_HELPER(BITS) \ +void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2, \ + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \ + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); } \ +void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2, \ + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \ + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); } + +DEF_VSTRS_HELPER(8) +DEF_VSTRS_HELPER(16) +DEF_VSTRS_HELPER(32) diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc index ea28e40d4f..d514e8b218 100644 --- a/target/s390x/tcg/translate_vx.c.inc +++ b/target/s390x/tcg/translate_vx.c.inc @@ -2497,6 +2497,32 @@ static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o) return DISAS_NEXT; } +static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o) +{ + typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_i32); + static const helper_vstrs fns[3][2] = { + { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 }, + { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 }, + { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 }, + }; + + const uint8_t m5 = get_field(s, m5); + const uint8_t m6 = get_field(s, m6); + bool zs = m6 & 2; + + if (m5 > ES_32 || m6 & ~2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + cpu_env, 0, fns[m5][zs]); + set_cc_static(s); + return DISAS_NEXT; +} + static DisasJumpType op_vfa(DisasContext *s, DisasOps *o) { const uint8_t fpf = get_field(s, m4); diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def index 6c8a8b229f..46add91a0e 100644 --- a/target/s390x/tcg/insn-data.def +++ b/target/s390x/tcg/insn-data.def @@ -1246,6 +1246,8 @@ F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC) /* VECTOR STRING RANGE COMPARE */ F(0xe78a, VSTRC, VRR_d, V, 0, 0, 0, 0, vstrc, 0, IF_VEC) +/* VECTOR STRING SEARCH */ + F(0xe78b, VSTRS, VRR_d, VE2, 0, 0, 0, 0, vstrs, 0, IF_VEC) /* === Vector Floating-Point Instructions */