Message ID | 20220620175235.60881-25-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | target/arm: Scalable Matrix Extension | expand |
On Mon, 20 Jun 2022 at 19:23, Richard Henderson <richard.henderson@linaro.org> wrote: > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/helper-sme.h | 2 ++ > target/arm/translate-a64.h | 1 + > target/arm/sme.decode | 4 ++++ > target/arm/sme_helper.c | 25 +++++++++++++++++++++++++ > target/arm/translate-a64.c | 14 ++++++++++++++ > target/arm/translate-sme.c | 13 +++++++++++++ > 6 files changed, 59 insertions(+) > > +void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl) > +{ > + uint32_t i; > + > + /* > + * Special case clearing the entire ZA space. > + * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any > + * parts of the ZA storage outside of SVL. > + */ > + if (imm == 0xff) { > + memset(env->zarray, 0, sizeof(env->zarray)); > + return; > + } > + > + /* > + * Recall that ZAnH.D[m] is spread across ZA[n+8*m]. > + * Unless SVL == ARM_MAX_VQ, each row is discontiguous. This comment led me down a garden path for a while. Each row in a tile *is* contiguous, whatever the value of SVL. What isn't contiguous is the entire tile, because the rows of the tile are striped across the ZA[] array so rows that are adjacent in the tile aren't adjacent in the ZA[] array. (And this is true even if SVL is ARM_MAX_VQ.) > + */ > + for (i = 0; i < svl; i++) { > + if (imm & (1 << (i % 8))) { > + memset(&env->zarray[i], 0, svl); > + } > + } > +} With the comment fixed, Reviewed-by: Peter Maydell <peter.maydell@linaro.org> I'll also go back to the patch that adds env->zarray to suggest a beefed-up comment there, because I just had to spend half an hour with the spec to make sure I understood what the zarray is compared to the architecture (some of which is the spec being complicated, of course ;-)) thanks -- PMM
diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h index 3bd48c235f..c4ee1f09e4 100644 --- a/target/arm/helper-sme.h +++ b/target/arm/helper-sme.h @@ -19,3 +19,5 @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32) + +DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32) diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index 6bd1b2eb4b..ec5d580ba0 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -30,6 +30,7 @@ bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, unsigned int imms, unsigned int immr); bool sve_access_check(DisasContext *s); bool sme_enabled_check(DisasContext *s); +bool sme_za_enabled_check(DisasContext *s); TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, bool tag_checked, int log2_size); diff --git a/target/arm/sme.decode b/target/arm/sme.decode index c25c031a71..6e4483fdce 100644 --- a/target/arm/sme.decode +++ b/target/arm/sme.decode @@ -18,3 +18,7 @@ # # This file is processed by scripts/decodetree.py # + +### SME Misc + +ZERO 11000000 00 001 00000000000 imm:8 diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c index b215725594..e5b5723a15 100644 --- a/target/arm/sme_helper.c +++ b/target/arm/sme_helper.c @@ -59,3 +59,28 @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i) memset(env->zarray, 0, sizeof(env->zarray)); } } + +void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl) +{ + uint32_t i; + + /* + * Special case clearing the entire ZA space. + * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any + * parts of the ZA storage outside of SVL. + */ + if (imm == 0xff) { + memset(env->zarray, 0, sizeof(env->zarray)); + return; + } + + /* + * Recall that ZAnH.D[m] lives in ZA[n+8*m]: each tile row is contiguous, + * but a tile's rows are striped across ZA[] whatever the SVL.
+ */ + for (i = 0; i < svl; i++) { + if (imm & (1 << (i % 8))) { + memset(&env->zarray[i], 0, svl); + } + } +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 498970f653..df9fc42635 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1231,6 +1231,20 @@ bool sme_enabled_check(DisasContext *s) return fp_access_check_only(s); } +/* Note that this function corresponds to CheckSMEAndZAEnabled. */ +bool sme_za_enabled_check(DisasContext *s) +{ + if (!sme_enabled_check(s)) { + return false; + } + if (!s->pstate_za) { + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_smetrap(SME_ET_InactiveZA, false)); + return false; + } + return true; +} + /* * This utility function is for doing register extension with an * optional shift. You will likely want to pass a temporary for the diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c index 786c93fb2d..d526c74456 100644 --- a/target/arm/translate-sme.c +++ b/target/arm/translate-sme.c @@ -33,3 +33,16 @@ */ #include "decode-sme.c.inc" + + +static bool trans_ZERO(DisasContext *s, arg_ZERO *a) +{ + if (!dc_isar_feature(aa64_sme, s)) { + return false; + } + if (sme_za_enabled_check(s)) { + gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm), + tcg_constant_i32(s->svl)); + } + return true; +}
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/helper-sme.h | 2 ++ target/arm/translate-a64.h | 1 + target/arm/sme.decode | 4 ++++ target/arm/sme_helper.c | 25 +++++++++++++++++++++++++ target/arm/translate-a64.c | 14 ++++++++++++++ target/arm/translate-sme.c | 13 +++++++++++++ 6 files changed, 59 insertions(+)