Message ID | 20220620175235.60881-35-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | target/arm: Scalable Matrix Extension | expand |
On Mon, 20 Jun 2022 at 19:14, Richard Henderson <richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Would be helpful to note in the commit message that this is an SVE
instruction that operates using the SVE vector length but that it is
present only if SME is implemented.

> +static bool trans_PSEL(DisasContext *s, arg_psel *a)
> +{
> +    int vl = vec_full_reg_size(s);
> +    int pl = pred_gvec_reg_size(s);
> +    int elements = vl >> a->esz;
> +    TCGv_i64 tmp, didx, dbit;
> +    TCGv_ptr ptr;
> +
> +    if (!dc_isar_feature(aa64_sme, s)) {
> +        return false;
> +    }
> +    if (!sve_access_check(s)) {
> +        return true;
> +    }
> +
> +    tmp = tcg_temp_new_i64();
> +    dbit = tcg_temp_new_i64();
> +    didx = tcg_temp_new_i64();
> +    ptr = tcg_temp_new_ptr();
> +
> +    /* Compute the predicate element. */
> +    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
> +    if (is_power_of_2(elements)) {
> +        tcg_gen_andi_i64(tmp, tmp, elements - 1);
> +    } else {
> +        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
> +    }
> +
> +    /* Extract the predicate byte and bit indices. */
> +    tcg_gen_shli_i64(tmp, tmp, a->esz);
> +    tcg_gen_andi_i64(dbit, tmp, 7);
> +    tcg_gen_shri_i64(didx, tmp, 3);
> +    if (HOST_BIG_ENDIAN) {
> +        tcg_gen_xori_i64(didx, didx, 7);
> +    }
> +
> +    /* Load the predicate word. */
> +    tcg_gen_trunc_i64_ptr(ptr, didx);
> +    tcg_gen_add_ptr(ptr, ptr, cpu_env);
> +    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
> +
> +    /* Extract the predicate bit and replicate to MO_64. */
> +    tcg_gen_shr_i64(tmp, tmp, dbit);
> +    tcg_gen_andi_i64(tmp, tmp, 1);
> +    tcg_gen_neg_i64(tmp, tmp);
> +
> +    /* Apply to either copy the source, or write zeros. */
> +    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
> +                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);
> +
> +    tcg_temp_free_i64(tmp);
> +    tcg_temp_free_i64(dbit);
> +    tcg_temp_free_i64(didx);
> +    tcg_temp_free_ptr(ptr);
> +    return true;
> +}

Suspect this would be clearer to read as a helper function, but it's
not that long as a series of TCG ops, I suppose.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index bbdaac6ac7..bf561c270a 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1674,3 +1674,23 @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2

 ### SVE2 floating-point bfloat16 dot-product (indexed)
 BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
+
+### SVE broadcast predicate element
+
+&psel esz pd pn pm rv imm
+%psel_rv 16:2 !function=plus_12
+%psel_imm_b 22:2 19:2
+%psel_imm_h 22:2 20:1
+%psel_imm_s 22:2
+%psel_imm_d 23:1
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
+        &psel rv=%psel_rv
+
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
+        @psel esz=0 imm=%psel_imm_b
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
+        @psel esz=1 imm=%psel_imm_h
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
+        @psel esz=2 imm=%psel_imm_s
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
+        @psel esz=3 imm=%psel_imm_d
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index adf0cd3e68..58d0894e15 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7379,3 +7379,60 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)

 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
+
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
+{
+    int vl = vec_full_reg_size(s);
+    int pl = pred_gvec_reg_size(s);
+    int elements = vl >> a->esz;
+    TCGv_i64 tmp, didx, dbit;
+    TCGv_ptr ptr;
+
+    if (!dc_isar_feature(aa64_sme, s)) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    tmp = tcg_temp_new_i64();
+    dbit = tcg_temp_new_i64();
+    didx = tcg_temp_new_i64();
+    ptr = tcg_temp_new_ptr();
+
+    /* Compute the predicate element. */
+    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
+    if (is_power_of_2(elements)) {
+        tcg_gen_andi_i64(tmp, tmp, elements - 1);
+    } else {
+        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
+    }
+
+    /* Extract the predicate byte and bit indices. */
+    tcg_gen_shli_i64(tmp, tmp, a->esz);
+    tcg_gen_andi_i64(dbit, tmp, 7);
+    tcg_gen_shri_i64(didx, tmp, 3);
+    if (HOST_BIG_ENDIAN) {
+        tcg_gen_xori_i64(didx, didx, 7);
+    }
+
+    /* Load the predicate word. */
+    tcg_gen_trunc_i64_ptr(ptr, didx);
+    tcg_gen_add_ptr(ptr, ptr, cpu_env);
+    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
+
+    /* Extract the predicate bit and replicate to MO_64. */
+    tcg_gen_shr_i64(tmp, tmp, dbit);
+    tcg_gen_andi_i64(tmp, tmp, 1);
+    tcg_gen_neg_i64(tmp, tmp);
+
+    /* Apply to either copy the source, or write zeros. */
+    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
+                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);
+
+    tcg_temp_free_i64(tmp);
+    tcg_temp_free_i64(dbit);
+    tcg_temp_free_i64(didx);
+    tcg_temp_free_ptr(ptr);
+    return true;
+}
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/sve.decode      | 20 +++++++++++++
 target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)