Message ID | 20200918183751.2787647-35-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | target/arm: Implement SVE2 | expand |
On 2020/9/19 2:37, Richard Henderson wrote: > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > v2: Fix decodetree typo > --- > target/arm/sve.decode | 3 ++ > target/arm/translate-sve.c | 62 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 65 insertions(+) > > diff --git a/target/arm/sve.decode b/target/arm/sve.decode > index b7038f9f57..19d503e2f4 100644 > --- a/target/arm/sve.decode > +++ b/target/arm/sve.decode > @@ -702,6 +702,9 @@ CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000 > # SVE integer compare scalar count and limit > WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4 > > +# SVE2 pointer conflict compare > +WHILE_ptr 00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4 > + > ### SVE Integer Wide Immediate - Unpredicated Group > > # SVE broadcast floating-point immediate (unpredicated) > diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c > index f1bc4c63e6..d3241ce167 100644 > --- a/target/arm/translate-sve.c > +++ b/target/arm/translate-sve.c > @@ -3227,6 +3227,68 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) > return true; > } > > +static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) > +{ > + TCGv_i64 op0, op1, diff, t1, tmax; > + TCGv_i32 t2, t3; > + TCGv_ptr ptr; > + unsigned desc, vsz = vec_full_reg_size(s); > + > + if (!dc_isar_feature(aa64_sve2, s)) { > + return false; > + } > + if (!sve_access_check(s)) { > + return true; > + } > + > + op0 = read_cpu_reg(s, a->rn, 1); > + op1 = read_cpu_reg(s, a->rm, 1); > + > + tmax = tcg_const_i64(vsz); > + diff = tcg_temp_new_i64(); > + > + if (a->rw) { > + /* WHILERW */ > + /* diff = abs(op1 - op0), noting that op0/1 are unsigned. 
*/ > + t1 = tcg_temp_new_i64(); > + tcg_gen_sub_i64(diff, op0, op1); > + tcg_gen_sub_i64(t1, op1, op0); > + tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1); It should be: tcg_gen_movcond_i64(TCG_COND_GTU, diff, op0, op1, diff, t1); > + tcg_temp_free_i64(t1); > + /* If op1 == op0, diff == 0, and the condition is always true. */ > + tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); > + } else { > + /* WHILEWR */ > + tcg_gen_sub_i64(diff, op1, op0); > + /* If op0 >= op1, diff <= 0, the condition is always true. */ > + tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); > + } > + > + /* Bound to the maximum. */ > + tcg_gen_umin_i64(diff, diff, tmax); > + tcg_temp_free_i64(tmax); > + > + /* Since we're bounded, pass as a 32-bit type. */ > + t2 = tcg_temp_new_i32(); > + tcg_gen_extrl_i64_i32(t2, diff); We should align count down to (1 << esz), tcg_gen_andi_i32(t2,~MAKE_64BIT_MASK(0, esz)); Best Regards, Zhiwei > + tcg_temp_free_i64(diff); > + > + desc = (vsz / 8) - 2; > + desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz); > + t3 = tcg_const_i32(desc); > + > + ptr = tcg_temp_new_ptr(); > + tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); > + > + gen_helper_sve_whilel(t2, ptr, t2, t3); > + do_pred_flags(t2); > + > + tcg_temp_free_ptr(ptr); > + tcg_temp_free_i32(t2); > + tcg_temp_free_i32(t3); > + return true; > +} > + > /* > *** SVE Integer Wide Immediate - Unpredicated Group > */ <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> </head> <body> <br> <br> <div class="moz-cite-prefix">On 2020/9/19 2:37, Richard Henderson wrote:<br> </div> <blockquote type="cite" cite="mid:20200918183751.2787647-35-richard.henderson@linaro.org"> <pre class="moz-quote-pre" wrap="">Signed-off-by: Richard Henderson <a class="moz-txt-link-rfc2396E" href="mailto:richard.henderson@linaro.org"><richard.henderson@linaro.org></a> --- v2: Fix decodetree typo --- target/arm/sve.decode | 3 ++ target/arm/translate-sve.c | 
62 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/target/arm/sve.decode b/target/arm/sve.decode index b7038f9f57..19d503e2f4 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -702,6 +702,9 @@ CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000 # SVE integer compare scalar count and limit WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4 +# SVE2 pointer conflict compare +WHILE_ptr 00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4 + ### SVE Integer Wide Immediate - Unpredicated Group # SVE broadcast floating-point immediate (unpredicated) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index f1bc4c63e6..d3241ce167 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -3227,6 +3227,68 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) return true; } +static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) +{ + TCGv_i64 op0, op1, diff, t1, tmax; + TCGv_i32 t2, t3; + TCGv_ptr ptr; + unsigned desc, vsz = vec_full_reg_size(s); + + if (!dc_isar_feature(aa64_sve2, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + op0 = read_cpu_reg(s, a->rn, 1); + op1 = read_cpu_reg(s, a->rm, 1); + + tmax = tcg_const_i64(vsz); + diff = tcg_temp_new_i64(); + + if (a->rw) { + /* WHILERW */ + /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ + t1 = tcg_temp_new_i64(); + tcg_gen_sub_i64(diff, op0, op1); + tcg_gen_sub_i64(t1, op1, op0); + tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1);</pre> </blockquote> It should be:<br> <pre>tcg_gen_movcond_i64(TCG_COND_GTU, diff, op0, op1, diff, t1);</pre> <blockquote type="cite" cite="mid:20200918183751.2787647-35-richard.henderson@linaro.org"> <pre class="moz-quote-pre" wrap=""> + tcg_temp_free_i64(t1); + /* If op1 == op0, diff == 0, and the condition is always true. 
*/ + tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); + } else { + /* WHILEWR */ + tcg_gen_sub_i64(diff, op1, op0); + /* If op0 >= op1, diff <= 0, the condition is always true. */ + tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); + } + + /* Bound to the maximum. */ + tcg_gen_umin_i64(diff, diff, tmax); + tcg_temp_free_i64(tmax); + + /* Since we're bounded, pass as a 32-bit type. */ + t2 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t2, diff);</pre> </blockquote> We should align count down to (1 << esz), <br> <pre>tcg_gen_andi_i32(t2,~MAKE_64BIT_MASK(0, esz)); </pre> Best Regards,<br> Zhiwei<br> <blockquote type="cite" cite="mid:20200918183751.2787647-35-richard.henderson@linaro.org"> <pre class="moz-quote-pre" wrap=""> + tcg_temp_free_i64(diff); + + desc = (vsz / 8) - 2; + desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz); + t3 = tcg_const_i32(desc); + + ptr = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); + + gen_helper_sve_whilel(t2, ptr, t2, t3);</pre> </blockquote> <blockquote type="cite" cite="mid:20200918183751.2787647-35-richard.henderson@linaro.org"> <pre class="moz-quote-pre" wrap=""> + do_pred_flags(t2); + + tcg_temp_free_ptr(ptr); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + return true; +} + /* *** SVE Integer Wide Immediate - Unpredicated Group */ </pre> </blockquote> <br> </body> </html>
On 10/12/20 7:33 PM, LIU Zhiwei wrote: >> + if (a->rw) { >> + /* WHILERW */ >> + /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ >> + t1 = tcg_temp_new_i64(); >> + tcg_gen_sub_i64(diff, op0, op1); >> + tcg_gen_sub_i64(t1, op1, op0); >> + tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1); > It should be: > > tcg_gen_movcond_i64(TCG_COND_GTU, diff, op0, op1, diff, t1); Yep. > >> + tcg_temp_free_i64(t1); >> + /* If op1 == op0, diff == 0, and the condition is always true. */ >> + tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); >> + } else { >> + /* WHILEWR */ >> + tcg_gen_sub_i64(diff, op1, op0); >> + /* If op0 >= op1, diff <= 0, the condition is always true. */ >> + tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); >> + } >> + >> + /* Bound to the maximum. */ >> + tcg_gen_umin_i64(diff, diff, tmax); >> + tcg_temp_free_i64(tmax); >> + >> + /* Since we're bounded, pass as a 32-bit type. */ >> + t2 = tcg_temp_new_i32(); >> + tcg_gen_extrl_i64_i32(t2, diff); > We should align count down to (1 << esz), > > tcg_gen_andi_i32(t2,~MAKE_64BIT_MASK(0, esz)); Yep, this corresponds to the "DIV (esize DIV 8)" portion of the pseudocode. But it needs to go earlier, before we compare diff against 0 in the two movcond above. Will fix. Thanks, r~
diff --git a/target/arm/sve.decode b/target/arm/sve.decode index b7038f9f57..19d503e2f4 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -702,6 +702,9 @@ CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000 # SVE integer compare scalar count and limit WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4 +# SVE2 pointer conflict compare +WHILE_ptr 00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4 + ### SVE Integer Wide Immediate - Unpredicated Group # SVE broadcast floating-point immediate (unpredicated) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index f1bc4c63e6..d3241ce167 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -3227,6 +3227,68 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) return true; } +static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) +{ + TCGv_i64 op0, op1, diff, t1, tmax; + TCGv_i32 t2, t3; + TCGv_ptr ptr; + unsigned desc, vsz = vec_full_reg_size(s); + + if (!dc_isar_feature(aa64_sve2, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + op0 = read_cpu_reg(s, a->rn, 1); + op1 = read_cpu_reg(s, a->rm, 1); + + tmax = tcg_const_i64(vsz); + diff = tcg_temp_new_i64(); + + if (a->rw) { + /* WHILERW */ + /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ + t1 = tcg_temp_new_i64(); + tcg_gen_sub_i64(diff, op0, op1); + tcg_gen_sub_i64(t1, op1, op0); + tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1); + tcg_temp_free_i64(t1); + /* If op1 == op0, diff == 0, and the condition is always true. */ + tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); + } else { + /* WHILEWR */ + tcg_gen_sub_i64(diff, op1, op0); + /* If op0 >= op1, diff <= 0, the condition is always true. */ + tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); + } + + /* Bound to the maximum. */ + tcg_gen_umin_i64(diff, diff, tmax); + tcg_temp_free_i64(tmax); + + /* Since we're bounded, pass as a 32-bit type. 
*/ + t2 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t2, diff); + tcg_temp_free_i64(diff); + + desc = (vsz / 8) - 2; + desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz); + t3 = tcg_const_i32(desc); + + ptr = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); + + gen_helper_sve_whilel(t2, ptr, t2, t3); + do_pred_flags(t2); + + tcg_temp_free_ptr(ptr); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + return true; +} + /* *** SVE Integer Wide Immediate - Unpredicated Group */
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- v2: Fix decodetree typo --- target/arm/sve.decode | 3 ++ target/arm/translate-sve.c | 62 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+)