Message ID | 20221202065200.224537-7-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | tcg/s390x: misc patches | expand |
On Thu, Dec 01, 2022 at 10:51:53PM -0800, Richard Henderson wrote: > The MIE2 facility adds 3-operand versions of multiply. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > tcg/s390x/tcg-target-con-set.h | 1 + > tcg/s390x/tcg-target.h | 1 + > tcg/s390x/tcg-target.c.inc | 34 ++++++++++++++++++++++++---------- > 3 files changed, 26 insertions(+), 10 deletions(-) Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com> I have one small suggestion, see below. > diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h > index 00ba727b70..33a82e3286 100644 > --- a/tcg/s390x/tcg-target-con-set.h > +++ b/tcg/s390x/tcg-target-con-set.h > @@ -23,6 +23,7 @@ C_O1_I2(r, 0, ri) > C_O1_I2(r, 0, rI) > C_O1_I2(r, 0, rJ) > C_O1_I2(r, r, ri) > +C_O1_I2(r, r, rJ) > C_O1_I2(r, rZ, r) > C_O1_I2(v, v, r) > C_O1_I2(v, v, v) > diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h > index 645f522058..bfd623a639 100644 > --- a/tcg/s390x/tcg-target.h > +++ b/tcg/s390x/tcg-target.h > @@ -63,6 +63,7 @@ typedef enum TCGReg { > #define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND > #define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND > #define FACILITY_LOAD_ON_COND2 53 > +#define FACILITY_MISC_INSN_EXT2 58 > #define FACILITY_VECTOR 129 > #define FACILITY_VECTOR_ENH1 135 > > diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc > index d02b433271..cd39b2a208 100644 > --- a/tcg/s390x/tcg-target.c.inc > +++ b/tcg/s390x/tcg-target.c.inc > @@ -180,6 +180,8 @@ typedef enum S390Opcode { > RRE_SLBGR = 0xb989, > RRE_XGR = 0xb982, > > + RRFa_MSRKC = 0xb9fd, > + RRFa_MSGRKC = 0xb9ed, > RRFa_NRK = 0xb9f4, > RRFa_NGRK = 0xb9e4, > RRFa_ORK = 0xb9f6, > @@ -2140,14 +2142,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, > break; > > case INDEX_op_mul_i32: > + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; > if (const_args[2]) { > - if ((int32_t)args[2] == (int16_t)args[2]) { > - tcg_out_insn(s, RI, MHI, args[0], args[2]); > + 
tcg_out_mov(s, TCG_TYPE_I32, a0, a1); Should we consider a0 == a1 case here as well, in order to get rid of this extra move when possible? > + if (a2 == (int16_t)a2) { > + tcg_out_insn(s, RI, MHI, a0, a2); > } else { > - tcg_out_insn(s, RIL, MSFI, args[0], args[2]); > + tcg_out_insn(s, RIL, MSFI, a0, a2); > } > + } else if (a0 == a1) { > + tcg_out_insn(s, RRE, MSR, a0, a2); > } else { > - tcg_out_insn(s, RRE, MSR, args[0], args[2]); > + tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2); > } > break; > > @@ -2405,14 +2411,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, > break; > > case INDEX_op_mul_i64: > + a0 = args[0], a1 = args[1], a2 = args[2]; > if (const_args[2]) { > - if (args[2] == (int16_t)args[2]) { > - tcg_out_insn(s, RI, MGHI, args[0], args[2]); > + tcg_out_mov(s, TCG_TYPE_I64, a0, a1); Same here. > + if (a2 == (int16_t)a2) { > + tcg_out_insn(s, RI, MGHI, a0, a2); > } else { > - tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); > + tcg_out_insn(s, RIL, MSGFI, a0, a2); > } > + } else if (a0 == a1) { > + tcg_out_insn(s, RRE, MSGR, a0, a2); > } else { > - tcg_out_insn(s, RRE, MSGR, args[0], args[2]); > + tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2); > } > break; > > @@ -3072,12 +3082,16 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) > MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we > have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */ > return (HAVE_FACILITY(GEN_INST_EXT) > - ? C_O1_I2(r, 0, ri) > + ? (HAVE_FACILITY(MISC_INSN_EXT2) > + ? C_O1_I2(r, r, ri) > + : C_O1_I2(r, 0, ri)) > : C_O1_I2(r, 0, rI)); > > case INDEX_op_mul_i64: > return (HAVE_FACILITY(GEN_INST_EXT) > - ? C_O1_I2(r, 0, rJ) > + ? (HAVE_FACILITY(MISC_INSN_EXT2) > + ? C_O1_I2(r, r, rJ) > + : C_O1_I2(r, 0, rJ)) > : C_O1_I2(r, 0, rI)); > > case INDEX_op_shl_i32: > -- > 2.34.1 > >
On Tue, 6 Dec 2022, 14:02 Ilya Leoshkevich, <iii@linux.ibm.com> wrote: > On Thu, Dec 01, 2022 at 10:51:53PM -0800, Richard Henderson wrote: > > The MIE2 facility adds 3-operand versions of multiply. > > > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > > --- > > tcg/s390x/tcg-target-con-set.h | 1 + > > tcg/s390x/tcg-target.h | 1 + > > tcg/s390x/tcg-target.c.inc | 34 ++++++++++++++++++++++++---------- > > 3 files changed, 26 insertions(+), 10 deletions(-) > > Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com> > > I have one small suggestion, see below. > > > diff --git a/tcg/s390x/tcg-target-con-set.h > b/tcg/s390x/tcg-target-con-set.h > > index 00ba727b70..33a82e3286 100644 > > --- a/tcg/s390x/tcg-target-con-set.h > > +++ b/tcg/s390x/tcg-target-con-set.h > > @@ -23,6 +23,7 @@ C_O1_I2(r, 0, ri) > > C_O1_I2(r, 0, rI) > > C_O1_I2(r, 0, rJ) > > C_O1_I2(r, r, ri) > > +C_O1_I2(r, r, rJ) > > C_O1_I2(r, rZ, r) > > C_O1_I2(v, v, r) > > C_O1_I2(v, v, v) > > diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h > > index 645f522058..bfd623a639 100644 > > --- a/tcg/s390x/tcg-target.h > > +++ b/tcg/s390x/tcg-target.h > > @@ -63,6 +63,7 @@ typedef enum TCGReg { > > #define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND > > #define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND > > #define FACILITY_LOAD_ON_COND2 53 > > +#define FACILITY_MISC_INSN_EXT2 58 > > #define FACILITY_VECTOR 129 > > #define FACILITY_VECTOR_ENH1 135 > > > > diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc > > index d02b433271..cd39b2a208 100644 > > --- a/tcg/s390x/tcg-target.c.inc > > +++ b/tcg/s390x/tcg-target.c.inc > > @@ -180,6 +180,8 @@ typedef enum S390Opcode { > > RRE_SLBGR = 0xb989, > > RRE_XGR = 0xb982, > > > > + RRFa_MSRKC = 0xb9fd, > > + RRFa_MSGRKC = 0xb9ed, > > RRFa_NRK = 0xb9f4, > > RRFa_NGRK = 0xb9e4, > > RRFa_ORK = 0xb9f6, > > @@ -2140,14 +2142,18 @@ static inline void tcg_out_op(TCGContext *s, > TCGOpcode opc, > > break; > > > > case INDEX_op_mul_i32: 
> > + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
> > if (const_args[2]) {
> > - if ((int32_t)args[2] == (int16_t)args[2]) {
> > - tcg_out_insn(s, RI, MHI, args[0], args[2]);
> > + tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
>
> Should we consider a0 == a1 case here as well, in order to get rid of
> this extra move when possible?
>

tcg_out_mov already does that.

r~
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 00ba727b70..33a82e3286 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -23,6 +23,7 @@ C_O1_I2(r, 0, ri) C_O1_I2(r, 0, rI) C_O1_I2(r, 0, rJ) C_O1_I2(r, r, ri) +C_O1_I2(r, r, rJ) C_O1_I2(r, rZ, r) C_O1_I2(v, v, r) C_O1_I2(v, v, v) diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 645f522058..bfd623a639 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -63,6 +63,7 @@ typedef enum TCGReg { #define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND #define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND #define FACILITY_LOAD_ON_COND2 53 +#define FACILITY_MISC_INSN_EXT2 58 #define FACILITY_VECTOR 129 #define FACILITY_VECTOR_ENH1 135 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index d02b433271..cd39b2a208 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -180,6 +180,8 @@ typedef enum S390Opcode { RRE_SLBGR = 0xb989, RRE_XGR = 0xb982, + RRFa_MSRKC = 0xb9fd, + RRFa_MSGRKC = 0xb9ed, RRFa_NRK = 0xb9f4, RRFa_NGRK = 0xb9e4, RRFa_ORK = 0xb9f6, @@ -2140,14 +2142,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_mul_i32: + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; if (const_args[2]) { - if ((int32_t)args[2] == (int16_t)args[2]) { - tcg_out_insn(s, RI, MHI, args[0], args[2]); + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, MHI, a0, a2); } else { - tcg_out_insn(s, RIL, MSFI, args[0], args[2]); + tcg_out_insn(s, RIL, MSFI, a0, a2); } + } else if (a0 == a1) { + tcg_out_insn(s, RRE, MSR, a0, a2); } else { - tcg_out_insn(s, RRE, MSR, args[0], args[2]); + tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2); } break; @@ -2405,14 +2411,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_mul_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - if (args[2] == 
(int16_t)args[2]) { - tcg_out_insn(s, RI, MGHI, args[0], args[2]); + tcg_out_mov(s, TCG_TYPE_I64, a0, a1); + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, MGHI, a0, a2); } else { - tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); + tcg_out_insn(s, RIL, MSGFI, a0, a2); } + } else if (a0 == a1) { + tcg_out_insn(s, RRE, MSGR, a0, a2); } else { - tcg_out_insn(s, RRE, MSGR, args[0], args[2]); + tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2); } break; @@ -3072,12 +3082,16 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */ return (HAVE_FACILITY(GEN_INST_EXT) - ? C_O1_I2(r, 0, ri) + ? (HAVE_FACILITY(MISC_INSN_EXT2) + ? C_O1_I2(r, r, ri) + : C_O1_I2(r, 0, ri)) : C_O1_I2(r, 0, rI)); case INDEX_op_mul_i64: return (HAVE_FACILITY(GEN_INST_EXT) - ? C_O1_I2(r, 0, rJ) + ? (HAVE_FACILITY(MISC_INSN_EXT2) + ? C_O1_I2(r, r, rJ) + : C_O1_I2(r, 0, rJ)) : C_O1_I2(r, 0, rI)); case INDEX_op_shl_i32:
The MIE2 (Miscellaneous-Instruction-Extensions Facility 2) facility adds 3-operand versions of multiply.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/s390x/tcg-target-con-set.h | 1 +
 tcg/s390x/tcg-target.h | 1 +
 tcg/s390x/tcg-target.c.inc | 34 ++++++++++++++++++++++++----------
 3 files changed, 26 insertions(+), 10 deletions(-)