@@ -91,7 +91,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i32 0
#define TCG_TARGET_HAS_ctz_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_extract_i32 1
#define TCG_TARGET_HAS_sextract_i32 0
@@ -128,7 +128,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i64 1
#define TCG_TARGET_HAS_ctz_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_extract_i64 1
#define TCG_TARGET_HAS_sextract_i64 0
@@ -206,6 +206,7 @@ typedef enum S390Opcode {
RRFc_LOCR = 0xb9f2,
RRFc_LOCGR = 0xb9e2,
+ RRFc_POPCNT = 0xb9e1,
RR_AR = 0x1a,
RR_ALR = 0x1e,
@@ -1435,6 +1436,32 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
}
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ /* With MIE3, and bit 0 of m4 set, we get the complete result. */
+ if (HAVE_FACILITY(MISC_INSN_EXT3)) {
+ if (type == TCG_TYPE_I32) {
+ tgen_ext32u(s, dest, src);
+ src = dest;
+ }
+ tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
+ return;
+ }
+
+ /* Without MIE3, each byte gets the count of bits for the byte. */
+ tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
+
+ /* Multiply to sum each byte at the top of the word. */
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
+ tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
+ tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
+ tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
+ }
+}
+
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
int ofs, int len, int z)
{
@@ -2584,6 +2611,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tgen_clz(s, args[0], args[1], args[2], const_args[2]);
break;
+ case INDEX_op_ctpop_i32:
+ tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ctpop_i64:
+ tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+
case INDEX_op_mb:
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
@@ -3146,6 +3180,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_extu_i32_i64:
case INDEX_op_extract_i32:
case INDEX_op_extract_i64:
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_i32: