@@ -951,6 +951,8 @@ typedef struct TCGArgConstraint {
unsigned ct : 16;
unsigned alias_index : 4;
unsigned sort_index : 4;
+ unsigned pair_index : 4;
+ unsigned pair : 2; /* 0: none, 1: first, 2: second, 3: second alias */
bool oalias : 1;
bool ialias : 1;
bool newreg : 1;
@@ -1969,15 +1969,32 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
static int get_constraint_priority(const TCGOpDef *def, int k)
{
const TCGArgConstraint *arg_ct = &def->args_ct[k];
- int n;
+ int n = ctpop64(arg_ct->regs);
- if (arg_ct->oalias) {
- /* an alias is equivalent to a single register */
- n = 1;
- } else {
- n = ctpop64(arg_ct->regs);
+ /*
+ * Sort constraints of a single register first, which includes output
+ * aliases (which must exactly match the input already allocated).
+ */
+ if (n == 1 || arg_ct->oalias) {
+ return INT_MAX;
}
- return TCG_TARGET_NB_REGS - n + 1;
+
+ /*
+ * Sort register pairs next, first then second immediately after.
+ * Arbitrarily sort multiple pairs by the index of the first reg;
+ * there shouldn't be many pairs.
+ */
+ switch (arg_ct->pair) {
+ case 1:
+ case 3:
+ return (k + 1) * 2;
+ case 2:
+ return (arg_ct->pair_index + 1) * 2 - 1;
+ }
+
+ /* Finally, sort by decreasing register count. */
+ assert(n > 1);
+ return -n;
}
/* sort from highest priority to lowest */
@@ -2012,7 +2029,8 @@ static void process_op_defs(TCGContext *s)
for (op = 0; op < NB_OPS; op++) {
TCGOpDef *def = &tcg_op_defs[op];
const TCGTargetOpDef *tdefs;
- int i, o, nb_args;
+ bool saw_alias_pair = false;
+ int i, o, i2, o2, nb_args;
if (def->flags & TCG_OPF_NOT_PRESENT) {
continue;
@@ -2053,6 +2071,9 @@ static void process_op_defs(TCGContext *s)
/* The input sets ialias. */
def->args_ct[i].ialias = 1;
def->args_ct[i].alias_index = o;
+ if (def->args_ct[i].pair) {
+ saw_alias_pair = true;
+ }
tcg_debug_assert(ct_str[1] == '\0');
continue;
@@ -2061,6 +2082,38 @@ static void process_op_defs(TCGContext *s)
def->args_ct[i].newreg = true;
ct_str++;
break;
+
+ case 'p': /* plus */
+ /* Allocate to the register after the previous. */
+ tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
+ o = i - 1;
+ tcg_debug_assert(!def->args_ct[o].pair);
+ tcg_debug_assert(!def->args_ct[o].ct);
+ def->args_ct[i] = (TCGArgConstraint){
+ .pair = 2,
+ .pair_index = o,
+ .regs = def->args_ct[o].regs << 1,
+ };
+ def->args_ct[o].pair = 1;
+ def->args_ct[o].pair_index = i;
+ tcg_debug_assert(ct_str[1] == '\0');
+ continue;
+
+ case 'm': /* minus */
+ /* Allocate to the register before the previous. */
+ tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
+ o = i - 1;
+ tcg_debug_assert(!def->args_ct[o].pair);
+ tcg_debug_assert(!def->args_ct[o].ct);
+ def->args_ct[i] = (TCGArgConstraint){
+ .pair = 1,
+ .pair_index = o,
+ .regs = def->args_ct[o].regs >> 1,
+ };
+ def->args_ct[o].pair = 2;
+ def->args_ct[o].pair_index = i;
+ tcg_debug_assert(ct_str[1] == '\0');
+ continue;
}
do {
@@ -2084,6 +2137,8 @@ static void process_op_defs(TCGContext *s)
default:
case '0' ... '9':
case '&':
+ case 'p':
+ case 'm':
/* Typo in TCGTargetOpDef constraint. */
g_assert_not_reached();
}
@@ -2093,6 +2148,79 @@ static void process_op_defs(TCGContext *s)
/* TCGTargetOpDef entry with too much information? */
tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
+ /*
+ * Fix up output pairs that are aliased with inputs.
+ * When we created the alias, we copied pair from the output.
+ * There are three cases:
+ * (1a) Pairs of inputs alias pairs of outputs.
+ * (1b) One input aliases the first of a pair of outputs.
+ * (2) One input aliases the second of a pair of outputs.
+ *
+ * Case 1a is handled by making sure that the pair_index'es are
+ * properly updated so that they appear the same as a pair of inputs.
+ *
+ * Case 1b is handled by setting the pair_index of the input to
+ * itself, simply so it doesn't point to an unrelated argument.
+ * Since we don't encounter the "second" during the input allocation
+ * phase, nothing happens with the second half of the input pair.
+ *
+ * Case 2 is handled by setting the second input to pair=3, the
+ * first output to pair=3, and the pair_index'es to match.
+ */
+ if (saw_alias_pair) {
+ for (i = def->nb_oargs; i < nb_args; i++) {
+ /*
+ * Since [0-9pm] must be alone in the constraint string,
+ * the only way they can both be set is if the pair comes
+ * from the output alias.
+ */
+ if (!def->args_ct[i].ialias) {
+ continue;
+ }
+ switch (def->args_ct[i].pair) {
+ case 0:
+ break;
+ case 1:
+ o = def->args_ct[i].alias_index;
+ o2 = def->args_ct[o].pair_index;
+ tcg_debug_assert(def->args_ct[o].pair == 1);
+ tcg_debug_assert(def->args_ct[o2].pair == 2);
+ if (def->args_ct[o2].oalias) {
+ /* Case 1a */
+ i2 = def->args_ct[o2].alias_index;
+ tcg_debug_assert(def->args_ct[i2].pair == 2);
+ def->args_ct[i2].pair_index = i;
+ def->args_ct[i].pair_index = i2;
+ } else {
+ /* Case 1b */
+ def->args_ct[i].pair_index = i;
+ }
+ break;
+ case 2:
+ o = def->args_ct[i].alias_index;
+ o2 = def->args_ct[o].pair_index;
+ tcg_debug_assert(def->args_ct[o].pair == 2);
+ tcg_debug_assert(def->args_ct[o2].pair == 1);
+ if (def->args_ct[o2].oalias) {
+ /* Case 1a */
+ i2 = def->args_ct[o2].alias_index;
+ tcg_debug_assert(def->args_ct[i2].pair == 1);
+ def->args_ct[i2].pair_index = i;
+ def->args_ct[i].pair_index = i2;
+ } else {
+ /* Case 2 */
+ def->args_ct[i].pair = 3;
+ def->args_ct[o2].pair = 3;
+ def->args_ct[i].pair_index = o2;
+ def->args_ct[o2].pair_index = i;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+ }
+
/* sort the constraints (XXX: this is just an heuristic) */
sort_constraints(def, 0, def->nb_oargs);
sort_constraints(def, def->nb_oargs, def->nb_iargs);
@@ -3141,6 +3269,52 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
tcg_abort();
}
+static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
+ TCGRegSet allocated_regs,
+ TCGRegSet preferred_regs, bool rev)
+{
+ int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
+ TCGRegSet reg_ct[2];
+ const int *order;
+
+ /* Ensure that if I is not in allocated_regs, I+1 is not either. */
+ reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
+ tcg_debug_assert(reg_ct[1] != 0);
+ reg_ct[0] = reg_ct[1] & preferred_regs;
+
+ order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
+
+ /*
+ * Skip the preferred_regs option if it cannot be satisfied,
+ * or if the preference made no difference.
+ */
+ k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
+
+ /*
+ * Minimize the number of flushes by looking for 2 free registers first,
+ * then a single flush, then two flushes.
+ */
+ for (fmin = 2; fmin >= 0; fmin--) {
+ for (j = k; j < 2; j++) {
+ TCGRegSet set = reg_ct[j];
+
+ for (i = 0; i < n; i++) {
+ TCGReg reg = order[i];
+
+ if (tcg_regset_test_reg(set, reg)) {
+ int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
+ if (f >= fmin) {
+ tcg_reg_free(s, reg, allocated_regs);
+ tcg_reg_free(s, reg + 1, allocated_regs);
+ return reg;
+ }
+ }
+ }
+ }
+ }
+ tcg_abort();
+}
+
/* Make sure the temporary is in a register. If needed, allocate the register
from DESIRED while avoiding ALLOCATED. */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
@@ -3550,8 +3724,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
/* satisfy input constraints */
for (k = 0; k < nb_iargs; k++) {
- TCGRegSet i_preferred_regs;
- bool allocate_new_reg;
+ TCGRegSet i_preferred_regs, i_required_regs;
+ bool allocate_new_reg, copyto_new_reg;
+ TCGTemp *ts2;
+ int i1, i2;
i = def->args_ct[nb_oargs + k].sort_index;
arg = op->args[i];
@@ -3568,43 +3744,164 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
reg = ts->reg;
i_preferred_regs = 0;
+ i_required_regs = arg_ct->regs;
allocate_new_reg = false;
+ copyto_new_reg = false;
- if (arg_ct->ialias) {
+ switch (arg_ct->pair) {
+ case 0: /* not paired */
+ if (arg_ct->ialias) {
+ i_preferred_regs = op->output_pref[arg_ct->alias_index];
+
+ /*
+ * If the input is readonly, then it cannot also be an
+ * output and aliased to itself. If the input is not
+ * dead after the instruction, we must allocate a new
+ * register and move it.
+ */
+ if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
+ allocate_new_reg = true;
+ } else if (ts->val_type == TEMP_VAL_REG) {
+ /*
+ * Check if the current register has already been
+ * allocated for another input.
+ */
+ allocate_new_reg =
+ tcg_regset_test_reg(i_allocated_regs, reg);
+ }
+ }
+ if (!allocate_new_reg) {
+ temp_load(s, ts, i_required_regs, i_allocated_regs,
+ i_preferred_regs);
+ reg = ts->reg;
+ allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
+ }
+ if (allocate_new_reg) {
+ /*
+ * Allocate a new register matching the constraint
+ * and move the temporary register into it.
+ */
+ temp_load(s, ts, tcg_target_available_regs[ts->type],
+ i_allocated_regs, 0);
+ reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
+ i_preferred_regs, ts->indirect_base);
+ copyto_new_reg = true;
+ }
+ break;
+
+ case 1:
+ /* First of an input pair; if i1 == i2, the second is an output. */
+ i1 = i;
+ i2 = arg_ct->pair_index;
+ ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
+
+ /*
+ * It is easier to default to allocating a new pair
+ * and to identify a few cases where it's not required.
+ */
+ if (arg_ct->ialias) {
+ i_preferred_regs = op->output_pref[arg_ct->alias_index];
+ if (IS_DEAD_ARG(i1) &&
+ IS_DEAD_ARG(i2) &&
+ !temp_readonly(ts) &&
+ ts->val_type == TEMP_VAL_REG &&
+ ts->reg < TCG_TARGET_NB_REGS - 1 &&
+ tcg_regset_test_reg(i_required_regs, reg) &&
+ !tcg_regset_test_reg(i_allocated_regs, reg) &&
+ !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
+ (ts2
+ ? ts2->val_type == TEMP_VAL_REG &&
+ ts2->reg == reg + 1 &&
+ !temp_readonly(ts2)
+ : s->reg_to_temp[reg + 1] == NULL)) {
+ break;
+ }
+ } else {
+ /* Without aliasing, the pair must also be an input. */
+ tcg_debug_assert(ts2);
+ if (ts->val_type == TEMP_VAL_REG &&
+ ts2->val_type == TEMP_VAL_REG &&
+ ts2->reg == reg + 1 &&
+ tcg_regset_test_reg(i_required_regs, reg)) {
+ break;
+ }
+ }
+ reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
+ 0, ts->indirect_base);
+ goto do_pair;
+
+ case 2: /* pair second */
+ reg = new_args[arg_ct->pair_index] + 1;
+ goto do_pair;
+
+ case 3: /* ialias with second output, no first input */
+ tcg_debug_assert(arg_ct->ialias);
i_preferred_regs = op->output_pref[arg_ct->alias_index];
- /*
- * If the input is readonly, then it cannot also be an
- * output and aliased to itself. If the input is not
- * dead after the instruction, we must allocate a new
- * register and move it.
- */
- if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
- allocate_new_reg = true;
- } else if (ts->val_type == TEMP_VAL_REG) {
- /*
- * Check if the current register has already been
- * allocated for another input.
- */
- allocate_new_reg = tcg_regset_test_reg(i_allocated_regs, reg);
+ if (IS_DEAD_ARG(i) &&
+ !temp_readonly(ts) &&
+ ts->val_type == TEMP_VAL_REG &&
+ reg > 0 &&
+ s->reg_to_temp[reg - 1] == NULL &&
+ tcg_regset_test_reg(i_required_regs, reg) &&
+ !tcg_regset_test_reg(i_allocated_regs, reg) &&
+ !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
+ tcg_regset_set_reg(i_allocated_regs, reg - 1);
+ break;
}
- }
+ reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
+ i_allocated_regs, 0,
+ ts->indirect_base);
+ tcg_regset_set_reg(i_allocated_regs, reg);
+ reg += 1;
+ goto do_pair;
- if (!allocate_new_reg) {
- temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
- reg = ts->reg;
- allocate_new_reg = !tcg_regset_test_reg(arg_ct->regs, reg);
- }
-
- if (allocate_new_reg) {
+ do_pair:
/*
- * Allocate a new register matching the constraint
- * and move the temporary register into it.
+ * If an aliased input is not dead after the instruction,
+ * we must allocate a new register and move it.
*/
- temp_load(s, ts, tcg_target_available_regs[ts->type],
- i_allocated_regs, 0);
- reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
- i_preferred_regs, ts->indirect_base);
+ if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
+ TCGRegSet t_allocated_regs = i_allocated_regs;
+
+ /*
+ * Because of the alias, and the continued life, make sure
+ * that the temp is somewhere *other* than the reg pair,
+ * and we get a copy in reg.
+ */
+ tcg_regset_set_reg(t_allocated_regs, reg);
+ tcg_regset_set_reg(t_allocated_regs, reg + 1);
+ if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
+ /* If ts was already in reg, copy it somewhere else. */
+ TCGReg nr;
+ bool ok;
+
+ tcg_debug_assert(ts->kind != TEMP_FIXED);
+ nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ t_allocated_regs, 0, ts->indirect_base);
+ ok = tcg_out_mov(s, ts->type, nr, reg);
+ tcg_debug_assert(ok);
+
+ set_temp_val_reg(s, ts, nr);
+ } else {
+ temp_load(s, ts, tcg_target_available_regs[ts->type],
+ t_allocated_regs, 0);
+ copyto_new_reg = true;
+ }
+ } else {
+ /* Preferably allocate to reg, otherwise copy. */
+ i_required_regs = (TCGRegSet)1 << reg;
+ temp_load(s, ts, i_required_regs, i_allocated_regs,
+ i_preferred_regs);
+ copyto_new_reg = ts->reg != reg;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ if (copyto_new_reg) {
if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
/*
* Cross register class move not supported. Sync the
@@ -3656,15 +3953,46 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
/* ENV should not be modified. */
tcg_debug_assert(!temp_readonly(ts));
- if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
- reg = new_args[arg_ct->alias_index];
- } else if (arg_ct->newreg) {
- reg = tcg_reg_alloc(s, arg_ct->regs,
- i_allocated_regs | o_allocated_regs,
- op->output_pref[k], ts->indirect_base);
- } else {
- reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
- op->output_pref[k], ts->indirect_base);
+ switch (arg_ct->pair) {
+ case 0: /* not paired */
+ if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
+ reg = new_args[arg_ct->alias_index];
+ } else if (arg_ct->newreg) {
+ reg = tcg_reg_alloc(s, arg_ct->regs,
+ i_allocated_regs | o_allocated_regs,
+ op->output_pref[k], ts->indirect_base);
+ } else {
+ reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
+ op->output_pref[k], ts->indirect_base);
+ }
+ break;
+
+ case 1: /* first of pair */
+ tcg_debug_assert(!arg_ct->newreg);
+ if (arg_ct->oalias) {
+ reg = new_args[arg_ct->alias_index];
+ break;
+ }
+ reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
+ op->output_pref[k], ts->indirect_base);
+ break;
+
+ case 2: /* second of pair */
+ tcg_debug_assert(!arg_ct->newreg);
+ if (arg_ct->oalias) {
+ reg = new_args[arg_ct->alias_index];
+ } else {
+ reg = new_args[arg_ct->pair_index] + 1;
+ }
+ break;
+
+ case 3: /* first of pair, aliasing with a second input */
+ tcg_debug_assert(!arg_ct->newreg);
+ reg = new_args[arg_ct->pair_index] - 1;
+ break;
+
+ default:
+ g_assert_not_reached();
}
tcg_regset_set_reg(o_allocated_regs, reg);
set_temp_val_reg(s, ts, reg);
There are several instances where we need to be able to allocate a pair of registers to related inputs/outputs. Add 'p' and 'm' register constraints for this, in order to be able to allocate the even/odd register first or second. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- include/tcg/tcg.h | 2 + tcg/tcg.c | 424 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 378 insertions(+), 48 deletions(-)