@@ -524,9 +524,35 @@ typedef struct TCGTemp {
typedef struct TCGContext TCGContext;
typedef struct TCGTempSet {
- unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
+ unsigned long *data;
+ size_t word_len;
} TCGTempSet;
+void tempset_init(TCGTempSet *set, size_t len);
+bool tempset_find_first(const TCGTempSet *set, size_t *i);
+void tempset_set(TCGTempSet *set, size_t i);
+
+ /*
+  * Clear every bit in @set.  The backing array keeps its current size;
+  * only its contents are zeroed.
+  */
+ static inline void tempset_clear_all(TCGTempSet *set)
+ {
+     size_t nbytes = set->word_len * sizeof(unsigned long);
+
+     memset(set->data, 0, nbytes);
+ }
+
+ /*
+  * Clear bit @i in @set.  An index whose word lies beyond the current
+  * array is ignored: the array is never grown by a clear.
+  */
+ static inline void tempset_clear(TCGTempSet *set, size_t i)
+ {
+     size_t word = i / BITS_PER_LONG;
+
+     if (!likely(word < set->word_len)) {
+         return;
+     }
+     set->data[word] &= ~BIT(i % BITS_PER_LONG);
+ }
+
+ /*
+  * Return true if bit @i is set in @set.  An index whose word lies
+  * beyond the current array is reported as not set.
+  */
+ static inline bool tempset_test(const TCGTempSet *set, size_t i)
+ {
+     size_t word = i / BITS_PER_LONG;
+
+     if (word >= set->word_len) {
+         return false;
+     }
+     return (set->data[word] & BIT(i % BITS_PER_LONG)) != 0;
+ }
+
/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
There are never more than 2 outputs, which means that we can store all
@@ -94,10 +94,10 @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
size_t idx = temp_idx(ts);
TempOptInfo *ti;
- if (test_bit(idx, temps_used->l)) {
+ if (tempset_test(temps_used, idx)) {
return;
}
- set_bit(idx, temps_used->l);
+ tempset_set(temps_used, idx);
ti = ts->state_ptr;
if (ti == NULL) {
@@ -612,7 +612,7 @@ void tcg_optimize(TCGContext *s)
nb_temps = s->nb_temps;
nb_globals = s->nb_globals;
- memset(&temps_used, 0, sizeof(temps_used));
+ tempset_init(&temps_used, nb_temps);
for (i = 0; i < nb_temps; ++i) {
TCGTemp *ts = tcg_temp(s, i);
ts->state_ptr = NULL;
@@ -1254,7 +1254,7 @@ void tcg_optimize(TCGContext *s)
op->args[1], op->args[2]);
if (tmp != 2) {
if (tmp) {
- memset(&temps_used, 0, sizeof(temps_used));
+ tempset_clear_all(&temps_used);
op->opc = INDEX_op_br;
op->args[0] = op->args[3];
} else {
@@ -1338,7 +1338,7 @@ void tcg_optimize(TCGContext *s)
if (tmp != 2) {
if (tmp) {
do_brcond_true:
- memset(&temps_used, 0, sizeof(temps_used));
+ tempset_clear_all(&temps_used);
op->opc = INDEX_op_br;
op->args[0] = op->args[5];
} else {
@@ -1354,7 +1354,7 @@ void tcg_optimize(TCGContext *s)
/* Simplify LT/GE comparisons vs zero to a single compare
vs the high word of the input. */
do_brcond_high:
- memset(&temps_used, 0, sizeof(temps_used));
+ tempset_clear_all(&temps_used);
op->opc = INDEX_op_brcond_i32;
op->args[0] = op->args[1];
op->args[1] = op->args[3];
@@ -1380,7 +1380,7 @@ void tcg_optimize(TCGContext *s)
goto do_default;
}
do_brcond_low:
- memset(&temps_used, 0, sizeof(temps_used));
+ tempset_clear_all(&temps_used);
op->opc = INDEX_op_brcond_i32;
op->args[1] = op->args[2];
op->args[2] = op->args[4];
@@ -1485,7 +1485,7 @@ void tcg_optimize(TCGContext *s)
if (!(op->args[nb_oargs + nb_iargs + 1]
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
for (i = 0; i < nb_globals; i++) {
- if (test_bit(i, temps_used.l)) {
+ if (tempset_test(&temps_used, i)) {
reset_ts(tcg_temp(s, i));
}
}
@@ -1500,7 +1500,7 @@ void tcg_optimize(TCGContext *s)
block, otherwise we only trash the output args. "mask" is
the non-zero bits mask for the first output arg. */
if (def->flags & TCG_OPF_BB_END) {
- memset(&temps_used, 0, sizeof(temps_used));
+ tempset_clear_all(&temps_used);
} else {
do_reset_output:
for (i = 0; i < nb_oargs; i++) {
@@ -1182,7 +1182,9 @@ void tcg_func_start(TCGContext *s)
s->nb_temps = s->nb_globals;
/* No temps have been previously allocated for size or locality. */
- memset(s->free_temps, 0, sizeof(s->free_temps));
+ for (int i = 0; i < ARRAY_SIZE(s->free_temps); ++i) {
+ tempset_init(&s->free_temps[i], TCG_MAX_TEMPS);
+ }
/* No constant temps have been previously allocated. */
for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
@@ -1324,13 +1326,12 @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
TCGContext *s = tcg_ctx;
TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
TCGTemp *ts;
- int idx, k;
+ size_t idx, k;
k = type + (temp_local ? TCG_TYPE_COUNT : 0);
- idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
- if (idx < TCG_MAX_TEMPS) {
+ if (tempset_find_first(&s->free_temps[k], &idx)) {
/* There is already an available temp with the right type. */
- clear_bit(idx, s->free_temps[k].l);
+ tempset_clear(&s->free_temps[k], idx);
ts = tcg_temp(s, idx);
ts->temp_allocated = 1;
@@ -1403,7 +1404,7 @@ TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
void tcg_temp_free_internal(TCGTemp *ts)
{
TCGContext *s = tcg_ctx;
- int k, idx;
+ size_t k, idx;
/* In order to simplify users of tcg_constant_*, silently ignore free. */
if (ts->kind == TEMP_CONST) {
@@ -1423,7 +1424,7 @@ void tcg_temp_free_internal(TCGTemp *ts)
idx = temp_idx(ts);
k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
- set_bit(idx, s->free_temps[k].l);
+ tempset_set(&s->free_temps[k], idx);
}
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
@@ -4665,6 +4666,42 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
return tcg_current_code_size(s);
}
+ /*
+  * Initialise @set as an empty set with room for @len bits.
+  *
+  * The backing array holds BITS_TO_LONGS(@len) words, is obtained from
+  * tcg_malloc() and is zero-filled before it is installed in @set.
+  */
+ void tempset_init(TCGTempSet *set, size_t len)
+ {
+     size_t nwords = BITS_TO_LONGS(len);
+     size_t nbytes = nwords * sizeof(unsigned long);
+     unsigned long *words = tcg_malloc(nbytes);
+
+     memset(words, 0, nbytes);
+
+     set->data = words;
+     set->word_len = nwords;
+ }
+
+ /*
+  * Set bit @i in @set, growing the backing array when @i lies beyond
+  * the current capacity.
+  *
+  * The capacity is doubled repeatedly until the word holding @i fits,
+  * so one call can reach an index arbitrarily far past the old end, and
+  * a set that currently has zero words is grown as well.  The
+  * replacement array is obtained from tcg_malloc(); existing bits are
+  * copied over and the new tail is zero-filled.  The old array is not
+  * released here.
+  */
+ void tempset_set(TCGTempSet *set, size_t i)
+ {
+     size_t l = i / BITS_PER_LONG;
+     size_t b = i % BITS_PER_LONG;
+
+     if (l >= set->word_len) {
+         size_t old_blen = set->word_len * sizeof(unsigned long);
+         /* Start from at least one word: doubling zero never grows. */
+         size_t new_wlen = set->word_len ? set->word_len : 1;
+         size_t new_blen;
+         unsigned long *new_data;
+
+         /* A single doubling is not enough when @i is far past the end. */
+         while (new_wlen <= l) {
+             new_wlen *= 2;
+         }
+         new_blen = new_wlen * sizeof(unsigned long);
+         new_data = tcg_malloc(new_blen);
+
+         if (old_blen) {
+             memcpy(new_data, set->data, old_blen);
+         }
+         memset((char *)new_data + old_blen, 0, new_blen - old_blen);
+
+         set->data = new_data;
+         set->word_len = new_wlen;
+     }
+     set->data[l] |= BIT(b);
+ }
+
+ /*
+  * Look for the lowest set bit in @set.
+  *
+  * The result of find_first_bit() over the whole array is always stored
+  * in *@i.  Returns true when that value is below the array's bit
+  * capacity, false otherwise.
+  */
+ bool tempset_find_first(const TCGTempSet *set, size_t *i)
+ {
+     size_t nbits = set->word_len * BITS_PER_LONG;
+
+     *i = find_first_bit(set->data, nbits);
+     return *i < nbits;
+ }
+
#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
Introduce a complete set of operations on TCGTempSet, and do not directly call <qemu/bitops.h> functions. Expand the array as necessary on SET. Use the tcg allocation pool so that we do not have to worry about explicitly freeing the array. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- include/tcg/tcg.h | 28 +++++++++++++++++++++++++- tcg/optimize.c | 18 ++++++++--------- tcg/tcg.c | 51 ++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 80 insertions(+), 17 deletions(-) -- 2.25.1