@@ -88,39 +88,6 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
jpi->host / (double) jpi->translations);
g_string_append_printf(buf, "avg search data/TB %0.1f\n",
jpi->search_data / (double) jpi->translations);
-
- if (s) {
- int64_t tot = s->interm_time + s->code_time;
- g_string_append_printf(buf, "JIT cycles %" PRId64
- " (%0.3f s at 2.4 GHz)\n",
- tot, tot / 2.4e9);
- g_string_append_printf(buf, "cycles/op %0.1f\n",
- jpi->ops ? (double)tot / jpi->ops : 0);
- g_string_append_printf(buf, "cycles/in byte %0.1f\n",
- jpi->guest ? (double)tot / jpi->guest : 0);
- g_string_append_printf(buf, "cycles/out byte %0.1f\n",
- jpi->host ? (double)tot / jpi->host : 0);
- g_string_append_printf(buf, "cycles/search byte %0.1f\n",
- jpi->search_data ? (double)tot / jpi->search_data : 0);
- if (tot == 0) {
- tot = 1;
- }
- g_string_append_printf(buf, " gen_interm time %0.1f%%\n",
- (double)s->interm_time / tot * 100.0);
- g_string_append_printf(buf, " gen_code time %0.1f%%\n",
- (double)s->code_time / tot * 100.0);
- g_string_append_printf(buf, "optim./code time %0.1f%%\n",
- (double)s->opt_time / (s->code_time ? s->code_time : 1)
- * 100.0);
- g_string_append_printf(buf, "liveness/code time %0.1f%%\n",
- (double)s->la_time / (s->code_time ? s->code_time : 1)
- * 100.0);
- g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n",
- s->restore_count);
- g_string_append_printf(buf, " avg cycles %0.1f\n",
- s->restore_count ?
- (double)s->restore_time / s->restore_count : 0);
- }
}
g_free(jpi);
}
@@ -282,8 +282,9 @@ void page_init(void)
*/
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
target_ulong pc, void *host_pc,
- int *max_insns, int64_t *ti)
+ int *max_insns)
{
+ TCGProfile *prof = &tcg_ctx->prof;
int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
if (unlikely(ret != 0)) {
return ret;
@@ -297,11 +298,9 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
tcg_ctx->cpu = NULL;
*max_insns = tb->icount;
-#ifdef CONFIG_PROFILER
- qatomic_set(&tcg_ctx->prof.interm_time,
- tcg_ctx->prof.interm_time + profile_getclock() - *ti);
- *ti = profile_getclock();
-#endif
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_ir_done_time = profile_getclock();
+ }
return tcg_gen_code(tcg_ctx, tb, pc);
}
@@ -352,7 +351,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tcg_insn_unit *gen_code_buf;
int gen_code_size, search_size, max_insns;
TCGProfile *prof = &tcg_ctx->prof;
- int64_t ti;
void *host_pc;
assert_memory_lock();
@@ -403,10 +401,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb_overflow:
-#ifdef CONFIG_PROFILER
- ti = profile_getclock();
-#endif
-
trace_translate_block(tb, pc, tb->tc.ptr);
/*
@@ -418,11 +412,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
if (tb_stats_collection_enabled() &&
qemu_log_in_addr_range(tb->pc)) {
tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_start_time = profile_getclock();
+ }
} else {
tb->tb_stats = NULL;
}
- gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
+ gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns);
if (unlikely(gen_code_size < 0)) {
switch (gen_code_size) {
case -1:
@@ -474,9 +471,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
*/
perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
-#ifdef CONFIG_PROFILER
- qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
-#endif
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_code_done_time = profile_getclock();
+ }
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
@@ -586,26 +583,38 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* Collect JIT stats when enabled. We batch them all up here to
* avoid spamming the cache with atomic accesses
*/
- if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+ if (tb_stats_enabled(tb, (TB_JIT_STATS | TB_JIT_TIME))) {
TBStatistics *ts = tb->tb_stats;
qemu_mutex_lock(&ts->jit_stats_lock);
- ts->code.num_guest_inst += prof->translation.nb_guest_insns;
- ts->code.num_tcg_ops += prof->translation.nb_ops_pre_opt;
- ts->code.num_tcg_ops_opt += tcg_ctx->nb_ops;
- ts->code.spills += prof->translation.nb_spills;
- ts->code.temps += prof->translation.temp_count;
- ts->code.deleted_ops += prof->translation.del_op_count;
- ts->code.in_len += tb->size;
- ts->code.out_len += tb->tc.size;
- ts->code.search_out_len += search_size;
-
- ts->translations.total++;
- if (tb_page_addr1(tb) != -1) {
- ts->translations.spanning++;
+ if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+ ts->code.num_guest_inst += prof->translation.nb_guest_insns;
+ ts->code.num_tcg_ops += prof->translation.nb_ops_pre_opt;
+ ts->code.num_tcg_ops_opt += tcg_ctx->nb_ops;
+ ts->code.spills += prof->translation.nb_spills;
+ ts->code.temps += prof->translation.temp_count;
+ ts->code.deleted_ops += prof->translation.del_op_count;
+ ts->code.in_len += tb->size;
+ ts->code.out_len += tb->tc.size;
+ ts->code.search_out_len += search_size;
+
+ ts->translations.total++;
+ if (tb_page_addr1(tb) != -1) {
+ ts->translations.spanning++;
+ }
+
+ g_ptr_array_add(ts->tbs, tb);
}
- g_ptr_array_add(ts->tbs, tb);
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ ts->gen_times.ir += prof->gen_ir_done_time - prof->gen_start_time;
+ ts->gen_times.ir_opt +=
+ prof->gen_opt_done_time - prof->gen_ir_done_time;
+ ts->gen_times.la +=
+ prof->gen_la_done_time - prof->gen_opt_done_time;
+ ts->gen_times.code +=
+ prof->gen_code_done_time - prof->gen_la_done_time;
+ }
qemu_mutex_unlock(&ts->jit_stats_lock);
}
@@ -97,6 +97,12 @@ struct TBStatistics {
uint64_t tb_restore_time;
uint64_t tb_restore_count;
+ struct {
+ uint64_t ir;
+ uint64_t ir_opt;
+ uint64_t la;
+ uint64_t code;
+ } gen_times;
};
bool tb_stats_cmp(const void *ap, const void *bp);
@@ -104,5 +110,6 @@ bool tb_stats_cmp(const void *ap, const void *bp);
void init_tb_stats_htable(void);
void dump_jit_profile_info(TCGProfile *s, GString *buf);
+void dump_jit_exec_time_info(uint64_t dev_time);
#endif
@@ -557,13 +557,13 @@ typedef struct TCGProfile {
int64_t code_in_len;
int64_t code_out_len;
int64_t search_out_len;
- int64_t interm_time;
- int64_t code_time;
- int64_t la_time;
- int64_t opt_time;
- int64_t restore_count;
- int64_t restore_time;
- int64_t table_op_count[NB_OPS];
+
+ /* Timestamps during translation */
+ uint64_t gen_start_time;
+ uint64_t gen_ir_done_time;
+ uint64_t gen_opt_done_time;
+ uint64_t gen_la_done_time;
+ uint64_t gen_code_done_time;
} TCGProfile;
struct TCGContext {
@@ -6005,18 +6005,13 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
}
#endif
-#ifdef CONFIG_PROFILER
- qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
-#endif
-
#ifdef USE_TCG_OPTIMIZATIONS
tcg_optimize(s);
#endif
-#ifdef CONFIG_PROFILER
- qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
- qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
-#endif
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_opt_done_time = profile_getclock();
+ }
reachable_code_pass(s);
liveness_pass_0(s);
@@ -6042,9 +6037,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
}
}
-#ifdef CONFIG_PROFILER
- qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
-#endif
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_la_done_time = profile_getclock();
+ }
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)