[01/35] accel/tcg: Move HMP info jit and info opcount code

Message ID	20231107024842.7650-2-richard.henderson@linaro.org
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of qemu-devel-bounces+patch=linaro.org@nongnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; From: Richard Henderson <richard.henderson@linaro.org> To: qemu-devel@nongnu.org Cc: =?utf-8?q?Philippe_Mathieu-Daud=C3=A9?= <philmd@linaro.org>, =?utf-8?q?A?= =?utf-8?q?lex_Benn=C3=A9e?= <alex.bennee@linaro.org> Subject: [PATCH 01/35] accel/tcg: Move HMP info jit and info opcount code Date: Mon, 6 Nov 2023 18:48:08 -0800 Message-Id: <20231107024842.7650-2-richard.henderson@linaro.org> In-Reply-To: <20231107024842.7650-1-richard.henderson@linaro.org> References: <20231107024842.7650-1-richard.henderson@linaro.org> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Received-SPF: pass client-ip=2607:f8b0:4864:20::431; envelope-from=richard.henderson@linaro.org; helo=mail-pf1-x431.google.com X-Spam_score_int: -20 X-Spam_score: -2.1 X-Spam_bar: -- X-Spam_report: (-2.1 / 5.0 requ) BAYES_00=-1.9, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_NONE=-0.0001, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=ham autolearn_force=no X-Spam_action: no action Precedence: list Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org Sender: qemu-devel-bounces+patch=linaro.org@nongnu.org
Series	[01/35] accel/tcg: Move HMP info jit and info opcount code \| expand [01/35] accel/tcg: Move HMP info jit and info opcount code [02/35] tcg: Add C_N2_I1 [03/35] tcg/loongarch64: Use C_N2_I1 for INDEX_op_qemu_ld_a_i128 [04/35] util: Add cpuinfo for loongarch64 [05/35] tcg/loongarch64: Use cpuinfo.h [06/35] host/include/loongarch64: Add atomic16 load and store [07/35] accel/tcg: Remove redundant case in store_atom_16 [08/35] accel/tcg: Fix condition for store_atom_insert_al16 [09/35] tcg: Mark tcg_gen_op as noinline [10/35] tcg: Move tcg_gen_op* out of line [11/35] tcg: Move generic expanders out of line [12/35] tcg: Move 32-bit expanders out of line [13/35] tcg: Move 64-bit expanders out of line [14/35] tcg: Move vec_gen_* declarations to tcg-internal.h [15/35] tcg: Move tcg_gen_opN declarations to tcg-internal.h [16/35] tcg: Unexport tcg_gen_op_{i32,i64} [17/35] tcg: Move tcg_constant_ out of line [18/35] tcg: Move tcg_temp_new_, tcg_global_mem_new_ out of line [19/35] tcg: Move tcg_temp_free_* out of line [20/35] tcg/mips: Split out tcg_out_setcond_int [21/35] tcg/mips: Always implement movcond [22/35] tcg: Remove TCG_TARGET_HAS_movcond_{i32,i64} [23/35] tcg/mips: Implement neg opcodes [24/35] tcg/loongarch64: Implement neg opcodes [25/35] tcg: Remove TCG_TARGET_HAS_neg_{i32,i64} [26/35] tcg: Don't free vector results [27/35] tcg/optimize: Pipe OptContext into reset_ts [28/35] tcg/optimize: Split out cmp_better_copy [29/35] tcg/optimize: Optimize env memory operations [30/35] tcg: Eliminate duplicate env store operations [31/35] tcg/optimize: Split out arg_new_constant [32/35] tcg: Canonicalize subi to addi during opcode generation [33/35] tcg/optimize: Canonicalize subi to addi during optimization [34/35] tcg/optimize: Canonicalize sub2 with constants to add2 [35/35] tcg/sparc64: Implement tcg_out_extrl_i64_i32

diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h index 3b2277e6e9..edefd0dcb7 100644 --- a/accel/tcg/internal-common.h +++ b/accel/tcg/internal-common.h @@ -14,8 +14,6 @@ extern int64_t max_delay; extern int64_t max_advance; -void dump_exec_info(GString *buf); - /* * Return true if CS is not running in parallel with other cpus, either * because there are no other cpus or we are within an exclusive context. diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h index 19b16e58f8..6da1462c4f 100644 --- a/include/exec/cputlb.h +++ b/include/exec/cputlb.h @@ -26,6 +26,5 @@ /* cputlb.c */ void tlb_protect_code(ram_addr_t ram_addr); void tlb_unprotect_code(ram_addr_t ram_addr); -void tlb_flush_counts(size_t *full, size_t *part, size_t *elide); #endif #endif diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index a9282cdcc6..3a4c0f124f 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -846,9 +846,6 @@ static inline TCGv_ptr tcg_temp_new_ptr(void) return temp_tcgv_ptr(t); } -void tcg_dump_info(GString *buf); -void tcg_dump_op_count(GString *buf); - #define TCG_CT_CONST 1 /* any constant of register size */ typedef struct TCGArgConstraint { diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index b8c5e345b8..13986820fe 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -321,21 +321,6 @@ static void flush_all_helper(CPUState *src, run_on_cpu_func fn, } } -void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) -{ - CPUState *cpu; - size_t full = 0, part = 0, elide = 0; - - CPU_FOREACH(cpu) { - full += qatomic_read(&cpu->neg.tlb.c.full_flush_count); - part += qatomic_read(&cpu->neg.tlb.c.part_flush_count); - elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count); - } - *pfull = full; - *ppart = part; - *pelide = elide; -} - static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) { uint16_t asked = data.host_int; diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c index caf1189e0b..093efe9714 100644 --- a/accel/tcg/monitor.c +++ b/accel/tcg/monitor.c @@ -8,6 +8,7 @@ #include "qemu/osdep.h" #include "qemu/accel.h" +#include "qemu/qht.h" #include "qapi/error.h" #include "qapi/type-helpers.h" #include "qapi/qapi-commands-machine.h" @@ -17,6 +18,7 @@ #include "sysemu/tcg.h" #include "tcg/tcg.h" #include "internal-common.h" +#include "tb-context.h" static void dump_drift_info(GString *buf) @@ -50,6 +52,153 @@ static void dump_accel_info(GString *buf) one_insn_per_tb ? "on" : "off"); } +static void print_qht_statistics(struct qht_stats hst, GString *buf) +{ + uint32_t hgram_opts; + size_t hgram_bins; + char *hgram; + + if (!hst.head_buckets) { + return; + } + g_string_append_printf(buf, "TB hash buckets %zu/%zu " + "(%0.2f%% head buckets used)\n", + hst.used_head_buckets, hst.head_buckets, + (double)hst.used_head_buckets / + hst.head_buckets * 100); + + hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; + hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; + if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { + hgram_opts |= QDIST_PR_NODECIMAL; + } + hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); + g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " + "Histogram: %s\n", + qdist_avg(&hst.occupancy) * 100, hgram); + g_free(hgram); + + hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; + hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); + if (hgram_bins > 10) { + hgram_bins = 10; + } else { + hgram_bins = 0; + hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; + } + hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); + g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " + "Histogram: %s\n", + qdist_avg(&hst.chain), hgram); + g_free(hgram); +} + +struct tb_tree_stats { + size_t nb_tbs; + size_t host_size; + size_t target_size; + size_t max_target_size; + size_t direct_jmp_count; + size_t direct_jmp2_count; + size_t cross_page; +}; + +static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) +{ + const TranslationBlock *tb = value; + struct tb_tree_stats *tst = data; + + tst->nb_tbs++; + tst->host_size += tb->tc.size; + tst->target_size += tb->size; + if (tb->size > tst->max_target_size) { + tst->max_target_size = tb->size; + } + if (tb->page_addr[1] != -1) { + tst->cross_page++; + } + if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { + tst->direct_jmp_count++; + if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { + tst->direct_jmp2_count++; + } + } + return false; +} + +static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) +{ + CPUState *cpu; + size_t full = 0, part = 0, elide = 0; + + CPU_FOREACH(cpu) { + full += qatomic_read(&cpu->neg.tlb.c.full_flush_count); + part += qatomic_read(&cpu->neg.tlb.c.part_flush_count); + elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count); + } + *pfull = full; + *ppart = part; + *pelide = elide; +} + +static void tcg_dump_info(GString *buf) +{ + g_string_append_printf(buf, "[TCG profiler not compiled]\n"); +} + +static void dump_exec_info(GString *buf) +{ + struct tb_tree_stats tst = {}; + struct qht_stats hst; + size_t nb_tbs, flush_full, flush_part, flush_elide; + + tcg_tb_foreach(tb_tree_stats_iter, &tst); + nb_tbs = tst.nb_tbs; + /* XXX: avoid using doubles ? */ + g_string_append_printf(buf, "Translation buffer state:\n"); + /* + * Report total code size including the padding and TB structs; + * otherwise users might think "-accel tcg,tb-size" is not honoured. + * For avg host size we use the precise numbers from tb_tree_stats though. + */ + g_string_append_printf(buf, "gen code size %zu/%zu\n", + tcg_code_size(), tcg_code_capacity()); + g_string_append_printf(buf, "TB count %zu\n", nb_tbs); + g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", + nb_tbs ? tst.target_size / nb_tbs : 0, + tst.max_target_size); + g_string_append_printf(buf, "TB avg host size %zu bytes " + "(expansion ratio: %0.1f)\n", + nb_tbs ? tst.host_size / nb_tbs : 0, + tst.target_size ? + (double)tst.host_size / tst.target_size : 0); + g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", + tst.cross_page, + nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); + g_string_append_printf(buf, "direct jump count %zu (%zu%%) " + "(2 jumps=%zu %zu%%)\n", + tst.direct_jmp_count, + nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, + tst.direct_jmp2_count, + nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); + + qht_statistics_init(&tb_ctx.htable, &hst); + print_qht_statistics(hst, buf); + qht_statistics_destroy(&hst); + + g_string_append_printf(buf, "\nStatistics:\n"); + g_string_append_printf(buf, "TB flush count %u\n", + qatomic_read(&tb_ctx.tb_flush_count)); + g_string_append_printf(buf, "TB invalidate count %u\n", + qatomic_read(&tb_ctx.tb_phys_invalidate_count)); + + tlb_flush_counts(&flush_full, &flush_part, &flush_elide); + g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); + g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); + g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); + tcg_dump_info(buf); +} + HumanReadableText *qmp_x_query_jit(Error **errp) { g_autoptr(GString) buf = g_string_new(""); @@ -66,6 +215,11 @@ HumanReadableText *qmp_x_query_jit(Error **errp) return human_readable_text_from_str(buf); } +static void tcg_dump_op_count(GString *buf) +{ + g_string_append_printf(buf, "[TCG profiler not compiled]\n"); +} + HumanReadableText *qmp_x_query_opcount(Error **errp) { g_autoptr(GString) buf = g_string_new(""); diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 8cb6ad3511..e579b0891d 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -645,133 +645,6 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) cpu_loop_exit_noexc(cpu); } -static void print_qht_statistics(struct qht_stats hst, GString *buf) -{ - uint32_t hgram_opts; - size_t hgram_bins; - char *hgram; - - if (!hst.head_buckets) { - return; - } - g_string_append_printf(buf, "TB hash buckets %zu/%zu " - "(%0.2f%% head buckets used)\n", - hst.used_head_buckets, hst.head_buckets, - (double)hst.used_head_buckets / - hst.head_buckets * 100); - - hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; - hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; - if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { - hgram_opts |= QDIST_PR_NODECIMAL; - } - hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); - g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " - "Histogram: %s\n", - qdist_avg(&hst.occupancy) * 100, hgram); - g_free(hgram); - - hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; - hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); - if (hgram_bins > 10) { - hgram_bins = 10; - } else { - hgram_bins = 0; - hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; - } - hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); - g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " - "Histogram: %s\n", - qdist_avg(&hst.chain), hgram); - g_free(hgram); -} - -struct tb_tree_stats { - size_t nb_tbs; - size_t host_size; - size_t target_size; - size_t max_target_size; - size_t direct_jmp_count; - size_t direct_jmp2_count; - size_t cross_page; -}; - -static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) -{ - const TranslationBlock *tb = value; - struct tb_tree_stats *tst = data; - - tst->nb_tbs++; - tst->host_size += tb->tc.size; - tst->target_size += tb->size; - if (tb->size > tst->max_target_size) { - tst->max_target_size = tb->size; - } - if (tb_page_addr1(tb) != -1) { - tst->cross_page++; - } - if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { - tst->direct_jmp_count++; - if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { - tst->direct_jmp2_count++; - } - } - return false; -} - -void dump_exec_info(GString *buf) -{ - struct tb_tree_stats tst = {}; - struct qht_stats hst; - size_t nb_tbs, flush_full, flush_part, flush_elide; - - tcg_tb_foreach(tb_tree_stats_iter, &tst); - nb_tbs = tst.nb_tbs; - /* XXX: avoid using doubles ? */ - g_string_append_printf(buf, "Translation buffer state:\n"); - /* - * Report total code size including the padding and TB structs; - * otherwise users might think "-accel tcg,tb-size" is not honoured. - * For avg host size we use the precise numbers from tb_tree_stats though. - */ - g_string_append_printf(buf, "gen code size %zu/%zu\n", - tcg_code_size(), tcg_code_capacity()); - g_string_append_printf(buf, "TB count %zu\n", nb_tbs); - g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", - nb_tbs ? tst.target_size / nb_tbs : 0, - tst.max_target_size); - g_string_append_printf(buf, "TB avg host size %zu bytes " - "(expansion ratio: %0.1f)\n", - nb_tbs ? tst.host_size / nb_tbs : 0, - tst.target_size ? - (double)tst.host_size / tst.target_size : 0); - g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", - tst.cross_page, - nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); - g_string_append_printf(buf, "direct jump count %zu (%zu%%) " - "(2 jumps=%zu %zu%%)\n", - tst.direct_jmp_count, - nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, - tst.direct_jmp2_count, - nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); - - qht_statistics_init(&tb_ctx.htable, &hst); - print_qht_statistics(hst, buf); - qht_statistics_destroy(&hst); - - g_string_append_printf(buf, "\nStatistics:\n"); - g_string_append_printf(buf, "TB flush count %u\n", - qatomic_read(&tb_ctx.tb_flush_count)); - g_string_append_printf(buf, "TB invalidate count %u\n", - qatomic_read(&tb_ctx.tb_phys_invalidate_count)); - - tlb_flush_counts(&flush_full, &flush_part, &flush_elide); - g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); - g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); - g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); - tcg_dump_info(buf); -} - #else /* CONFIG_USER_ONLY */ void cpu_interrupt(CPUState *cpu, int mask) diff --git a/tcg/tcg.c b/tcg/tcg.c index 35158a0846..847d749a7e 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -5927,11 +5927,6 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); } -void tcg_dump_op_count(GString *buf) -{ - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); -} - int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { int i, start_words, num_insns; @@ -6128,11 +6123,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) return tcg_current_code_size(s); } -void tcg_dump_info(GString *buf) -{ - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); -} - #ifdef ELF_HOST_MACHINE /* In order to use this feature, the backend needs to do three things:

[01/35] accel/tcg: Move HMP info jit and info opcount code

Commit Message

Patch