Message ID | 20240226091446.479436-9-pierrick.bouvier@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | TCG Plugin inline operation enhancement | expand |
On 13:14 Mon 26 Feb , Pierrick Bouvier wrote: > Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> Reviewed-by: Luc Michel <luc.michel@amd.com> > --- > tests/plugin/bb.c | 63 +++++++++++++++++++---------------------------- > 1 file changed, 26 insertions(+), 37 deletions(-) > > diff --git a/tests/plugin/bb.c b/tests/plugin/bb.c > index df50d1fd3bc..36776dee1e1 100644 > --- a/tests/plugin/bb.c > +++ b/tests/plugin/bb.c > @@ -17,27 +17,25 @@ > QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; > > typedef struct { > - GMutex lock; > - int index; > uint64_t bb_count; > uint64_t insn_count; > } CPUCount; > > -/* Used by the inline & linux-user counts */ > +static struct qemu_plugin_scoreboard *counts; > +static qemu_plugin_u64 bb_count; > +static qemu_plugin_u64 insn_count; > + > static bool do_inline; > -static CPUCount inline_count; > - > /* Dump running CPU total on idle? */ > static bool idle_report; > -static GPtrArray *counts; > -static int max_cpus; > > -static void gen_one_cpu_report(CPUCount *count, GString *report) > +static void gen_one_cpu_report(CPUCount *count, GString *report, > + unsigned int cpu_index) > { > if (count->bb_count) { > g_string_append_printf(report, "CPU%d: " > "bb's: %" PRIu64", insns: %" PRIu64 "\n", > - count->index, > + cpu_index, > count->bb_count, count->insn_count); > } > } > @@ -46,20 +44,23 @@ static void plugin_exit(qemu_plugin_id_t id, void *p) > { > g_autoptr(GString) report = g_string_new(""); > > - if (do_inline || !max_cpus) { > - g_string_printf(report, "bb's: %" PRIu64", insns: %" PRIu64 "\n", > - inline_count.bb_count, inline_count.insn_count); > - } else { > - g_ptr_array_foreach(counts, (GFunc) gen_one_cpu_report, report); > + for (int i = 0; i < qemu_plugin_num_vcpus(); ++i) { > + CPUCount *count = qemu_plugin_scoreboard_find(counts, i); > + gen_one_cpu_report(count, report, i); > } > + g_string_append_printf(report, "Total: " > + "bb's: %" PRIu64", insns: %" PRIu64 "\n", > + 
qemu_plugin_u64_sum(bb_count), > + qemu_plugin_u64_sum(insn_count)); > qemu_plugin_outs(report->str); > + qemu_plugin_scoreboard_free(counts); > } > > static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index) > { > - CPUCount *count = g_ptr_array_index(counts, cpu_index); > + CPUCount *count = qemu_plugin_scoreboard_find(counts, cpu_index); > g_autoptr(GString) report = g_string_new(""); > - gen_one_cpu_report(count, report); > + gen_one_cpu_report(count, report, cpu_index); > > if (report->len > 0) { > g_string_prepend(report, "Idling "); > @@ -69,14 +70,11 @@ static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index) > > static void vcpu_tb_exec(unsigned int cpu_index, void *udata) > { > - CPUCount *count = max_cpus ? > - g_ptr_array_index(counts, cpu_index) : &inline_count; > + CPUCount *count = qemu_plugin_scoreboard_find(counts, cpu_index); > > uintptr_t n_insns = (uintptr_t)udata; > - g_mutex_lock(&count->lock); > count->insn_count += n_insns; > count->bb_count++; > - g_mutex_unlock(&count->lock); > } > > static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) > @@ -84,11 +82,10 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) > size_t n_insns = qemu_plugin_tb_n_insns(tb); > > if (do_inline) { > - qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64, > - &inline_count.bb_count, 1); > - qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64, > - &inline_count.insn_count, > - n_insns); > + qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu( > + tb, QEMU_PLUGIN_INLINE_ADD_U64, bb_count, 1); > + qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu( > + tb, QEMU_PLUGIN_INLINE_ADD_U64, insn_count, n_insns); > } else { > qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec, > QEMU_PLUGIN_CB_NO_REGS, > @@ -121,18 +118,10 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, > } > } > > - if (info->system_emulation && !do_inline) { > - max_cpus = 
info->system.max_vcpus; > - counts = g_ptr_array_new(); > - for (i = 0; i < max_cpus; i++) { > - CPUCount *count = g_new0(CPUCount, 1); > - g_mutex_init(&count->lock); > - count->index = i; > - g_ptr_array_add(counts, count); > - } > - } else if (!do_inline) { > - g_mutex_init(&inline_count.lock); > - } > + counts = qemu_plugin_scoreboard_new(sizeof(CPUCount)); > + bb_count = qemu_plugin_scoreboard_u64_in_struct(counts, CPUCount, bb_count); > + insn_count = qemu_plugin_scoreboard_u64_in_struct( > + counts, CPUCount, insn_count); > > if (idle_report) { > qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle); > -- > 2.43.0 > > --
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: > Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> I did notice there is a discrepancy between what libinsn and libbb report. The libbb figure looks like an overcount, so I wonder if there are some instructions we are not picking up, but I can't see where that would be. ➜ ./qemu-hppa -plugin ./tests/plugin/libinsn.so -plugin ./tests/plugin/libbb.so,inline=true -d plugin ./tests/tcg/hppa-linux-user/sha512 1..10 ok 1 - do_test(&tests[i]) ok 2 - do_test(&tests[i]) ok 3 - do_test(&tests[i]) ok 4 - do_test(&tests[i]) ok 5 - do_test(&tests[i]) ok 6 - do_test(&tests[i]) ok 7 - do_test(&tests[i]) ok 8 - do_test(&tests[i]) ok 9 - do_test(&tests[i]) ok 10 - do_test(&tests[i]) CPU0: bb's: 54282, insns: 775697 Total: bb's: 54282, insns: 775697 cpu 0 insns: 774827 total insns: 774827 Although, weirdly, this may be only an hppa thing. Richard? ➜ ./qemu-aarch64 -plugin ./tests/plugin/libinsn.so -plugin ./tests/plugin/libbb.so,inline=true -d plugin ./tests/tcg/aarch64-linux-user/sha512 1..10 ok 1 - do_test(&tests[i]) ok 2 - do_test(&tests[i]) ok 3 - do_test(&tests[i]) ok 4 - do_test(&tests[i]) ok 5 - do_test(&tests[i]) ok 6 - do_test(&tests[i]) ok 7 - do_test(&tests[i]) ok 8 - do_test(&tests[i]) ok 9 - do_test(&tests[i]) ok 10 - do_test(&tests[i]) CPU0: bb's: 41513, insns: 302671 Total: bb's: 41513, insns: 302671 cpu 0 insns: 302671 total insns: 302671 Anyway: Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
On 2/29/24 6:21 PM, Alex Bennée wrote: > Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: > >> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> > > I did notice there is a discrepancy between what libisns and libb > report. The libb looks like an overcount so I wonder if there are some > instructions we are not picking up but I can't see where that would be. > > ➜ ./qemu-hppa -plugin ./tests/plugin/libinsn.so -plugin ./tests/plugin/libbb.so,inline=true -d plugin ./tests/tcg/hppa-linux-user/sha512 > 1..10 > ok 1 - do_test(&tests[i]) > ok 2 - do_test(&tests[i]) > ok 3 - do_test(&tests[i]) > ok 4 - do_test(&tests[i]) > ok 5 - do_test(&tests[i]) > ok 6 - do_test(&tests[i]) > ok 7 - do_test(&tests[i]) > ok 8 - do_test(&tests[i]) > ok 9 - do_test(&tests[i]) > ok 10 - do_test(&tests[i]) > CPU0: bb's: 54282, insns: 775697 > Total: bb's: 54282, insns: 775697 > cpu 0 insns: 774827 > total insns: 774827 > > Although weirdly maybe only an hppa thing. Richard? > Do you observe the exact same numbers if you run only one of the plugins? bb counts the number of instructions in each executed block, while insn effectively counts every instruction that is run. Maybe there is an hppa specificity that makes some TBs exit in the middle, thus executing fewer instructions than expected from the bb count. I don't know how to reproduce this test. Did you run it from a specific docker env? > ➜ ./qemu-aarch64 -plugin ./tests/plugin/libinsn.so -plugin ./tests/plugin/libbb.so,inline=true -d plugin ./tests/tcg/aarch64-linux-user/sha512 > 1..10 > ok 1 - do_test(&tests[i]) > ok 2 - do_test(&tests[i]) > ok 3 - do_test(&tests[i]) > ok 4 - do_test(&tests[i]) > ok 5 - do_test(&tests[i]) > ok 6 - do_test(&tests[i]) > ok 7 - do_test(&tests[i]) > ok 8 - do_test(&tests[i]) > ok 9 - do_test(&tests[i]) > ok 10 - do_test(&tests[i]) > CPU0: bb's: 41513, insns: 302671 > Total: bb's: 41513, insns: 302671 > cpu 0 insns: 302671 > total insns: 302671 > > Anyway: > > Reviewed-by: Alex Bennée <alex.bennee@linaro.org> >
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: > On 2/29/24 6:21 PM, Alex Bennée wrote: >> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: >> >>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> >> I did notice there is a discrepancy between what libisns and libb >> report. The libb looks like an overcount so I wonder if there are some >> instructions we are not picking up but I can't see where that would be. >> ➜ ./qemu-hppa -plugin ./tests/plugin/libinsn.so -plugin >> ./tests/plugin/libbb.so,inline=true -d plugin >> ./tests/tcg/hppa-linux-user/sha512 >> 1..10 >> ok 1 - do_test(&tests[i]) >> ok 2 - do_test(&tests[i]) >> ok 3 - do_test(&tests[i]) >> ok 4 - do_test(&tests[i]) >> ok 5 - do_test(&tests[i]) >> ok 6 - do_test(&tests[i]) >> ok 7 - do_test(&tests[i]) >> ok 8 - do_test(&tests[i]) >> ok 9 - do_test(&tests[i]) >> ok 10 - do_test(&tests[i]) >> CPU0: bb's: 54282, insns: 775697 >> Total: bb's: 54282, insns: 775697 >> cpu 0 insns: 774827 >> total insns: 774827 >> Although weirdly maybe only an hppa thing. Richard? >> > > Do you observe the exact same number if you run only one of the plugin? > > bb count number of instructions in an executed block, while insn > effectively count every instructions ran. > Maybe there is hppa specifity that makes some tb exit in the middle, > thus executing less instructions than expected from bb count. Almost certainly - I just wasn't sure what would do that on straight-line code. Probably some funky aspect of HPPA I'm not aware of ;-) > > I don't know how to reproduce this test. Did you run it from a > specific docker env? If you have docker enabled, "make check-tcg" will build and use a container to build the test cases. If you are on Debian you just need: apt install gcc-hppa-linux-gnu libc6-dev-hppa-cross and re-run configure.
> >> ➜ ./qemu-aarch64 -plugin ./tests/plugin/libinsn.so -plugin ./tests/plugin/libbb.so,inline=true -d plugin ./tests/tcg/aarch64-linux-user/sha512 >> 1..10 >> ok 1 - do_test(&tests[i]) >> ok 2 - do_test(&tests[i]) >> ok 3 - do_test(&tests[i]) >> ok 4 - do_test(&tests[i]) >> ok 5 - do_test(&tests[i]) >> ok 6 - do_test(&tests[i]) >> ok 7 - do_test(&tests[i]) >> ok 8 - do_test(&tests[i]) >> ok 9 - do_test(&tests[i]) >> ok 10 - do_test(&tests[i]) >> CPU0: bb's: 41513, insns: 302671 >> Total: bb's: 41513, insns: 302671 >> cpu 0 insns: 302671 >> total insns: 302671 >> Anyway: >> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> >>
On 3/1/24 2:26 PM, Alex Bennée wrote: > Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: > >> On 2/29/24 6:21 PM, Alex Bennée wrote: >>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: >>> >>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> >>> I did notice there is a discrepancy between what libisns and libb >>> report. The libb looks like an overcount so I wonder if there are some >>> instructions we are not picking up but I can't see where that would be. >>> ➜ ./qemu-hppa -plugin ./tests/plugin/libinsn.so -plugin >>> ./tests/plugin/libbb.so,inline=true -d plugin >>> ./tests/tcg/hppa-linux-user/sha512 >>> 1..10 >>> ok 1 - do_test(&tests[i]) >>> ok 2 - do_test(&tests[i]) >>> ok 3 - do_test(&tests[i]) >>> ok 4 - do_test(&tests[i]) >>> ok 5 - do_test(&tests[i]) >>> ok 6 - do_test(&tests[i]) >>> ok 7 - do_test(&tests[i]) >>> ok 8 - do_test(&tests[i]) >>> ok 9 - do_test(&tests[i]) >>> ok 10 - do_test(&tests[i]) >>> CPU0: bb's: 54282, insns: 775697 >>> Total: bb's: 54282, insns: 775697 >>> cpu 0 insns: 774827 >>> total insns: 774827 >>> Although weirdly maybe only an hppa thing. Richard? >>> >> >> Do you observe the exact same number if you run only one of the plugin? >> >> bb count number of instructions in an executed block, while insn >> effectively count every instructions ran. >> Maybe there is hppa specifity that makes some tb exit in the middle, >> thus executing less instructions than expected from bb count. > > Almost certainly - I just wasn't sure what would do that on straight > line code. Probably some funky aspect of HPPA I'm not aware off ;-) > >> >> I don't know how to reproduce this test. Did you run it from a >> specific docker env? > > If you have docker enabled the "make check-tcg" will build and use a > container to build the test cases. If you are on debian you just need: > > apt install gcc-hppa-linux-gnu libc6-dev-hppa-cross > > and re-run configure. > Thanks. 
The difference observed predates this series, so there should definitely be something specific to this arch. >> >>> ➜ ./qemu-aarch64 -plugin ./tests/plugin/libinsn.so -plugin ./tests/plugin/libbb.so,inline=true -d plugin ./tests/tcg/aarch64-linux-user/sha512 >>> 1..10 >>> ok 1 - do_test(&tests[i]) >>> ok 2 - do_test(&tests[i]) >>> ok 3 - do_test(&tests[i]) >>> ok 4 - do_test(&tests[i]) >>> ok 5 - do_test(&tests[i]) >>> ok 6 - do_test(&tests[i]) >>> ok 7 - do_test(&tests[i]) >>> ok 8 - do_test(&tests[i]) >>> ok 9 - do_test(&tests[i]) >>> ok 10 - do_test(&tests[i]) >>> CPU0: bb's: 41513, insns: 302671 >>> Total: bb's: 41513, insns: 302671 >>> cpu 0 insns: 302671 >>> total insns: 302671 >>> Anyway: >>> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> >>> >
diff --git a/tests/plugin/bb.c b/tests/plugin/bb.c index df50d1fd3bc..36776dee1e1 100644 --- a/tests/plugin/bb.c +++ b/tests/plugin/bb.c @@ -17,27 +17,25 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; typedef struct { - GMutex lock; - int index; uint64_t bb_count; uint64_t insn_count; } CPUCount; -/* Used by the inline & linux-user counts */ +static struct qemu_plugin_scoreboard *counts; +static qemu_plugin_u64 bb_count; +static qemu_plugin_u64 insn_count; + static bool do_inline; -static CPUCount inline_count; - /* Dump running CPU total on idle? */ static bool idle_report; -static GPtrArray *counts; -static int max_cpus; -static void gen_one_cpu_report(CPUCount *count, GString *report) +static void gen_one_cpu_report(CPUCount *count, GString *report, + unsigned int cpu_index) { if (count->bb_count) { g_string_append_printf(report, "CPU%d: " "bb's: %" PRIu64", insns: %" PRIu64 "\n", - count->index, + cpu_index, count->bb_count, count->insn_count); } } @@ -46,20 +44,23 @@ static void plugin_exit(qemu_plugin_id_t id, void *p) { g_autoptr(GString) report = g_string_new(""); - if (do_inline || !max_cpus) { - g_string_printf(report, "bb's: %" PRIu64", insns: %" PRIu64 "\n", - inline_count.bb_count, inline_count.insn_count); - } else { - g_ptr_array_foreach(counts, (GFunc) gen_one_cpu_report, report); + for (int i = 0; i < qemu_plugin_num_vcpus(); ++i) { + CPUCount *count = qemu_plugin_scoreboard_find(counts, i); + gen_one_cpu_report(count, report, i); } + g_string_append_printf(report, "Total: " + "bb's: %" PRIu64", insns: %" PRIu64 "\n", + qemu_plugin_u64_sum(bb_count), + qemu_plugin_u64_sum(insn_count)); qemu_plugin_outs(report->str); + qemu_plugin_scoreboard_free(counts); } static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index) { - CPUCount *count = g_ptr_array_index(counts, cpu_index); + CPUCount *count = qemu_plugin_scoreboard_find(counts, cpu_index); g_autoptr(GString) report = g_string_new(""); - gen_one_cpu_report(count, 
report); + gen_one_cpu_report(count, report, cpu_index); if (report->len > 0) { g_string_prepend(report, "Idling "); @@ -69,14 +70,11 @@ static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index) static void vcpu_tb_exec(unsigned int cpu_index, void *udata) { - CPUCount *count = max_cpus ? - g_ptr_array_index(counts, cpu_index) : &inline_count; + CPUCount *count = qemu_plugin_scoreboard_find(counts, cpu_index); uintptr_t n_insns = (uintptr_t)udata; - g_mutex_lock(&count->lock); count->insn_count += n_insns; count->bb_count++; - g_mutex_unlock(&count->lock); } static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) @@ -84,11 +82,10 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) size_t n_insns = qemu_plugin_tb_n_insns(tb); if (do_inline) { - qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64, - &inline_count.bb_count, 1); - qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64, - &inline_count.insn_count, - n_insns); + qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu( + tb, QEMU_PLUGIN_INLINE_ADD_U64, bb_count, 1); + qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu( + tb, QEMU_PLUGIN_INLINE_ADD_U64, insn_count, n_insns); } else { qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec, QEMU_PLUGIN_CB_NO_REGS, @@ -121,18 +118,10 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, } } - if (info->system_emulation && !do_inline) { - max_cpus = info->system.max_vcpus; - counts = g_ptr_array_new(); - for (i = 0; i < max_cpus; i++) { - CPUCount *count = g_new0(CPUCount, 1); - g_mutex_init(&count->lock); - count->index = i; - g_ptr_array_add(counts, count); - } - } else if (!do_inline) { - g_mutex_init(&inline_count.lock); - } + counts = qemu_plugin_scoreboard_new(sizeof(CPUCount)); + bb_count = qemu_plugin_scoreboard_u64_in_struct(counts, CPUCount, bb_count); + insn_count = qemu_plugin_scoreboard_u64_in_struct( + counts, CPUCount, insn_count); if (idle_report) { 
qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> --- tests/plugin/bb.c | 63 +++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 37 deletions(-)