
[v1,08/13] plugins: expand the bb plugin to be thread safe and track per-cpu

Message ID 20200709141327.14631-9-alex.bennee@linaro.org
State Superseded
Series misc rc0 fixes (docs, plugins, docker)

Commit Message

Alex Bennée July 9, 2020, 2:13 p.m. UTC
While there isn't any easy way to make the inline counts thread safe
we can ensure the callback based ones are. While we are at it we can
reduce introduce a new option ("idle") to dump a report of the current
bb and insn count each time a vCPU enters the idle state.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

Cc: Dave Bort <dbort@dbort.com>

---
v2
  - fixup for non-inline linux-user case
  - minor cleanup and re-factor
---
 tests/plugin/bb.c | 96 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 83 insertions(+), 13 deletions(-)

-- 
2.20.1
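
For reference, the new option would typically be exercised with an invocation
along these lines (machine, kernel image and plugin path are illustrative; the
plugin's report ends up in the plugin log):

    qemu-system-aarch64 -M virt -cpu cortex-a57 -kernel Image \
        -plugin ./tests/plugin/libbb.so,arg=idle \
        -d plugin -D bb.log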

Comments

Robert Foley July 9, 2020, 5:12 p.m. UTC | #1
Reviewed-by: Robert Foley <robert.foley@linaro.org>


Emilio Cota July 11, 2020, 9:56 p.m. UTC | #2
On Thu, Jul 09, 2020 at 15:13:22 +0100, Alex Bennée wrote:
> While there isn't any easy way to make the inline counts thread safe


Why not? At least on 64-bit hosts TCG will emit a single write to
update the 64-bit counter.

> we can ensure the callback based ones are. While we are at it we can

> reduce introduce a new option ("idle") to dump a report of the current


s/reduce//

> bb and insn count each time a vCPU enters the idle state.

> 


> 

> diff --git a/tests/plugin/bb.c b/tests/plugin/bb.c

> index df19fd359df3..89c373e19cd8 100644

> --- a/tests/plugin/bb.c

> +++ b/tests/plugin/bb.c

> @@ -16,24 +16,67 @@

>  

>  QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

>  

> -static uint64_t bb_count;

> -static uint64_t insn_count;

> +typedef struct {

> +    GMutex lock;

> +    int index;

> +    uint64_t bb_count;

> +    uint64_t insn_count;

> +} CPUCount;


Why use a mutex?

Just have a per-vCPU struct that each vCPU thread updates with atomic_write.
Then when we want to print a report we just have to collect the counts
with atomic_read().
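
Roughly something like this (a sketch only, not the posted patch; names are
illustrative, and in-tree the qatomic_* wrappers would be the natural spelling
rather than raw C11 atomics):

    #include <glib.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdatomic.h>

    typedef struct {
        int index;
        _Atomic uint64_t bb_count;
        _Atomic uint64_t insn_count;
    } CPUCount;

    /* vCPU thread: each vCPU only ever updates its own struct, so a
     * relaxed add (or a plain load plus atomic store) is sufficient */
    static void count_tb_exec(CPUCount *count, uint64_t n_insns)
    {
        atomic_fetch_add_explicit(&count->bb_count, 1, memory_order_relaxed);
        atomic_fetch_add_explicit(&count->insn_count, n_insns,
                                  memory_order_relaxed);
    }

    /* report side: collect a consistent snapshot of each counter */
    static void append_cpu_report(CPUCount *count, GString *report)
    {
        uint64_t bb = atomic_load_explicit(&count->bb_count,
                                           memory_order_relaxed);
        uint64_t insns = atomic_load_explicit(&count->insn_count,
                                              memory_order_relaxed);
        if (bb) {
            g_string_append_printf(report,
                                   "CPU%d: bb's: %" PRIu64 ", insns: %" PRIu64 "\n",
                                   count->index, bb, insns);
        }
    }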

Also, consider just adding a comment to bb.c noting that it is not thread-safe,
and introducing a separate bb-threadsafe.c plugin in this patch. The reason is that
bb.c is very simple, which makes it useful for understanding the interface.

Thanks,
		E.
Alex Bennée July 12, 2020, 9:48 a.m. UTC | #3
Emilio G. Cota <cota@braap.org> writes:

> On Thu, Jul 09, 2020 at 15:13:22 +0100, Alex Bennée wrote:

>> While there isn't any easy way to make the inline counts thread safe

>

> Why not? At least in 64-bit hosts TCG will emit a single write to

> update the 64-bit counter.


Well, the operation is an add, so that is a load/add/store cycle on
non-x86 hosts. If we want to do it properly we should expose an atomic add
operation, either helper based or generating an atomic operation
directly if the backend can support it easily.
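
The helper-based fallback would only need something along these lines
(hypothetical sketch; no such plugin inline op exists today, and in-tree
qatomic_fetch_add() would be the usual spelling):

    #include <stdint.h>

    /*
     * Hypothetical helper the generated code could call when the backend
     * cannot cheaply emit a host atomic add; the builtin becomes a
     * lock-prefixed add or an LL/SC loop depending on the host.
     */
    static void plugin_atomic_add_u64(uint64_t *counter, uint64_t value)
    {
        __atomic_fetch_add(counter, value, __ATOMIC_RELAXED);
    }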




-- 
Alex Bennée

Patch

diff --git a/tests/plugin/bb.c b/tests/plugin/bb.c
index df19fd359df3..89c373e19cd8 100644
--- a/tests/plugin/bb.c
+++ b/tests/plugin/bb.c
@@ -16,24 +16,67 @@ 
 
 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
 
-static uint64_t bb_count;
-static uint64_t insn_count;
+typedef struct {
+    GMutex lock;
+    int index;
+    uint64_t bb_count;
+    uint64_t insn_count;
+} CPUCount;
+
+/* Used by the inline & linux-user counts */
 static bool do_inline;
+static CPUCount inline_count;
+
+/* Dump running CPU total on idle? */
+static bool idle_report;
+static GPtrArray *counts;
+static int max_cpus;
+
+static void gen_one_cpu_report(CPUCount *count, GString *report)
+{
+    if (count->bb_count) {
+        g_string_append_printf(report, "CPU%d: "
+                               "bb's: %" PRIu64", insns: %" PRIu64 "\n",
+                               count->index,
+                               count->bb_count, count->insn_count);
+    }
+}
 
 static void plugin_exit(qemu_plugin_id_t id, void *p)
 {
-    g_autofree gchar *out = g_strdup_printf(
-        "bb's: %" PRIu64", insns: %" PRIu64 "\n",
-        bb_count, insn_count);
-    qemu_plugin_outs(out);
+    g_autoptr(GString) report = g_string_new("");
+
+    if (do_inline || !max_cpus) {
+        g_string_printf(report, "bb's: %" PRIu64", insns: %" PRIu64 "\n",
+                        inline_count.bb_count, inline_count.insn_count);
+    } else {
+        g_ptr_array_foreach(counts, (GFunc) gen_one_cpu_report, report);
+    }
+    qemu_plugin_outs(report->str);
+}
+
+static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+    CPUCount *count = g_ptr_array_index(counts, cpu_index);
+    g_autoptr(GString) report = g_string_new("");
+    gen_one_cpu_report(count, report);
+
+    if (report->len > 0) {
+        g_string_prepend(report, "Idling ");
+        qemu_plugin_outs(report->str);
+    }
 }
 
 static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
 {
-    unsigned long n_insns = (unsigned long)udata;
+    CPUCount *count = max_cpus ?
+        g_ptr_array_index(counts, cpu_index) : &inline_count;
 
-    insn_count += n_insns;
-    bb_count++;
+    unsigned long n_insns = (unsigned long)udata;
+    g_mutex_lock(&count->lock);
+    count->insn_count += n_insns;
+    count->bb_count++;
+    g_mutex_unlock(&count->lock);
 }
 
 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
@@ -42,9 +85,9 @@  static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 
     if (do_inline) {
         qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64,
-                                                 &bb_count, 1);
+                                                 &inline_count.bb_count, 1);
         qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64,
-                                                 &insn_count, n_insns);
+                                                 &inline_count.insn_count, n_insns);
     } else {
         qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
                                              QEMU_PLUGIN_CB_NO_REGS,
@@ -56,8 +99,35 @@  QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                            const qemu_info_t *info,
                                            int argc, char **argv)
 {
-    if (argc && strcmp(argv[0], "inline") == 0) {
-        do_inline = true;
+    int i;
+
+    for (i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        if (g_strcmp0(opt, "inline") == 0) {
+            do_inline = true;
+        } else if (g_strcmp0(opt, "idle") == 0) {
+            idle_report = true;
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    if (info->system_emulation && !do_inline) {
+        max_cpus = info->system.max_vcpus;
+        counts = g_ptr_array_new();
+        for (i = 0; i < max_cpus; i++) {
+            CPUCount *count = g_new0(CPUCount, 1);
+            g_mutex_init(&count->lock);
+            count->index = i;
+            g_ptr_array_add(counts, count);
+        }
+    } else if (!do_inline) {
+        g_mutex_init(&inline_count.lock);
+    }
+
+    if (idle_report) {
+        qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
     }
 
     qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);