diff mbox series

[v4,2/7] plugins: save value during memory accesses

Message ID 20240702184448.551705-3-pierrick.bouvier@linaro.org
State Superseded
Headers show
Series plugins: access values during a memory read/write | expand

Commit Message

Pierrick Bouvier July 2, 2024, 6:44 p.m. UTC
Different code paths handle memory accesses:
- tcg generated code
- load/store helpers
- atomic helpers

This value is saved in cpu->plugin_state.

Atomic operations are doing read/write at the same time, so we generate
two memory callbacks instead of one, to allow plugins to access distinct
values.

For now, we can have access only up to 128 bits, thus split this in two
64 bits words. When QEMU will support wider operations, we'll be able to
reconsider this.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
 accel/tcg/atomic_template.h   | 66 ++++++++++++++++++++++++++++----
 include/qemu/plugin.h         |  8 ++++
 plugins/core.c                |  7 ++++
 tcg/tcg-op-ldst.c             | 72 +++++++++++++++++++++++++++++++----
 accel/tcg/atomic_common.c.inc | 13 ++++++-
 accel/tcg/ldst_common.c.inc   | 38 +++++++++++-------
 6 files changed, 173 insertions(+), 31 deletions(-)

Comments

Richard Henderson July 3, 2024, 6:58 p.m. UTC | #1
On 7/2/24 11:44, Pierrick Bouvier wrote:
> Different code paths handle memory accesses:
> - tcg generated code
> - load/store helpers
> - atomic helpers
> 
> This value is saved in cpu->plugin_state.
> 
> Atomic operations are doing read/write at the same time, so we generate
> two memory callbacks instead of one, to allow plugins to access distinct
> values.
> 
> For now, we can have access only up to 128 bits, thus split this in two
> 64 bits words. When QEMU will support wider operations, we'll be able to
> reconsider this.
> 
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
>   accel/tcg/atomic_template.h   | 66 ++++++++++++++++++++++++++++----
>   include/qemu/plugin.h         |  8 ++++
>   plugins/core.c                |  7 ++++
>   tcg/tcg-op-ldst.c             | 72 +++++++++++++++++++++++++++++++----
>   accel/tcg/atomic_common.c.inc | 13 ++++++-
>   accel/tcg/ldst_common.c.inc   | 38 +++++++++++-------
>   6 files changed, 173 insertions(+), 31 deletions(-)

It looks correct so,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

Possibilities for follow-up improvement:


> --- a/tcg/tcg-op-ldst.c
> +++ b/tcg/tcg-op-ldst.c
> @@ -148,14 +148,24 @@ static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
>       return NULL;
>   }
>   
> +#ifdef CONFIG_PLUGIN
>   static void
> -plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
> +plugin_gen_mem_callbacks(TCGv_i64 value_low, TCGv_i64 value_high,
> +                         TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
>                            enum qemu_plugin_mem_rw rw)
>   {
> -#ifdef CONFIG_PLUGIN
>       if (tcg_ctx->plugin_insn != NULL) {
>           qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
>   
> +        TCGv_ptr plugin_state = tcg_temp_ebb_new_ptr();
> +        tcg_gen_ld_ptr(plugin_state, tcg_env,
> +                       offsetof(CPUState, plugin_state) - sizeof(CPUState));
> +        tcg_gen_st_i64(value_low, plugin_state,
> +                       offsetof(CPUPluginState, mem_value_low));
> +        tcg_gen_st_i64(value_high, plugin_state,
> +                       offsetof(CPUPluginState, mem_value_high));

Maybe better to place this data at the beginning of CPUNegativeOffsetState?
This would eliminate a load dependency and most hosts would be able to use (relatively) 
small negative offset efficiently.

> +static void
> +plugin_gen_mem_callbacks_i32(TCGv_i32 val,
> +                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
> +                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
> +{
> +#ifdef CONFIG_PLUGIN
> +    if (tcg_ctx->plugin_insn != NULL) {
> +        TCGv_i64 ext_val = tcg_temp_ebb_new_i64();
> +        tcg_gen_extu_i32_i64(ext_val, val);
> +        plugin_gen_mem_callbacks(ext_val, tcg_constant_i64(0),

This zero extension got me to thinking:
(1) why zero extension and not sign-extension based on MO_SIGN from oi?
(2) given that the callback will have oi, do we really need any extension
     at all here?  We could allow the bits outside oi be garbage.
     This would eliminate the store to value_high entirely for most ops,
     and would allow this i32 op to avoid the extension -- simply perform
     a 32-bit store into the low half of value_low.

That appears to be what you're doing anyway with qemu_plugin_mem_value in the next patch.


r~
Pierrick Bouvier July 5, 2024, 12:33 a.m. UTC | #2
On 7/3/24 11:58, Richard Henderson wrote:
> On 7/2/24 11:44, Pierrick Bouvier wrote:
>> Different code paths handle memory accesses:
>> - tcg generated code
>> - load/store helpers
>> - atomic helpers
>>
>> This value is saved in cpu->plugin_state.
>>
>> Atomic operations are doing read/write at the same time, so we generate
>> two memory callbacks instead of one, to allow plugins to access distinct
>> values.
>>
>> For now, we can have access only up to 128 bits, thus split this in two
>> 64 bits words. When QEMU will support wider operations, we'll be able to
>> reconsider this.
>>
>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> ---
>>    accel/tcg/atomic_template.h   | 66 ++++++++++++++++++++++++++++----
>>    include/qemu/plugin.h         |  8 ++++
>>    plugins/core.c                |  7 ++++
>>    tcg/tcg-op-ldst.c             | 72 +++++++++++++++++++++++++++++++----
>>    accel/tcg/atomic_common.c.inc | 13 ++++++-
>>    accel/tcg/ldst_common.c.inc   | 38 +++++++++++-------
>>    6 files changed, 173 insertions(+), 31 deletions(-)
> 
> It looks correct so,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 
> Possibilities for follow-up improvement:
> 
> 
>> --- a/tcg/tcg-op-ldst.c
>> +++ b/tcg/tcg-op-ldst.c
>> @@ -148,14 +148,24 @@ static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
>>        return NULL;
>>    }
>>    
>> +#ifdef CONFIG_PLUGIN
>>    static void
>> -plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
>> +plugin_gen_mem_callbacks(TCGv_i64 value_low, TCGv_i64 value_high,
>> +                         TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
>>                             enum qemu_plugin_mem_rw rw)
>>    {
>> -#ifdef CONFIG_PLUGIN
>>        if (tcg_ctx->plugin_insn != NULL) {
>>            qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
>>    
>> +        TCGv_ptr plugin_state = tcg_temp_ebb_new_ptr();
>> +        tcg_gen_ld_ptr(plugin_state, tcg_env,
>> +                       offsetof(CPUState, plugin_state) - sizeof(CPUState));
>> +        tcg_gen_st_i64(value_low, plugin_state,
>> +                       offsetof(CPUPluginState, mem_value_low));
>> +        tcg_gen_st_i64(value_high, plugin_state,
>> +                       offsetof(CPUPluginState, mem_value_high));
> 
> Maybe better to place this data at the beginning of CPUNegativeOffsetState?
> This would eliminate a load dependency and most hosts would be able to use (relatively)
> small negative offset efficiently.
> 

That's a good suggestion. Moved it here for v5.

>> +static void
>> +plugin_gen_mem_callbacks_i32(TCGv_i32 val,
>> +                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
>> +                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
>> +{
>> +#ifdef CONFIG_PLUGIN
>> +    if (tcg_ctx->plugin_insn != NULL) {
>> +        TCGv_i64 ext_val = tcg_temp_ebb_new_i64();
>> +        tcg_gen_extu_i32_i64(ext_val, val);
>> +        plugin_gen_mem_callbacks(ext_val, tcg_constant_i64(0),
> 
> This zero extension got me to thinking:
> (1) why zero extension and not sign-extension based on MO_SIGN from oi?

This was to truncate upper value, but as you mentioned, I simply clip it 
later, so it's not important.

> (2) given that the callback will have oi, do we really need any extension
>       at all here?  We could allow the bits outside oi be garbage.
>       This would eliminate the store to value_high entirely for most ops,
>       and would allow this i32 op to avoid the extension -- simply perform
>       a 32-bit store into the low half of value_low.
> 

I implemented selective store for plugin_gen_mem_callbacks function for 
next version.

However, for helpers based memory accesses, I prefer to keep existing 
version. It would require to create several small functions 
(atomic_trace_rmw_post, plugin_load/store_cb and qemu_plugin_vcpu_mem_cb 
for every size). It's something that could be desirable later when we'll 
introduce bigger tcg word size though.

> That appears to be what you're doing anyway with qemu_plugin_mem_value in the next patch.
> 
> 
> r~
diff mbox series

Patch

diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index 1dc2151dafd..89593b2502f 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -53,6 +53,14 @@ 
 # error unsupported data size
 #endif
 
+#if DATA_SIZE == 16
+# define VALUE_LOW(val) int128_getlo(val)
+# define VALUE_HIGH(val) int128_gethi(val)
+#else
+# define VALUE_LOW(val) val
+# define VALUE_HIGH(val) 0
+#endif
+
 #if DATA_SIZE >= 4
 # define ABI_TYPE  DATA_TYPE
 #else
@@ -83,7 +91,12 @@  ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
     ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
 #endif
     ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr,
+                          VALUE_LOW(ret),
+                          VALUE_HIGH(ret),
+                          VALUE_LOW(newv),
+                          VALUE_HIGH(newv),
+                          oi);
     return ret;
 }
 
@@ -97,7 +110,12 @@  ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val,
 
     ret = qatomic_xchg__nocheck(haddr, val);
     ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr,
+                          VALUE_LOW(ret),
+                          VALUE_HIGH(ret),
+                          VALUE_LOW(val),
+                          VALUE_HIGH(val),
+                          oi);
     return ret;
 }
 
@@ -109,7 +127,12 @@  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr,            \
     haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr);   \
     ret = qatomic_##X(haddr, val);                                  \
     ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr,                                \
+                          VALUE_LOW(ret),                           \
+                          VALUE_HIGH(ret),                          \
+                          VALUE_LOW(val),                           \
+                          VALUE_HIGH(val),                          \
+                          oi);                                      \
     return ret;                                                     \
 }
 
@@ -145,7 +168,12 @@  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr,            \
         cmp = qatomic_cmpxchg__nocheck(haddr, old, new);            \
     } while (cmp != old);                                           \
     ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr,                                \
+                          VALUE_LOW(old),                           \
+                          VALUE_HIGH(old),                          \
+                          VALUE_LOW(xval),                          \
+                          VALUE_HIGH(xval),                         \
+                          oi);                                      \
     return RET;                                                     \
 }
 
@@ -188,7 +216,12 @@  ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
     ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
 #endif
     ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr,
+                          VALUE_LOW(ret),
+                          VALUE_HIGH(ret),
+                          VALUE_LOW(newv),
+                          VALUE_HIGH(newv),
+                          oi);
     return BSWAP(ret);
 }
 
@@ -202,7 +235,12 @@  ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val,
 
     ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
     ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr,
+                          VALUE_LOW(ret),
+                          VALUE_HIGH(ret),
+                          VALUE_LOW(val),
+                          VALUE_HIGH(val),
+                          oi);
     return BSWAP(ret);
 }
 
@@ -214,7 +252,12 @@  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr,            \
     haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr);   \
     ret = qatomic_##X(haddr, BSWAP(val));                           \
     ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr,                                \
+                          VALUE_LOW(ret),                           \
+                          VALUE_HIGH(ret),                          \
+                          VALUE_LOW(val),                           \
+                          VALUE_HIGH(val),                          \
+                          oi);                                      \
     return BSWAP(ret);                                              \
 }
 
@@ -247,7 +290,12 @@  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr,            \
         ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));     \
     } while (ldo != ldn);                                           \
     ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr,                                \
+                          VALUE_LOW(old),                           \
+                          VALUE_HIGH(old),                          \
+                          VALUE_LOW(xval),                          \
+                          VALUE_HIGH(xval),                         \
+                          oi);                                      \
     return RET;                                                     \
 }
 
@@ -281,3 +329,5 @@  GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
 #undef SUFFIX
 #undef DATA_SIZE
 #undef SHIFT
+#undef VALUE_LOW
+#undef VALUE_HIGH
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index bc5aef979e7..0081991dab0 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -142,9 +142,13 @@  struct qemu_plugin_tb {
 /**
  * struct CPUPluginState - per-CPU state for plugins
  * @event_mask: plugin event bitmap. Modified only via async work.
+ * @mem_value_low: 64 lower bits of latest accessed mem value.
+ * @mem_value_high: 64 higher bits of latest accessed mem value.
  */
 struct CPUPluginState {
     DECLARE_BITMAP(event_mask, QEMU_PLUGIN_EV_MAX);
+    uint64_t mem_value_low;
+    uint64_t mem_value_high;
 };
 
 /**
@@ -164,6 +168,8 @@  qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1,
 void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret);
 
 void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
+                             uint64_t value_low,
+                             uint64_t value_high,
                              MemOpIdx oi, enum qemu_plugin_mem_rw rw);
 
 void qemu_plugin_flush_cb(void);
@@ -248,6 +254,8 @@  void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret)
 { }
 
 static inline void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
+                                           uint64_t value_low,
+                                           uint64_t value_high,
                                            MemOpIdx oi,
                                            enum qemu_plugin_mem_rw rw)
 { }
diff --git a/plugins/core.c b/plugins/core.c
index 9d737d82787..a899eaf37af 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -575,14 +575,21 @@  void exec_inline_op(enum plugin_dyn_cb_type type,
 }
 
 void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
+                             uint64_t value_low,
+                             uint64_t value_high,
                              MemOpIdx oi, enum qemu_plugin_mem_rw rw)
 {
     GArray *arr = cpu->neg.plugin_mem_cbs;
+    CPUPluginState *plugin_state = cpu->plugin_state;
     size_t i;
 
     if (arr == NULL) {
         return;
     }
+
+    plugin_state->mem_value_low = value_low;
+    plugin_state->mem_value_high = value_high;
+
     for (i = 0; i < arr->len; i++) {
         struct qemu_plugin_dyn_cb *cb =
             &g_array_index(arr, struct qemu_plugin_dyn_cb, i);
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index 85101602581..ea7f4fa8ded 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -148,14 +148,24 @@  static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
     return NULL;
 }
 
+#ifdef CONFIG_PLUGIN
 static void
-plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
+plugin_gen_mem_callbacks(TCGv_i64 value_low, TCGv_i64 value_high,
+                         TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
                          enum qemu_plugin_mem_rw rw)
 {
-#ifdef CONFIG_PLUGIN
     if (tcg_ctx->plugin_insn != NULL) {
         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
 
+        TCGv_ptr plugin_state = tcg_temp_ebb_new_ptr();
+        tcg_gen_ld_ptr(plugin_state, tcg_env,
+                       offsetof(CPUState, plugin_state) - sizeof(CPUState));
+        tcg_gen_st_i64(value_low, plugin_state,
+                       offsetof(CPUPluginState, mem_value_low));
+        tcg_gen_st_i64(value_high, plugin_state,
+                       offsetof(CPUPluginState, mem_value_high));
+        tcg_temp_free_ptr(plugin_state);
+
         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
             if (!copy_addr) {
                 copy_addr = tcg_temp_ebb_new_i64();
@@ -172,6 +182,48 @@  plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
             }
         }
     }
+}
+#endif
+
+static void
+plugin_gen_mem_callbacks_i32(TCGv_i32 val,
+                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
+                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
+{
+#ifdef CONFIG_PLUGIN
+    if (tcg_ctx->plugin_insn != NULL) {
+        TCGv_i64 ext_val = tcg_temp_ebb_new_i64();
+        tcg_gen_extu_i32_i64(ext_val, val);
+        plugin_gen_mem_callbacks(ext_val, tcg_constant_i64(0),
+                                 copy_addr, orig_addr, oi, rw);
+        tcg_temp_free_i64(ext_val);
+    }
+#endif
+}
+
+static void
+plugin_gen_mem_callbacks_i64(TCGv_i64 val,
+                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
+                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
+{
+#ifdef CONFIG_PLUGIN
+    if (tcg_ctx->plugin_insn != NULL) {
+        plugin_gen_mem_callbacks(val, tcg_constant_i64(0),
+                                 copy_addr, orig_addr, oi, rw);
+    }
+#endif
+}
+
+static void
+plugin_gen_mem_callbacks_i128(TCGv_i128 val,
+                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
+                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
+{
+#ifdef CONFIG_PLUGIN
+    if (tcg_ctx->plugin_insn != NULL) {
+        plugin_gen_mem_callbacks(TCGV128_LOW(val), TCGV128_HIGH(val),
+                                 copy_addr, orig_addr, oi, rw);
+    }
 #endif
 }
 
@@ -203,7 +255,8 @@  static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
         opc = INDEX_op_qemu_ld_a64_i32;
     }
     gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
-    plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
+    plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi,
+                                 QEMU_PLUGIN_MEM_R);
 
     if ((orig_memop ^ memop) & MO_BSWAP) {
         switch (orig_memop & MO_SIZE) {
@@ -271,7 +324,7 @@  static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
         }
     }
     gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
-    plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
+    plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
 
     if (swap) {
         tcg_temp_free_i32(swap);
@@ -324,7 +377,8 @@  static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
         opc = INDEX_op_qemu_ld_a64_i64;
     }
     gen_ldst_i64(opc, val, addr, oi);
-    plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
+    plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi,
+                                 QEMU_PLUGIN_MEM_R);
 
     if ((orig_memop ^ memop) & MO_BSWAP) {
         int flags = (orig_memop & MO_SIGN
@@ -396,7 +450,7 @@  static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
         opc = INDEX_op_qemu_st_a64_i64;
     }
     gen_ldst_i64(opc, val, addr, oi);
-    plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
+    plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
 
     if (swap) {
         tcg_temp_free_i64(swap);
@@ -606,7 +660,8 @@  static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
                            tcg_constant_i32(orig_oi));
     }
 
-    plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
+    plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
+                                  QEMU_PLUGIN_MEM_R);
 }
 
 void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
@@ -722,7 +777,8 @@  static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
                            tcg_constant_i32(orig_oi));
     }
 
-    plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W);
+    plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
+                                  QEMU_PLUGIN_MEM_W);
 }
 
 void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
index 95a5c5ff12d..6056598c23d 100644
--- a/accel/tcg/atomic_common.c.inc
+++ b/accel/tcg/atomic_common.c.inc
@@ -14,9 +14,20 @@ 
  */
 
 static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
+                                  uint64_t read_value_low,
+                                  uint64_t read_value_high,
+                                  uint64_t write_value_low,
+                                  uint64_t write_value_high,
                                   MemOpIdx oi)
 {
-    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
+    if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
+        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr,
+                                read_value_low, read_value_high,
+                                oi, QEMU_PLUGIN_MEM_R);
+        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr,
+                                write_value_low, write_value_high,
+                                oi, QEMU_PLUGIN_MEM_W);
+    }
 }
 
 /*
diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc
index 87ceb954873..ebbf380d767 100644
--- a/accel/tcg/ldst_common.c.inc
+++ b/accel/tcg/ldst_common.c.inc
@@ -123,10 +123,15 @@  void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
  * Load helpers for cpu_ldst.h
  */
 
-static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
+static void plugin_load_cb(CPUArchState *env, abi_ptr addr,
+                           uint64_t value_low,
+                           uint64_t value_high,
+                           MemOpIdx oi)
 {
     if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
-        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr,
+                                value_low, value_high,
+                                oi, QEMU_PLUGIN_MEM_R);
     }
 }
 
@@ -136,7 +141,7 @@  uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_UB);
     ret = do_ld1_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, ret, 0, oi);
     return ret;
 }
 
@@ -147,7 +152,7 @@  uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
     ret = do_ld2_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, ret, 0, oi);
     return ret;
 }
 
@@ -158,7 +163,7 @@  uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
     ret = do_ld4_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, ret, 0, oi);
     return ret;
 }
 
@@ -169,7 +174,7 @@  uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
     ret = do_ld8_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, ret, 0, oi);
     return ret;
 }
 
@@ -180,7 +185,7 @@  Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
     ret = do_ld16_mmu(env_cpu(env), addr, oi, ra);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, int128_getlo(ret), int128_gethi(ret), oi);
     return ret;
 }
 
@@ -188,10 +193,15 @@  Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
  * Store helpers for cpu_ldst.h
  */
 
-static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
+static void plugin_store_cb(CPUArchState *env, abi_ptr addr,
+                            uint64_t value_low,
+                            uint64_t value_high,
+                            MemOpIdx oi)
 {
     if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
-        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr,
+                                value_low, value_high,
+                                oi, QEMU_PLUGIN_MEM_W);
     }
 }
 
@@ -199,7 +209,7 @@  void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
                  MemOpIdx oi, uintptr_t retaddr)
 {
     helper_stb_mmu(env, addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, val, 0, oi);
 }
 
 void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
@@ -207,7 +217,7 @@  void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
 {
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
     do_st2_mmu(env_cpu(env), addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, val, 0, oi);
 }
 
 void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
@@ -215,7 +225,7 @@  void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
 {
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
     do_st4_mmu(env_cpu(env), addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, val, 0, oi);
 }
 
 void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
@@ -223,7 +233,7 @@  void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
 {
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
     do_st8_mmu(env_cpu(env), addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, val, 0, oi);
 }
 
 void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
@@ -231,7 +241,7 @@  void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
 {
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
     do_st16_mmu(env_cpu(env), addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, int128_getlo(val), int128_gethi(val), oi);
 }
 
 /*