diff mbox series

[for-8.0,1/1] target/ppc: Use tcg_gen_atomic_cmpxchg_i128 for STQCX

Message ID 20221112061122.2720163-2-richard.henderson@linaro.org
State Superseded
Headers show
Series target/ppc: Use tcg_gen_atomic_cmpxchg_i128 | expand

Commit Message

Richard Henderson Nov. 12, 2022, 6:11 a.m. UTC
Note that the previous direct reference to reserve_val,

-   tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
-                                ? offsetof(CPUPPCState, reserve_val2)
-                                : offsetof(CPUPPCState, reserve_val)));

was incorrect because all references should have gone through
cpu_reserve_val.  Create a cpu_reserve_val2 tcg temp to fix this.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/helper.h     |   2 -
 target/ppc/mem_helper.c |  44 -----------------
 target/ppc/translate.c  | 102 ++++++++++++++++++----------------------
 3 files changed, 47 insertions(+), 101 deletions(-)

Comments

Daniel Henrique Barboza Nov. 16, 2022, 1:38 p.m. UTC | #1
Richard,

I believe the ppc64-linux-user target didn't like what you did in this
patch. Here's the error:

$ ../configure --target-list=ppc64-softmmu,ppc64-linux-user,ppc-softmmu,ppc-linux-user,ppc64le-linux-user
$ make -j

(...)

[15/133] Compiling C object libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o
FAILED: libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o
cc -m64 -mcx16 -Ilibqemu-ppc64-linux-user.fa.p -I. -I.. -Itarget/ppc -I../target/ppc -I../common-user/host/x86_64 -I../linux-user/include/host/x86_64 -I../linux-user/include -Ilinux-user -I../linux-user -Ilinux-user/ppc -I../linux-user/ppc -Iqapi -Itrace -Iui -Iui/shader -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/include/sysprof-4 -fdiagnostics-color=auto -Wall -Winvalid-pch -Werror -std=gnu11 -O2 -g -isystem /home/danielhb/kvm-project/qemu/linux-headers -isystem linux-headers -iquote . -iquote /home/danielhb/kvm-project/qemu -iquote /home/danielhb/kvm-project/qemu/include -iquote /home/danielhb/kvm-project/qemu/tcg/i386 -pthread -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common -fwrapv -Wold-style-declaration -Wold-style-definition -Wtype-limits -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs -Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2 -Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi -fstack-protector-strong -fPIE -isystem../linux-headers -isystemlinux-headers -DNEED_CPU_H '-DCONFIG_TARGET="ppc64-linux-user-config-target.h"' '-DCONFIG_DEVICES="ppc64-linux-user-config-devices.h"' -MD -MQ libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o -MF libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o.d -o libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o -c ../target/ppc/translate.c
../target/ppc/translate.c: In function ‘gen_stqcx_’:
../target/ppc/translate.c:3989:5: error: unknown type name ‘TCGv_i128’; did you mean ‘TCGv_i32’?
  3989 |     TCGv_i128 cmp, val;
       |     ^~~~~~~~~
       |     TCGv_i32
../target/ppc/translate.c:4006:11: error: implicit declaration of function ‘tcg_temp_new_i128’; did you mean ‘tcg_temp_new_i32’? [-Werror=implicit-function-declaration]
  4006 |     cmp = tcg_temp_new_i128();
       |           ^~~~~~~~~~~~~~~~~
       |           tcg_temp_new_i32
../target/ppc/translate.c:4006:11: error: nested extern declaration of ‘tcg_temp_new_i128’ [-Werror=nested-externs]
../target/ppc/translate.c:4009:5: error: implicit declaration of function ‘tcg_gen_concat_i64_i128’; did you mean ‘tcg_gen_concat_i32_i64’? [-Werror=implicit-function-declaration]
  4009 |     tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val);
       |     ^~~~~~~~~~~~~~~~~~~~~~~
       |     tcg_gen_concat_i32_i64
../target/ppc/translate.c:4009:5: error: nested extern declaration of ‘tcg_gen_concat_i64_i128’ [-Werror=nested-externs]
../target/ppc/translate.c:4014:5: error: implicit declaration of function ‘tcg_gen_atomic_cmpxchg_i128’; did you mean ‘tcg_gen_atomic_cmpxchg_i32’? [-Werror=implicit-function-declaration]
  4014 |     tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val, ctx->mem_idx,
       |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~
       |     tcg_gen_atomic_cmpxchg_i32
../target/ppc/translate.c:4014:5: error: nested extern declaration of ‘tcg_gen_atomic_cmpxchg_i128’ [-Werror=nested-externs]
../target/ppc/translate.c:4016:5: error: implicit declaration of function ‘tcg_temp_free_i128’; did you mean ‘tcg_temp_free_i32’? [-Werror=implicit-function-declaration]
  4016 |     tcg_temp_free_i128(cmp);
       |     ^~~~~~~~~~~~~~~~~~
       |     tcg_temp_free_i32
../target/ppc/translate.c:4016:5: error: nested extern declaration of ‘tcg_temp_free_i128’ [-Werror=nested-externs]
../target/ppc/translate.c:4020:5: error: implicit declaration of function ‘tcg_gen_extr_i128_i64’; did you mean ‘tcg_gen_ext_i32_i64’? [-Werror=implicit-function-declaration]
  4020 |     tcg_gen_extr_i128_i64(t1, t0, val);
       |     ^~~~~~~~~~~~~~~~~~~~~
       |     tcg_gen_ext_i32_i64
../target/ppc/translate.c:4020:5: error: nested extern declaration of ‘tcg_gen_extr_i128_i64’ [-Werror=nested-externs]
cc1: all warnings being treated as errors
[16/133] Compiling C object libqemu-ppc64-softmmu.fa.p/target_ppc_mmu_helper.c.o
[17/133] Compiling C object libqemu-ppc64-softmmu.fa.p/target_ppc_translate.c.o
FAILED: libqemu-ppc64-softmmu.fa.p/target_ppc_translate.c.o


Thanks,


Daniel


On 11/12/22 03:11, Richard Henderson wrote:
> Note that the previous direct reference to reserve_val,
> 
> -   tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
> -                                ? offsetof(CPUPPCState, reserve_val2)
> -                                : offsetof(CPUPPCState, reserve_val)));
> 
> was incorrect because all references should have gone through
> cpu_reserve_val.  Create a cpu_reserve_val2 tcg temp to fix this.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/ppc/helper.h     |   2 -
>   target/ppc/mem_helper.c |  44 -----------------
>   target/ppc/translate.c  | 102 ++++++++++++++++++----------------------
>   3 files changed, 47 insertions(+), 101 deletions(-)
> 
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index 8dd22a35e4..0beaca5c7a 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -818,6 +818,4 @@ DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
>                      void, env, tl, i64, i64, i32)
>   DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
>                      void, env, tl, i64, i64, i32)
> -DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32)
> -DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32)
>   #endif
> diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
> index d1163f316c..1578887a8f 100644
> --- a/target/ppc/mem_helper.c
> +++ b/target/ppc/mem_helper.c
> @@ -413,50 +413,6 @@ void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
>       val = int128_make128(lo, hi);
>       cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
>   }
> -
> -uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
> -                                  uint64_t new_lo, uint64_t new_hi,
> -                                  uint32_t opidx)
> -{
> -    bool success = false;
> -
> -    /* We will have raised EXCP_ATOMIC from the translator.  */
> -    assert(HAVE_CMPXCHG128);
> -
> -    if (likely(addr == env->reserve_addr)) {
> -        Int128 oldv, cmpv, newv;
> -
> -        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
> -        newv = int128_make128(new_lo, new_hi);
> -        oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv,
> -                                          opidx, GETPC());
> -        success = int128_eq(oldv, cmpv);
> -    }
> -    env->reserve_addr = -1;
> -    return env->so + success * CRF_EQ_BIT;
> -}
> -
> -uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
> -                                  uint64_t new_lo, uint64_t new_hi,
> -                                  uint32_t opidx)
> -{
> -    bool success = false;
> -
> -    /* We will have raised EXCP_ATOMIC from the translator.  */
> -    assert(HAVE_CMPXCHG128);
> -
> -    if (likely(addr == env->reserve_addr)) {
> -        Int128 oldv, cmpv, newv;
> -
> -        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
> -        newv = int128_make128(new_lo, new_hi);
> -        oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv,
> -                                          opidx, GETPC());
> -        success = int128_eq(oldv, cmpv);
> -    }
> -    env->reserve_addr = -1;
> -    return env->so + success * CRF_EQ_BIT;
> -}
>   #endif
>   
>   /*****************************************************************************/
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 19c1d17cb0..85f95a9045 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -72,6 +72,7 @@ static TCGv cpu_cfar;
>   static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
>   static TCGv cpu_reserve;
>   static TCGv cpu_reserve_val;
> +static TCGv cpu_reserve_val2;
>   static TCGv cpu_fpscr;
>   static TCGv_i32 cpu_access_type;
>   
> @@ -141,8 +142,11 @@ void ppc_translate_init(void)
>                                        offsetof(CPUPPCState, reserve_addr),
>                                        "reserve_addr");
>       cpu_reserve_val = tcg_global_mem_new(cpu_env,
> -                                     offsetof(CPUPPCState, reserve_val),
> -                                     "reserve_val");
> +                                         offsetof(CPUPPCState, reserve_val),
> +                                         "reserve_val");
> +    cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
> +                                          offsetof(CPUPPCState, reserve_val2),
> +                                          "reserve_val2");
>   
>       cpu_fpscr = tcg_global_mem_new(cpu_env,
>                                      offsetof(CPUPPCState, fpscr), "fpscr");
> @@ -3979,78 +3983,66 @@ static void gen_lqarx(DisasContext *ctx)
>   /* stqcx. */
>   static void gen_stqcx_(DisasContext *ctx)
>   {
> +    TCGLabel *lab_fail, *lab_over;
>       int rs = rS(ctx->opcode);
> -    TCGv EA, hi, lo;
> +    TCGv EA, t0, t1;
> +    TCGv_i128 cmp, val;
>   
>       if (unlikely(rs & 1)) {
>           gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
>           return;
>       }
>   
> +    lab_fail = gen_new_label();
> +    lab_over = gen_new_label();
> +
>       gen_set_access_type(ctx, ACCESS_RES);
>       EA = tcg_temp_new();
>       gen_addr_reg_index(ctx, EA);
>   
> +    tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
> +    tcg_temp_free(EA);
> +
> +    cmp = tcg_temp_new_i128();
> +    val = tcg_temp_new_i128();
> +
> +    tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val);
> +
>       /* Note that the low part is always in RS+1, even in LE mode.  */
> -    lo = cpu_gpr[rs + 1];
> -    hi = cpu_gpr[rs];
> +    tcg_gen_concat_i64_i128(val, cpu_gpr[rs + 1], cpu_gpr[rs]);
>   
> -    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
> -        if (HAVE_CMPXCHG128) {
> -            TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_128) | MO_ALIGN);
> -            if (ctx->le_mode) {
> -                gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
> -                                             EA, lo, hi, oi);
> -            } else {
> -                gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
> -                                             EA, lo, hi, oi);
> -            }
> -            tcg_temp_free_i32(oi);
> -        } else {
> -            /* Restart with exclusive lock.  */
> -            gen_helper_exit_atomic(cpu_env);
> -            ctx->base.is_jmp = DISAS_NORETURN;
> -        }
> -        tcg_temp_free(EA);
> -    } else {
> -        TCGLabel *lab_fail = gen_new_label();
> -        TCGLabel *lab_over = gen_new_label();
> -        TCGv_i64 t0 = tcg_temp_new_i64();
> -        TCGv_i64 t1 = tcg_temp_new_i64();
> +    tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val, ctx->mem_idx,
> +                                DEF_MEMOP(MO_128 | MO_ALIGN));
> +    tcg_temp_free_i128(cmp);
>   
> -        tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
> -        tcg_temp_free(EA);
> +    t0 = tcg_temp_new();
> +    t1 = tcg_temp_new();
> +    tcg_gen_extr_i128_i64(t1, t0, val);
> +    tcg_temp_free_i128(val);
>   
> -        gen_qemu_ld64_i64(ctx, t0, cpu_reserve);
> -        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
> -                                     ? offsetof(CPUPPCState, reserve_val2)
> -                                     : offsetof(CPUPPCState, reserve_val)));
> -        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
> +    tcg_gen_xor_tl(t1, t1, cpu_reserve_val2);
> +    tcg_gen_xor_tl(t0, t0, cpu_reserve_val);
> +    tcg_gen_or_tl(t0, t0, t1);
> +    tcg_temp_free(t1);
>   
> -        tcg_gen_addi_i64(t0, cpu_reserve, 8);
> -        gen_qemu_ld64_i64(ctx, t0, t0);
> -        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
> -                                     ? offsetof(CPUPPCState, reserve_val)
> -                                     : offsetof(CPUPPCState, reserve_val2)));
> -        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
> +    tcg_gen_setcondi_tl(TCG_COND_EQ, t0, t0, 0);
> +    tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT);
> +    tcg_gen_or_tl(t0, t0, cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_crf[0], t0);
> +    tcg_temp_free(t0);
>   
> -        /* Success */
> -        gen_qemu_st64_i64(ctx, ctx->le_mode ? lo : hi, cpu_reserve);
> -        tcg_gen_addi_i64(t0, cpu_reserve, 8);
> -        gen_qemu_st64_i64(ctx, ctx->le_mode ? hi : lo, t0);
> +    tcg_gen_br(lab_over);
> +    gen_set_label(lab_fail);
>   
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
> -        tcg_gen_br(lab_over);
> +    /*
> +     * Address mismatch implies failure.  But we still need to provide
> +     * the memory barrier semantics of the instruction.
> +     */
> +    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
> +    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
>   
> -        gen_set_label(lab_fail);
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -
> -        gen_set_label(lab_over);
> -        tcg_gen_movi_tl(cpu_reserve, -1);
> -        tcg_temp_free_i64(t0);
> -        tcg_temp_free_i64(t1);
> -    }
> +    gen_set_label(lab_over);
> +    tcg_gen_movi_tl(cpu_reserve, -1);
>   }
>   #endif /* defined(TARGET_PPC64) */
>
Richard Henderson Nov. 17, 2022, 2:48 a.m. UTC | #2
I think you missed the Based-on tag.

r~

On Wed, 16 Nov 2022, 05:38 Daniel Henrique Barboza, <danielhb413@gmail.com>
wrote:

> Richard,
>
> I believe the ppc64-linux-user target didn't like what you did in this
> patch. Here's the error:
>
> $ ../configure
> --target-list=ppc64-softmmu,ppc64-linux-user,ppc-softmmu,ppc-linux-user,ppc64le-linux-user
> $ make -j
>
> (...)
>
> [15/133] Compiling C object
> libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o
> FAILED: libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o
> cc -m64 -mcx16 -Ilibqemu-ppc64-linux-user.fa.p -I. -I.. -Itarget/ppc
> -I../target/ppc -I../common-user/host/x86_64
> -I../linux-user/include/host/x86_64 -I../linux-user/include -Ilinux-user
> -I../linux-user -Ilinux-user/ppc -I../linux-user/ppc -Iqapi -Itrace -Iui
> -Iui/shader -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include
> -I/usr/include/sysprof-4 -fdiagnostics-color=auto -Wall -Winvalid-pch
> -Werror -std=gnu11 -O2 -g -isystem
> /home/danielhb/kvm-project/qemu/linux-headers -isystem linux-headers
> -iquote . -iquote /home/danielhb/kvm-project/qemu -iquote
> /home/danielhb/kvm-project/qemu/include -iquote
> /home/danielhb/kvm-project/qemu/tcg/i386 -pthread -U_FORTIFY_SOURCE
> -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64
> -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wundef
> -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common
> -fwrapv -Wold-style-declaration -Wold-style-definition -Wtype-limits
> -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers
> -Wempty-body -Wnested-externs -Wendif-labels -Wexpansion-to-defined
> -Wimplicit-fallthrough=2 -Wno-missing-include-dirs
> -Wno-shift-negative-value -Wno-psabi -fstack-protector-strong -fPIE
> -isystem../linux-headers -isystemlinux-headers -DNEED_CPU_H
> '-DCONFIG_TARGET="ppc64-linux-user-config-target.h"'
> '-DCONFIG_DEVICES="ppc64-linux-user-config-devices.h"' -MD -MQ
> libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o -MF
> libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o.d -o
> libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o -c
> ../target/ppc/translate.c
> ../target/ppc/translate.c: In function ‘gen_stqcx_’:
> ../target/ppc/translate.c:3989:5: error: unknown type name ‘TCGv_i128’;
> did you mean ‘TCGv_i32’?
>   3989 |     TCGv_i128 cmp, val;
>        |     ^~~~~~~~~
>        |     TCGv_i32
> ../target/ppc/translate.c:4006:11: error: implicit declaration of function
> ‘tcg_temp_new_i128’; did you mean ‘tcg_temp_new_i32’?
> [-Werror=implicit-function-declaration]
>   4006 |     cmp = tcg_temp_new_i128();
>        |           ^~~~~~~~~~~~~~~~~
>        |           tcg_temp_new_i32
> ../target/ppc/translate.c:4006:11: error: nested extern declaration of
> ‘tcg_temp_new_i128’ [-Werror=nested-externs]
> ../target/ppc/translate.c:4009:5: error: implicit declaration of function
> ‘tcg_gen_concat_i64_i128’; did you mean ‘tcg_gen_concat_i32_i64’?
> [-Werror=implicit-function-declaration]
>   4009 |     tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2,
> cpu_reserve_val);
>        |     ^~~~~~~~~~~~~~~~~~~~~~~
>        |     tcg_gen_concat_i32_i64
> ../target/ppc/translate.c:4009:5: error: nested extern declaration of
> ‘tcg_gen_concat_i64_i128’ [-Werror=nested-externs]
> ../target/ppc/translate.c:4014:5: error: implicit declaration of function
> ‘tcg_gen_atomic_cmpxchg_i128’; did you mean ‘tcg_gen_atomic_cmpxchg_i32’?
> [-Werror=implicit-function-declaration]
>   4014 |     tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val,
> ctx->mem_idx,
>        |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~
>        |     tcg_gen_atomic_cmpxchg_i32
> ../target/ppc/translate.c:4014:5: error: nested extern declaration of
> ‘tcg_gen_atomic_cmpxchg_i128’ [-Werror=nested-externs]
> ../target/ppc/translate.c:4016:5: error: implicit declaration of function
> ‘tcg_temp_free_i128’; did you mean ‘tcg_temp_free_i32’?
> [-Werror=implicit-function-declaration]
>   4016 |     tcg_temp_free_i128(cmp);
>        |     ^~~~~~~~~~~~~~~~~~
>        |     tcg_temp_free_i32
> ../target/ppc/translate.c:4016:5: error: nested extern declaration of
> ‘tcg_temp_free_i128’ [-Werror=nested-externs]
> ../target/ppc/translate.c:4020:5: error: implicit declaration of function
> ‘tcg_gen_extr_i128_i64’; did you mean ‘tcg_gen_ext_i32_i64’?
> [-Werror=implicit-function-declaration]
>   4020 |     tcg_gen_extr_i128_i64(t1, t0, val);
>        |     ^~~~~~~~~~~~~~~~~~~~~
>        |     tcg_gen_ext_i32_i64
> ../target/ppc/translate.c:4020:5: error: nested extern declaration of
> ‘tcg_gen_extr_i128_i64’ [-Werror=nested-externs]
> cc1: all warnings being treated as errors
> [16/133] Compiling C object
> libqemu-ppc64-softmmu.fa.p/target_ppc_mmu_helper.c.o
> [17/133] Compiling C object
> libqemu-ppc64-softmmu.fa.p/target_ppc_translate.c.o
> FAILED: libqemu-ppc64-softmmu.fa.p/target_ppc_translate.c.o
>
>
> Thanks,
>
>
> Daniel
>
>
> On 11/12/22 03:11, Richard Henderson wrote:
> > Note that the previous direct reference to reserve_val,
> >
> > -   tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
> > -                                ? offsetof(CPUPPCState, reserve_val2)
> > -                                : offsetof(CPUPPCState, reserve_val)));
> >
> > was incorrect because all references should have gone through
> > cpu_reserve_val.  Create a cpu_reserve_val2 tcg temp to fix this.
> >
> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> > ---
> >   target/ppc/helper.h     |   2 -
> >   target/ppc/mem_helper.c |  44 -----------------
> >   target/ppc/translate.c  | 102 ++++++++++++++++++----------------------
> >   3 files changed, 47 insertions(+), 101 deletions(-)
> >
> > diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> > index 8dd22a35e4..0beaca5c7a 100644
> > --- a/target/ppc/helper.h
> > +++ b/target/ppc/helper.h
> > @@ -818,6 +818,4 @@ DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
> >                      void, env, tl, i64, i64, i32)
> >   DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
> >                      void, env, tl, i64, i64, i32)
> > -DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32)
> > -DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32)
> >   #endif
> > diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
> > index d1163f316c..1578887a8f 100644
> > --- a/target/ppc/mem_helper.c
> > +++ b/target/ppc/mem_helper.c
> > @@ -413,50 +413,6 @@ void helper_stq_be_parallel(CPUPPCState *env,
> target_ulong addr,
> >       val = int128_make128(lo, hi);
> >       cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
> >   }
> > -
> > -uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
> > -                                  uint64_t new_lo, uint64_t new_hi,
> > -                                  uint32_t opidx)
> > -{
> > -    bool success = false;
> > -
> > -    /* We will have raised EXCP_ATOMIC from the translator.  */
> > -    assert(HAVE_CMPXCHG128);
> > -
> > -    if (likely(addr == env->reserve_addr)) {
> > -        Int128 oldv, cmpv, newv;
> > -
> > -        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
> > -        newv = int128_make128(new_lo, new_hi);
> > -        oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv,
> > -                                          opidx, GETPC());
> > -        success = int128_eq(oldv, cmpv);
> > -    }
> > -    env->reserve_addr = -1;
> > -    return env->so + success * CRF_EQ_BIT;
> > -}
> > -
> > -uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
> > -                                  uint64_t new_lo, uint64_t new_hi,
> > -                                  uint32_t opidx)
> > -{
> > -    bool success = false;
> > -
> > -    /* We will have raised EXCP_ATOMIC from the translator.  */
> > -    assert(HAVE_CMPXCHG128);
> > -
> > -    if (likely(addr == env->reserve_addr)) {
> > -        Int128 oldv, cmpv, newv;
> > -
> > -        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
> > -        newv = int128_make128(new_lo, new_hi);
> > -        oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv,
> > -                                          opidx, GETPC());
> > -        success = int128_eq(oldv, cmpv);
> > -    }
> > -    env->reserve_addr = -1;
> > -    return env->so + success * CRF_EQ_BIT;
> > -}
> >   #endif
> >
> >
>  /*****************************************************************************/
> > diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> > index 19c1d17cb0..85f95a9045 100644
> > --- a/target/ppc/translate.c
> > +++ b/target/ppc/translate.c
> > @@ -72,6 +72,7 @@ static TCGv cpu_cfar;
> >   static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
> >   static TCGv cpu_reserve;
> >   static TCGv cpu_reserve_val;
> > +static TCGv cpu_reserve_val2;
> >   static TCGv cpu_fpscr;
> >   static TCGv_i32 cpu_access_type;
> >
> > @@ -141,8 +142,11 @@ void ppc_translate_init(void)
> >                                        offsetof(CPUPPCState,
> reserve_addr),
> >                                        "reserve_addr");
> >       cpu_reserve_val = tcg_global_mem_new(cpu_env,
> > -                                     offsetof(CPUPPCState, reserve_val),
> > -                                     "reserve_val");
> > +                                         offsetof(CPUPPCState,
> reserve_val),
> > +                                         "reserve_val");
> > +    cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
> > +                                          offsetof(CPUPPCState,
> reserve_val2),
> > +                                          "reserve_val2");
> >
> >       cpu_fpscr = tcg_global_mem_new(cpu_env,
> >                                      offsetof(CPUPPCState, fpscr),
> "fpscr");
> > @@ -3979,78 +3983,66 @@ static void gen_lqarx(DisasContext *ctx)
> >   /* stqcx. */
> >   static void gen_stqcx_(DisasContext *ctx)
> >   {
> > +    TCGLabel *lab_fail, *lab_over;
> >       int rs = rS(ctx->opcode);
> > -    TCGv EA, hi, lo;
> > +    TCGv EA, t0, t1;
> > +    TCGv_i128 cmp, val;
> >
> >       if (unlikely(rs & 1)) {
> >           gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
> >           return;
> >       }
> >
> > +    lab_fail = gen_new_label();
> > +    lab_over = gen_new_label();
> > +
> >       gen_set_access_type(ctx, ACCESS_RES);
> >       EA = tcg_temp_new();
> >       gen_addr_reg_index(ctx, EA);
> >
> > +    tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
> > +    tcg_temp_free(EA);
> > +
> > +    cmp = tcg_temp_new_i128();
> > +    val = tcg_temp_new_i128();
> > +
> > +    tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val);
> > +
> >       /* Note that the low part is always in RS+1, even in LE mode.  */
> > -    lo = cpu_gpr[rs + 1];
> > -    hi = cpu_gpr[rs];
> > +    tcg_gen_concat_i64_i128(val, cpu_gpr[rs + 1], cpu_gpr[rs]);
> >
> > -    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
> > -        if (HAVE_CMPXCHG128) {
> > -            TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_128) | MO_ALIGN);
> > -            if (ctx->le_mode) {
> > -                gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
> > -                                             EA, lo, hi, oi);
> > -            } else {
> > -                gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
> > -                                             EA, lo, hi, oi);
> > -            }
> > -            tcg_temp_free_i32(oi);
> > -        } else {
> > -            /* Restart with exclusive lock.  */
> > -            gen_helper_exit_atomic(cpu_env);
> > -            ctx->base.is_jmp = DISAS_NORETURN;
> > -        }
> > -        tcg_temp_free(EA);
> > -    } else {
> > -        TCGLabel *lab_fail = gen_new_label();
> > -        TCGLabel *lab_over = gen_new_label();
> > -        TCGv_i64 t0 = tcg_temp_new_i64();
> > -        TCGv_i64 t1 = tcg_temp_new_i64();
> > +    tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val,
> ctx->mem_idx,
> > +                                DEF_MEMOP(MO_128 | MO_ALIGN));
> > +    tcg_temp_free_i128(cmp);
> >
> > -        tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
> > -        tcg_temp_free(EA);
> > +    t0 = tcg_temp_new();
> > +    t1 = tcg_temp_new();
> > +    tcg_gen_extr_i128_i64(t1, t0, val);
> > +    tcg_temp_free_i128(val);
> >
> > -        gen_qemu_ld64_i64(ctx, t0, cpu_reserve);
> > -        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
> > -                                     ? offsetof(CPUPPCState,
> reserve_val2)
> > -                                     : offsetof(CPUPPCState,
> reserve_val)));
> > -        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
> > +    tcg_gen_xor_tl(t1, t1, cpu_reserve_val2);
> > +    tcg_gen_xor_tl(t0, t0, cpu_reserve_val);
> > +    tcg_gen_or_tl(t0, t0, t1);
> > +    tcg_temp_free(t1);
> >
> > -        tcg_gen_addi_i64(t0, cpu_reserve, 8);
> > -        gen_qemu_ld64_i64(ctx, t0, t0);
> > -        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
> > -                                     ? offsetof(CPUPPCState,
> reserve_val)
> > -                                     : offsetof(CPUPPCState,
> reserve_val2)));
> > -        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
> > +    tcg_gen_setcondi_tl(TCG_COND_EQ, t0, t0, 0);
> > +    tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT);
> > +    tcg_gen_or_tl(t0, t0, cpu_so);
> > +    tcg_gen_trunc_tl_i32(cpu_crf[0], t0);
> > +    tcg_temp_free(t0);
> >
> > -        /* Success */
> > -        gen_qemu_st64_i64(ctx, ctx->le_mode ? lo : hi, cpu_reserve);
> > -        tcg_gen_addi_i64(t0, cpu_reserve, 8);
> > -        gen_qemu_st64_i64(ctx, ctx->le_mode ? hi : lo, t0);
> > +    tcg_gen_br(lab_over);
> > +    gen_set_label(lab_fail);
> >
> > -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> > -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
> > -        tcg_gen_br(lab_over);
> > +    /*
> > +     * Address mismatch implies failure.  But we still need to provide
> > +     * the memory barrier semantics of the instruction.
> > +     */
> > +    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
> > +    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> >
> > -        gen_set_label(lab_fail);
> > -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> > -
> > -        gen_set_label(lab_over);
> > -        tcg_gen_movi_tl(cpu_reserve, -1);
> > -        tcg_temp_free_i64(t0);
> > -        tcg_temp_free_i64(t1);
> > -    }
> > +    gen_set_label(lab_over);
> > +    tcg_gen_movi_tl(cpu_reserve, -1);
> >   }
> >   #endif /* defined(TARGET_PPC64) */
> >
>
Daniel Henrique Barboza Nov. 17, 2022, 10:03 a.m. UTC | #3
On 11/16/22 23:48, Richard Henderson wrote:
> I think you missed the Based-on tag.
> 
> r~


Duh. Sorry about that. Let me try it again.


Daniel


> 
> On Wed, 16 Nov 2022, 05:38 Daniel Henrique Barboza, <danielhb413@gmail.com <mailto:danielhb413@gmail.com>> wrote:
> 
>     Richard,
> 
>     I believe the ppc64-linux-user target didn't like what you did in this
>     patch. Here's the error:
> 
>     $ ../configure --target-list=ppc64-softmmu,ppc64-linux-user,ppc-softmmu,ppc-linux-user,ppc64le-linux-user
>     $ make -j
> 
>     (...)
> 
>     [15/133] Compiling C object libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o
>     FAILED: libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o
>     cc -m64 -mcx16 -Ilibqemu-ppc64-linux-user.fa.p -I. -I.. -Itarget/ppc -I../target/ppc -I../common-user/host/x86_64 -I../linux-user/include/host/x86_64 -I../linux-user/include -Ilinux-user -I../linux-user -Ilinux-user/ppc -I../linux-user/ppc -Iqapi -Itrace -Iui -Iui/shader -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/include/sysprof-4 -fdiagnostics-color=auto -Wall -Winvalid-pch -Werror -std=gnu11 -O2 -g -isystem /home/danielhb/kvm-project/qemu/linux-headers -isystem linux-headers -iquote . -iquote /home/danielhb/kvm-project/qemu -iquote /home/danielhb/kvm-project/qemu/include -iquote /home/danielhb/kvm-project/qemu/tcg/i386 -pthread -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common -fwrapv -Wold-style-declaration -Wold-style-definition -Wtype-limits -Wformat-security -Wformat-y2k -Winit-self
>     -Wignored-qualifiers -Wempty-body -Wnested-externs -Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2 -Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi -fstack-protector-strong -fPIE -isystem../linux-headers -isystemlinux-headers -DNEED_CPU_H '-DCONFIG_TARGET="ppc64-linux-user-config-target.h"' '-DCONFIG_DEVICES="ppc64-linux-user-config-devices.h"' -MD -MQ libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o -MF libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o.d -o libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o -c ../target/ppc/translate.c
>     ../target/ppc/translate.c: In function ‘gen_stqcx_’:
>     ../target/ppc/translate.c:3989:5: error: unknown type name ‘TCGv_i128’; did you mean ‘TCGv_i32’?
>        3989 |     TCGv_i128 cmp, val;
>             |     ^~~~~~~~~
>             |     TCGv_i32
>     ../target/ppc/translate.c:4006:11: error: implicit declaration of function ‘tcg_temp_new_i128’; did you mean ‘tcg_temp_new_i32’? [-Werror=implicit-function-declaration]
>        4006 |     cmp = tcg_temp_new_i128();
>             |           ^~~~~~~~~~~~~~~~~
>             |           tcg_temp_new_i32
>     ../target/ppc/translate.c:4006:11: error: nested extern declaration of ‘tcg_temp_new_i128’ [-Werror=nested-externs]
>     ../target/ppc/translate.c:4009:5: error: implicit declaration of function ‘tcg_gen_concat_i64_i128’; did you mean ‘tcg_gen_concat_i32_i64’? [-Werror=implicit-function-declaration]
>        4009 |     tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val);
>             |     ^~~~~~~~~~~~~~~~~~~~~~~
>             |     tcg_gen_concat_i32_i64
>     ../target/ppc/translate.c:4009:5: error: nested extern declaration of ‘tcg_gen_concat_i64_i128’ [-Werror=nested-externs]
>     ../target/ppc/translate.c:4014:5: error: implicit declaration of function ‘tcg_gen_atomic_cmpxchg_i128’; did you mean ‘tcg_gen_atomic_cmpxchg_i32’? [-Werror=implicit-function-declaration]
>        4014 |     tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val, ctx->mem_idx,
>             |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~
>             |     tcg_gen_atomic_cmpxchg_i32
>     ../target/ppc/translate.c:4014:5: error: nested extern declaration of ‘tcg_gen_atomic_cmpxchg_i128’ [-Werror=nested-externs]
>     ../target/ppc/translate.c:4016:5: error: implicit declaration of function ‘tcg_temp_free_i128’; did you mean ‘tcg_temp_free_i32’? [-Werror=implicit-function-declaration]
>        4016 |     tcg_temp_free_i128(cmp);
>             |     ^~~~~~~~~~~~~~~~~~
>             |     tcg_temp_free_i32
>     ../target/ppc/translate.c:4016:5: error: nested extern declaration of ‘tcg_temp_free_i128’ [-Werror=nested-externs]
>     ../target/ppc/translate.c:4020:5: error: implicit declaration of function ‘tcg_gen_extr_i128_i64’; did you mean ‘tcg_gen_ext_i32_i64’? [-Werror=implicit-function-declaration]
>        4020 |     tcg_gen_extr_i128_i64(t1, t0, val);
>             |     ^~~~~~~~~~~~~~~~~~~~~
>             |     tcg_gen_ext_i32_i64
>     ../target/ppc/translate.c:4020:5: error: nested extern declaration of ‘tcg_gen_extr_i128_i64’ [-Werror=nested-externs]
>     cc1: all warnings being treated as errors
>     [16/133] Compiling C object libqemu-ppc64-softmmu.fa.p/target_ppc_mmu_helper.c.o
>     [17/133] Compiling C object libqemu-ppc64-softmmu.fa.p/target_ppc_translate.c.o
>     FAILED: libqemu-ppc64-softmmu.fa.p/target_ppc_translate.c.o
> 
> 
>     Thanks,
> 
> 
>     Daniel
> 
> 
>     On 11/12/22 03:11, Richard Henderson wrote:
>      > Note that the previous direct reference to reserve_val,
>      >
>      > -   tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
>      > -                                ? offsetof(CPUPPCState, reserve_val2)
>      > -                                : offsetof(CPUPPCState, reserve_val)));
>      >
>      > was incorrect because all references should have gone through
>      > cpu_reserve_val.  Create a cpu_reserve_val2 tcg temp to fix this.
>      >
>      > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>      > ---
>      >   target/ppc/helper.h     |   2 -
>      >   target/ppc/mem_helper.c |  44 -----------------
>      >   target/ppc/translate.c  | 102 ++++++++++++++++++----------------------
>      >   3 files changed, 47 insertions(+), 101 deletions(-)
>      >
>      > diff --git a/target/ppc/helper.h b/target/ppc/helper.h
>      > index 8dd22a35e4..0beaca5c7a 100644
>      > --- a/target/ppc/helper.h
>      > +++ b/target/ppc/helper.h
>      > @@ -818,6 +818,4 @@ DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
>      >                      void, env, tl, i64, i64, i32)
>      >   DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
>      >                      void, env, tl, i64, i64, i32)
>      > -DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32)
>      > -DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32)
>      >   #endif
>      > diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
>      > index d1163f316c..1578887a8f 100644
>      > --- a/target/ppc/mem_helper.c
>      > +++ b/target/ppc/mem_helper.c
>      > @@ -413,50 +413,6 @@ void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
>      >       val = int128_make128(lo, hi);
>      >       cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
>      >   }
>      > -
>      > -uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
>      > -                                  uint64_t new_lo, uint64_t new_hi,
>      > -                                  uint32_t opidx)
>      > -{
>      > -    bool success = false;
>      > -
>      > -    /* We will have raised EXCP_ATOMIC from the translator.  */
>      > -    assert(HAVE_CMPXCHG128);
>      > -
>      > -    if (likely(addr == env->reserve_addr)) {
>      > -        Int128 oldv, cmpv, newv;
>      > -
>      > -        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
>      > -        newv = int128_make128(new_lo, new_hi);
>      > -        oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv,
>      > -                                          opidx, GETPC());
>      > -        success = int128_eq(oldv, cmpv);
>      > -    }
>      > -    env->reserve_addr = -1;
>      > -    return env->so + success * CRF_EQ_BIT;
>      > -}
>      > -
>      > -uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
>      > -                                  uint64_t new_lo, uint64_t new_hi,
>      > -                                  uint32_t opidx)
>      > -{
>      > -    bool success = false;
>      > -
>      > -    /* We will have raised EXCP_ATOMIC from the translator.  */
>      > -    assert(HAVE_CMPXCHG128);
>      > -
>      > -    if (likely(addr == env->reserve_addr)) {
>      > -        Int128 oldv, cmpv, newv;
>      > -
>      > -        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
>      > -        newv = int128_make128(new_lo, new_hi);
>      > -        oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv,
>      > -                                          opidx, GETPC());
>      > -        success = int128_eq(oldv, cmpv);
>      > -    }
>      > -    env->reserve_addr = -1;
>      > -    return env->so + success * CRF_EQ_BIT;
>      > -}
>      >   #endif
>      >
>      >   /*****************************************************************************/
>      > diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>      > index 19c1d17cb0..85f95a9045 100644
>      > --- a/target/ppc/translate.c
>      > +++ b/target/ppc/translate.c
>      > @@ -72,6 +72,7 @@ static TCGv cpu_cfar;
>      >   static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
>      >   static TCGv cpu_reserve;
>      >   static TCGv cpu_reserve_val;
>      > +static TCGv cpu_reserve_val2;
>      >   static TCGv cpu_fpscr;
>      >   static TCGv_i32 cpu_access_type;
>      >
>      > @@ -141,8 +142,11 @@ void ppc_translate_init(void)
>      >                                        offsetof(CPUPPCState, reserve_addr),
>      >                                        "reserve_addr");
>      >       cpu_reserve_val = tcg_global_mem_new(cpu_env,
>      > -                                     offsetof(CPUPPCState, reserve_val),
>      > -                                     "reserve_val");
>      > +                                         offsetof(CPUPPCState, reserve_val),
>      > +                                         "reserve_val");
>      > +    cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
>      > +                                          offsetof(CPUPPCState, reserve_val2),
>      > +                                          "reserve_val2");
>      >
>      >       cpu_fpscr = tcg_global_mem_new(cpu_env,
>      >                                      offsetof(CPUPPCState, fpscr), "fpscr");
>      > @@ -3979,78 +3983,66 @@ static void gen_lqarx(DisasContext *ctx)
>      >   /* stqcx. */
>      >   static void gen_stqcx_(DisasContext *ctx)
>      >   {
>      > +    TCGLabel *lab_fail, *lab_over;
>      >       int rs = rS(ctx->opcode);
>      > -    TCGv EA, hi, lo;
>      > +    TCGv EA, t0, t1;
>      > +    TCGv_i128 cmp, val;
>      >
>      >       if (unlikely(rs & 1)) {
>      >           gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
>      >           return;
>      >       }
>      >
>      > +    lab_fail = gen_new_label();
>      > +    lab_over = gen_new_label();
>      > +
>      >       gen_set_access_type(ctx, ACCESS_RES);
>      >       EA = tcg_temp_new();
>      >       gen_addr_reg_index(ctx, EA);
>      >
>      > +    tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
>      > +    tcg_temp_free(EA);
>      > +
>      > +    cmp = tcg_temp_new_i128();
>      > +    val = tcg_temp_new_i128();
>      > +
>      > +    tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val);
>      > +
>      >       /* Note that the low part is always in RS+1, even in LE mode.  */
>      > -    lo = cpu_gpr[rs + 1];
>      > -    hi = cpu_gpr[rs];
>      > +    tcg_gen_concat_i64_i128(val, cpu_gpr[rs + 1], cpu_gpr[rs]);
>      >
>      > -    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
>      > -        if (HAVE_CMPXCHG128) {
>      > -            TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_128) | MO_ALIGN);
>      > -            if (ctx->le_mode) {
>      > -                gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
>      > -                                             EA, lo, hi, oi);
>      > -            } else {
>      > -                gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
>      > -                                             EA, lo, hi, oi);
>      > -            }
>      > -            tcg_temp_free_i32(oi);
>      > -        } else {
>      > -            /* Restart with exclusive lock.  */
>      > -            gen_helper_exit_atomic(cpu_env);
>      > -            ctx->base.is_jmp = DISAS_NORETURN;
>      > -        }
>      > -        tcg_temp_free(EA);
>      > -    } else {
>      > -        TCGLabel *lab_fail = gen_new_label();
>      > -        TCGLabel *lab_over = gen_new_label();
>      > -        TCGv_i64 t0 = tcg_temp_new_i64();
>      > -        TCGv_i64 t1 = tcg_temp_new_i64();
>      > +    tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val, ctx->mem_idx,
>      > +                                DEF_MEMOP(MO_128 | MO_ALIGN));
>      > +    tcg_temp_free_i128(cmp);
>      >
>      > -        tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
>      > -        tcg_temp_free(EA);
>      > +    t0 = tcg_temp_new();
>      > +    t1 = tcg_temp_new();
>      > +    tcg_gen_extr_i128_i64(t1, t0, val);
>      > +    tcg_temp_free_i128(val);
>      >
>      > -        gen_qemu_ld64_i64(ctx, t0, cpu_reserve);
>      > -        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
>      > -                                     ? offsetof(CPUPPCState, reserve_val2)
>      > -                                     : offsetof(CPUPPCState, reserve_val)));
>      > -        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
>      > +    tcg_gen_xor_tl(t1, t1, cpu_reserve_val2);
>      > +    tcg_gen_xor_tl(t0, t0, cpu_reserve_val);
>      > +    tcg_gen_or_tl(t0, t0, t1);
>      > +    tcg_temp_free(t1);
>      >
>      > -        tcg_gen_addi_i64(t0, cpu_reserve, 8);
>      > -        gen_qemu_ld64_i64(ctx, t0, t0);
>      > -        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
>      > -                                     ? offsetof(CPUPPCState, reserve_val)
>      > -                                     : offsetof(CPUPPCState, reserve_val2)));
>      > -        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
>      > +    tcg_gen_setcondi_tl(TCG_COND_EQ, t0, t0, 0);
>      > +    tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT);
>      > +    tcg_gen_or_tl(t0, t0, cpu_so);
>      > +    tcg_gen_trunc_tl_i32(cpu_crf[0], t0);
>      > +    tcg_temp_free(t0);
>      >
>      > -        /* Success */
>      > -        gen_qemu_st64_i64(ctx, ctx->le_mode ? lo : hi, cpu_reserve);
>      > -        tcg_gen_addi_i64(t0, cpu_reserve, 8);
>      > -        gen_qemu_st64_i64(ctx, ctx->le_mode ? hi : lo, t0);
>      > +    tcg_gen_br(lab_over);
>      > +    gen_set_label(lab_fail);
>      >
>      > -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
>      > -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
>      > -        tcg_gen_br(lab_over);
>      > +    /*
>      > +     * Address mismatch implies failure.  But we still need to provide
>      > +     * the memory barrier semantics of the instruction.
>      > +     */
>      > +    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
>      > +    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
>      >
>      > -        gen_set_label(lab_fail);
>      > -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
>      > -
>      > -        gen_set_label(lab_over);
>      > -        tcg_gen_movi_tl(cpu_reserve, -1);
>      > -        tcg_temp_free_i64(t0);
>      > -        tcg_temp_free_i64(t1);
>      > -    }
>      > +    gen_set_label(lab_over);
>      > +    tcg_gen_movi_tl(cpu_reserve, -1);
>      >   }
>      >   #endif /* defined(TARGET_PPC64) */
>      >
>
Daniel Henrique Barboza Nov. 21, 2022, 10:37 p.m. UTC | #4
On 11/12/22 03:11, Richard Henderson wrote:
> Note that the previous direct reference to reserve_val,
> 
> -   tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
> -                                ? offsetof(CPUPPCState, reserve_val2)
> -                                : offsetof(CPUPPCState, reserve_val)));
> 
> was incorrect because all references should have gone through
> cpu_reserve_val.  Create a cpu_reserve_val2 tcg temp to fix this.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>


Richard, since this depends on your int128 with helpers, do you want to
queue this in the same queue?



Daniel

>   target/ppc/helper.h     |   2 -
>   target/ppc/mem_helper.c |  44 -----------------
>   target/ppc/translate.c  | 102 ++++++++++++++++++----------------------
>   3 files changed, 47 insertions(+), 101 deletions(-)
> 
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index 8dd22a35e4..0beaca5c7a 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -818,6 +818,4 @@ DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
>                      void, env, tl, i64, i64, i32)
>   DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
>                      void, env, tl, i64, i64, i32)
> -DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32)
> -DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32)
>   #endif
> diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
> index d1163f316c..1578887a8f 100644
> --- a/target/ppc/mem_helper.c
> +++ b/target/ppc/mem_helper.c
> @@ -413,50 +413,6 @@ void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
>       val = int128_make128(lo, hi);
>       cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
>   }
> -
> -uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
> -                                  uint64_t new_lo, uint64_t new_hi,
> -                                  uint32_t opidx)
> -{
> -    bool success = false;
> -
> -    /* We will have raised EXCP_ATOMIC from the translator.  */
> -    assert(HAVE_CMPXCHG128);
> -
> -    if (likely(addr == env->reserve_addr)) {
> -        Int128 oldv, cmpv, newv;
> -
> -        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
> -        newv = int128_make128(new_lo, new_hi);
> -        oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv,
> -                                          opidx, GETPC());
> -        success = int128_eq(oldv, cmpv);
> -    }
> -    env->reserve_addr = -1;
> -    return env->so + success * CRF_EQ_BIT;
> -}
> -
> -uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
> -                                  uint64_t new_lo, uint64_t new_hi,
> -                                  uint32_t opidx)
> -{
> -    bool success = false;
> -
> -    /* We will have raised EXCP_ATOMIC from the translator.  */
> -    assert(HAVE_CMPXCHG128);
> -
> -    if (likely(addr == env->reserve_addr)) {
> -        Int128 oldv, cmpv, newv;
> -
> -        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
> -        newv = int128_make128(new_lo, new_hi);
> -        oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv,
> -                                          opidx, GETPC());
> -        success = int128_eq(oldv, cmpv);
> -    }
> -    env->reserve_addr = -1;
> -    return env->so + success * CRF_EQ_BIT;
> -}
>   #endif
>   
>   /*****************************************************************************/
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 19c1d17cb0..85f95a9045 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -72,6 +72,7 @@ static TCGv cpu_cfar;
>   static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
>   static TCGv cpu_reserve;
>   static TCGv cpu_reserve_val;
> +static TCGv cpu_reserve_val2;
>   static TCGv cpu_fpscr;
>   static TCGv_i32 cpu_access_type;
>   
> @@ -141,8 +142,11 @@ void ppc_translate_init(void)
>                                        offsetof(CPUPPCState, reserve_addr),
>                                        "reserve_addr");
>       cpu_reserve_val = tcg_global_mem_new(cpu_env,
> -                                     offsetof(CPUPPCState, reserve_val),
> -                                     "reserve_val");
> +                                         offsetof(CPUPPCState, reserve_val),
> +                                         "reserve_val");
> +    cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
> +                                          offsetof(CPUPPCState, reserve_val2),
> +                                          "reserve_val2");
>   
>       cpu_fpscr = tcg_global_mem_new(cpu_env,
>                                      offsetof(CPUPPCState, fpscr), "fpscr");
> @@ -3979,78 +3983,66 @@ static void gen_lqarx(DisasContext *ctx)
>   /* stqcx. */
>   static void gen_stqcx_(DisasContext *ctx)
>   {
> +    TCGLabel *lab_fail, *lab_over;
>       int rs = rS(ctx->opcode);
> -    TCGv EA, hi, lo;
> +    TCGv EA, t0, t1;
> +    TCGv_i128 cmp, val;
>   
>       if (unlikely(rs & 1)) {
>           gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
>           return;
>       }
>   
> +    lab_fail = gen_new_label();
> +    lab_over = gen_new_label();
> +
>       gen_set_access_type(ctx, ACCESS_RES);
>       EA = tcg_temp_new();
>       gen_addr_reg_index(ctx, EA);
>   
> +    tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
> +    tcg_temp_free(EA);
> +
> +    cmp = tcg_temp_new_i128();
> +    val = tcg_temp_new_i128();
> +
> +    tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val);
> +
>       /* Note that the low part is always in RS+1, even in LE mode.  */
> -    lo = cpu_gpr[rs + 1];
> -    hi = cpu_gpr[rs];
> +    tcg_gen_concat_i64_i128(val, cpu_gpr[rs + 1], cpu_gpr[rs]);
>   
> -    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
> -        if (HAVE_CMPXCHG128) {
> -            TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_128) | MO_ALIGN);
> -            if (ctx->le_mode) {
> -                gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
> -                                             EA, lo, hi, oi);
> -            } else {
> -                gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
> -                                             EA, lo, hi, oi);
> -            }
> -            tcg_temp_free_i32(oi);
> -        } else {
> -            /* Restart with exclusive lock.  */
> -            gen_helper_exit_atomic(cpu_env);
> -            ctx->base.is_jmp = DISAS_NORETURN;
> -        }
> -        tcg_temp_free(EA);
> -    } else {
> -        TCGLabel *lab_fail = gen_new_label();
> -        TCGLabel *lab_over = gen_new_label();
> -        TCGv_i64 t0 = tcg_temp_new_i64();
> -        TCGv_i64 t1 = tcg_temp_new_i64();
> +    tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val, ctx->mem_idx,
> +                                DEF_MEMOP(MO_128 | MO_ALIGN));
> +    tcg_temp_free_i128(cmp);
>   
> -        tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
> -        tcg_temp_free(EA);
> +    t0 = tcg_temp_new();
> +    t1 = tcg_temp_new();
> +    tcg_gen_extr_i128_i64(t1, t0, val);
> +    tcg_temp_free_i128(val);
>   
> -        gen_qemu_ld64_i64(ctx, t0, cpu_reserve);
> -        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
> -                                     ? offsetof(CPUPPCState, reserve_val2)
> -                                     : offsetof(CPUPPCState, reserve_val)));
> -        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
> +    tcg_gen_xor_tl(t1, t1, cpu_reserve_val2);
> +    tcg_gen_xor_tl(t0, t0, cpu_reserve_val);
> +    tcg_gen_or_tl(t0, t0, t1);
> +    tcg_temp_free(t1);
>   
> -        tcg_gen_addi_i64(t0, cpu_reserve, 8);
> -        gen_qemu_ld64_i64(ctx, t0, t0);
> -        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
> -                                     ? offsetof(CPUPPCState, reserve_val)
> -                                     : offsetof(CPUPPCState, reserve_val2)));
> -        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
> +    tcg_gen_setcondi_tl(TCG_COND_EQ, t0, t0, 0);
> +    tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT);
> +    tcg_gen_or_tl(t0, t0, cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_crf[0], t0);
> +    tcg_temp_free(t0);
>   
> -        /* Success */
> -        gen_qemu_st64_i64(ctx, ctx->le_mode ? lo : hi, cpu_reserve);
> -        tcg_gen_addi_i64(t0, cpu_reserve, 8);
> -        gen_qemu_st64_i64(ctx, ctx->le_mode ? hi : lo, t0);
> +    tcg_gen_br(lab_over);
> +    gen_set_label(lab_fail);
>   
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
> -        tcg_gen_br(lab_over);
> +    /*
> +     * Address mismatch implies failure.  But we still need to provide
> +     * the memory barrier semantics of the instruction.
> +     */
> +    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
> +    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
>   
> -        gen_set_label(lab_fail);
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -
> -        gen_set_label(lab_over);
> -        tcg_gen_movi_tl(cpu_reserve, -1);
> -        tcg_temp_free_i64(t0);
> -        tcg_temp_free_i64(t1);
> -    }
> +    gen_set_label(lab_over);
> +    tcg_gen_movi_tl(cpu_reserve, -1);
>   }
>   #endif /* defined(TARGET_PPC64) */
>
Richard Henderson Nov. 21, 2022, 10:53 p.m. UTC | #5
On 11/21/22 14:37, Daniel Henrique Barboza wrote:
> 
> 
> On 11/12/22 03:11, Richard Henderson wrote:
>> Note that the previous direct reference to reserve_val,
>>
>> -   tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
>> -                                ? offsetof(CPUPPCState, reserve_val2)
>> -                                : offsetof(CPUPPCState, reserve_val)));
>>
>> was incorrect because all references should have gone through
>> cpu_reserve_val.  Create a cpu_reserve_val2 tcg temp to fix this.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
> 
> Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> 
> 
> Richard, since this depends on your int128 with helpers, do you want to
> queue this in the same queue?


I certainly can.  Thanks,


r~
diff mbox series

Patch

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 8dd22a35e4..0beaca5c7a 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -818,6 +818,4 @@  DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
                    void, env, tl, i64, i64, i32)
 DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
                    void, env, tl, i64, i64, i32)
-DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32)
-DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32)
 #endif
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index d1163f316c..1578887a8f 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -413,50 +413,6 @@  void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
     val = int128_make128(lo, hi);
     cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
 }
-
-uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
-                                  uint64_t new_lo, uint64_t new_hi,
-                                  uint32_t opidx)
-{
-    bool success = false;
-
-    /* We will have raised EXCP_ATOMIC from the translator.  */
-    assert(HAVE_CMPXCHG128);
-
-    if (likely(addr == env->reserve_addr)) {
-        Int128 oldv, cmpv, newv;
-
-        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
-        newv = int128_make128(new_lo, new_hi);
-        oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv,
-                                          opidx, GETPC());
-        success = int128_eq(oldv, cmpv);
-    }
-    env->reserve_addr = -1;
-    return env->so + success * CRF_EQ_BIT;
-}
-
-uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
-                                  uint64_t new_lo, uint64_t new_hi,
-                                  uint32_t opidx)
-{
-    bool success = false;
-
-    /* We will have raised EXCP_ATOMIC from the translator.  */
-    assert(HAVE_CMPXCHG128);
-
-    if (likely(addr == env->reserve_addr)) {
-        Int128 oldv, cmpv, newv;
-
-        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
-        newv = int128_make128(new_lo, new_hi);
-        oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv,
-                                          opidx, GETPC());
-        success = int128_eq(oldv, cmpv);
-    }
-    env->reserve_addr = -1;
-    return env->so + success * CRF_EQ_BIT;
-}
 #endif
 
 /*****************************************************************************/
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 19c1d17cb0..85f95a9045 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -72,6 +72,7 @@  static TCGv cpu_cfar;
 static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
 static TCGv cpu_reserve;
 static TCGv cpu_reserve_val;
+static TCGv cpu_reserve_val2;
 static TCGv cpu_fpscr;
 static TCGv_i32 cpu_access_type;
 
@@ -141,8 +142,11 @@  void ppc_translate_init(void)
                                      offsetof(CPUPPCState, reserve_addr),
                                      "reserve_addr");
     cpu_reserve_val = tcg_global_mem_new(cpu_env,
-                                     offsetof(CPUPPCState, reserve_val),
-                                     "reserve_val");
+                                         offsetof(CPUPPCState, reserve_val),
+                                         "reserve_val");
+    cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
+                                          offsetof(CPUPPCState, reserve_val2),
+                                          "reserve_val2");
 
     cpu_fpscr = tcg_global_mem_new(cpu_env,
                                    offsetof(CPUPPCState, fpscr), "fpscr");
@@ -3979,78 +3983,66 @@  static void gen_lqarx(DisasContext *ctx)
 /* stqcx. */
 static void gen_stqcx_(DisasContext *ctx)
 {
+    TCGLabel *lab_fail, *lab_over;
     int rs = rS(ctx->opcode);
-    TCGv EA, hi, lo;
+    TCGv EA, t0, t1;
+    TCGv_i128 cmp, val;
 
     if (unlikely(rs & 1)) {
         gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
         return;
     }
 
+    lab_fail = gen_new_label();
+    lab_over = gen_new_label();
+
     gen_set_access_type(ctx, ACCESS_RES);
     EA = tcg_temp_new();
     gen_addr_reg_index(ctx, EA);
 
+    tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
+    tcg_temp_free(EA);
+
+    cmp = tcg_temp_new_i128();
+    val = tcg_temp_new_i128();
+
+    tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val);
+
     /* Note that the low part is always in RS+1, even in LE mode.  */
-    lo = cpu_gpr[rs + 1];
-    hi = cpu_gpr[rs];
+    tcg_gen_concat_i64_i128(val, cpu_gpr[rs + 1], cpu_gpr[rs]);
 
-    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-        if (HAVE_CMPXCHG128) {
-            TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_128) | MO_ALIGN);
-            if (ctx->le_mode) {
-                gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
-                                             EA, lo, hi, oi);
-            } else {
-                gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
-                                             EA, lo, hi, oi);
-            }
-            tcg_temp_free_i32(oi);
-        } else {
-            /* Restart with exclusive lock.  */
-            gen_helper_exit_atomic(cpu_env);
-            ctx->base.is_jmp = DISAS_NORETURN;
-        }
-        tcg_temp_free(EA);
-    } else {
-        TCGLabel *lab_fail = gen_new_label();
-        TCGLabel *lab_over = gen_new_label();
-        TCGv_i64 t0 = tcg_temp_new_i64();
-        TCGv_i64 t1 = tcg_temp_new_i64();
+    tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val, ctx->mem_idx,
+                                DEF_MEMOP(MO_128 | MO_ALIGN));
+    tcg_temp_free_i128(cmp);
 
-        tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
-        tcg_temp_free(EA);
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    tcg_gen_extr_i128_i64(t1, t0, val);
+    tcg_temp_free_i128(val);
 
-        gen_qemu_ld64_i64(ctx, t0, cpu_reserve);
-        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
-                                     ? offsetof(CPUPPCState, reserve_val2)
-                                     : offsetof(CPUPPCState, reserve_val)));
-        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
+    tcg_gen_xor_tl(t1, t1, cpu_reserve_val2);
+    tcg_gen_xor_tl(t0, t0, cpu_reserve_val);
+    tcg_gen_or_tl(t0, t0, t1);
+    tcg_temp_free(t1);
 
-        tcg_gen_addi_i64(t0, cpu_reserve, 8);
-        gen_qemu_ld64_i64(ctx, t0, t0);
-        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
-                                     ? offsetof(CPUPPCState, reserve_val)
-                                     : offsetof(CPUPPCState, reserve_val2)));
-        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, t0, t0, 0);
+    tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT);
+    tcg_gen_or_tl(t0, t0, cpu_so);
+    tcg_gen_trunc_tl_i32(cpu_crf[0], t0);
+    tcg_temp_free(t0);
 
-        /* Success */
-        gen_qemu_st64_i64(ctx, ctx->le_mode ? lo : hi, cpu_reserve);
-        tcg_gen_addi_i64(t0, cpu_reserve, 8);
-        gen_qemu_st64_i64(ctx, ctx->le_mode ? hi : lo, t0);
+    tcg_gen_br(lab_over);
+    gen_set_label(lab_fail);
 
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
-        tcg_gen_br(lab_over);
+    /*
+     * Address mismatch implies failure.  But we still need to provide
+     * the memory barrier semantics of the instruction.
+     */
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
 
-        gen_set_label(lab_fail);
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-
-        gen_set_label(lab_over);
-        tcg_gen_movi_tl(cpu_reserve, -1);
-        tcg_temp_free_i64(t0);
-        tcg_temp_free_i64(t1);
-    }
+    gen_set_label(lab_over);
+    tcg_gen_movi_tl(cpu_reserve, -1);
 }
 #endif /* defined(TARGET_PPC64) */