Message ID | 20230130214844.1158612-33-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | tcg: Support for Int128 with helpers | expand |
```
On 30.01.23 22:48, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Ilya Leoshkevich <iii@linux.ibm.com>
> ---

Acked-by: David Hildenbrand <david@redhat.com>
```
```
On Mon, 2023-01-30 at 11:48 -1000, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>  target/s390x/helper.h            |  2 --
>  target/s390x/tcg/insn-data.h.inc |  2 +-
>  target/s390x/tcg/mem_helper.c    | 52 ------------------------------
>  target/s390x/tcg/translate.c     | 55 +++++++++++++++++++-------------
>  4 files changed, 33 insertions(+), 78 deletions(-)

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>

I wrote a test for this a while ago [1], but apparently it was lost in
the mail. I will post a rebased version here.

[1] https://lists.gnu.org/archive/html/qemu-devel/2022-11/msg04506.html
```
On 2/1/23 03:32, Ilya Leoshkevich wrote: > Add a simple test to prevent regressions. > > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> Thanks for re-posting, and sorry for missing the original. Added to the patch set. r~ > --- > tests/tcg/s390x/Makefile.target | 4 ++ > tests/tcg/s390x/cdsg.c | 93 +++++++++++++++++++++++++++++++++ > 2 files changed, 97 insertions(+) > create mode 100644 tests/tcg/s390x/cdsg.c > > diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target > index 1d454270c0e..72ad309b273 100644 > --- a/tests/tcg/s390x/Makefile.target > +++ b/tests/tcg/s390x/Makefile.target > @@ -27,6 +27,10 @@ TESTS+=noexec > TESTS+=div > TESTS+=clst > TESTS+=long-double > +TESTS+=cdsg > + > +cdsg: CFLAGS+=-pthread > +cdsg: LDFLAGS+=-pthread > > Z13_TESTS=vistr > $(Z13_TESTS): CFLAGS+=-march=z13 -O2 > diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c > new file mode 100644 > index 00000000000..800618ff4b4 > --- /dev/null > +++ b/tests/tcg/s390x/cdsg.c > @@ -0,0 +1,93 @@ > +/* > + * Test CDSG instruction. > + * > + * Increment the first half of aligned_quadword by 1, and the second half by 2 > + * from 2 threads. Verify that the result is consistent. 
> + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > +#include <assert.h> > +#include <pthread.h> > +#include <stdbool.h> > +#include <stdlib.h> > + > +static volatile bool start; > +typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16))); > +static aligned_quadword val; > +static const int n_iterations = 1000000; > + > +static inline int cdsg(unsigned long *orig0, unsigned long *orig1, > + unsigned long new0, unsigned long new1, > + aligned_quadword *mem) > +{ > + register unsigned long r0 asm("r0"); > + register unsigned long r1 asm("r1"); > + register unsigned long r2 asm("r2"); > + register unsigned long r3 asm("r3"); > + int cc; > + > + r0 = *orig0; > + r1 = *orig1; > + r2 = new0; > + r3 = new1; > + asm("cdsg %[r0],%[r2],%[db2]\n" > + "ipm %[cc]" > + : [r0] "+r" (r0) > + , [r1] "+r" (r1) > + , [db2] "+m" (*mem) > + , [cc] "=r" (cc) > + : [r2] "r" (r2) > + , [r3] "r" (r3) > + : "cc"); > + *orig0 = r0; > + *orig1 = r1; > + > + return (cc >> 28) & 3; > +} > + > +void *cdsg_loop(void *arg) > +{ > + unsigned long orig0, orig1, new0, new1; > + int cc; > + int i; > + > + while (!start) { > + } > + > + orig0 = val[0]; > + orig1 = val[1]; > + for (i = 0; i < n_iterations;) { > + new0 = orig0 + 1; > + new1 = orig1 + 2; > + > + cc = cdsg(&orig0, &orig1, new0, new1, &val); > + > + if (cc == 0) { > + orig0 = new0; > + orig1 = new1; > + i++; > + } else { > + assert(cc == 1); > + } > + } > + > + return NULL; > +} > + > +int main(void) > +{ > + pthread_t thread; > + int ret; > + > + ret = pthread_create(&thread, NULL, cdsg_loop, NULL); > + assert(ret == 0); > + start = true; > + cdsg_loop(NULL); > + ret = pthread_join(thread, NULL); > + assert(ret == 0); > + > + assert(val[0] == n_iterations * 2); > + assert(val[1] == n_iterations * 4); > + > + return EXIT_SUCCESS; > +}
diff --git a/target/s390x/helper.h b/target/s390x/helper.h index bccd3bfca6..341bc51ec2 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -35,8 +35,6 @@ DEF_HELPER_3(cxgb, i128, env, s64, i32) DEF_HELPER_3(celgb, i64, env, i64, i32) DEF_HELPER_3(cdlgb, i64, env, i64, i32) DEF_HELPER_3(cxlgb, i128, env, i64, i32) -DEF_HELPER_4(cdsg, void, env, i64, i32, i32) -DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32) DEF_HELPER_4(csst, i32, env, i32, i64, i64) DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64) DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64) diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc index 893f4b48db..9d2d35f084 100644 --- a/target/s390x/tcg/insn-data.h.inc +++ b/target/s390x/tcg/insn-data.h.inc @@ -276,7 +276,7 @@ /* COMPARE DOUBLE AND SWAP */ D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ) D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ) - C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0) + C(0xeb3e, CDSG, RSY_a, Z, la2, r3_D64, 0, r1_D64, cdsg, 0) /* COMPARE AND SWAP AND STORE */ C(0xc802, CSST, SSF, CASS, la1, a2, 0, 0, csst, 0) diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index 49969abda7..d6725fd18c 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -1771,58 +1771,6 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, return cc; } -void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3) -{ - uintptr_t ra = GETPC(); - Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); - Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); - Int128 oldv; - uint64_t oldh, oldl; - bool fail; - - check_alignment(env, addr, 16, ra); - - oldh = cpu_ldq_data_ra(env, addr + 0, ra); - oldl = cpu_ldq_data_ra(env, addr + 8, ra); - - oldv = int128_make128(oldl, oldh); - fail = !int128_eq(oldv, cmpv); - if (fail) { - newv = oldv; - } - - 
cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); - cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); - - env->cc_op = fail; - env->regs[r1] = int128_gethi(oldv); - env->regs[r1 + 1] = int128_getlo(oldv); -} - -void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3) -{ - uintptr_t ra = GETPC(); - Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); - Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); - int mem_idx; - MemOpIdx oi; - Int128 oldv; - bool fail; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx); - oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - fail = !int128_eq(oldv, cmpv); - - env->cc_op = fail; - env->regs[r1] = int128_gethi(oldv); - env->regs[r1 + 1] = int128_getlo(oldv); -} - static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2, bool parallel) { diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index d422a1e62b..9ea28b3e52 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -2224,31 +2224,25 @@ static DisasJumpType op_cs(DisasContext *s, DisasOps *o) static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) { int r1 = get_field(s, r1); - int r3 = get_field(s, r3); - int d2 = get_field(s, d2); - int b2 = get_field(s, b2); - DisasJumpType ret = DISAS_NEXT; - TCGv_i64 addr; - TCGv_i32 t_r1, t_r3; - /* Note that R1:R1+1 = expected value and R3:R3+1 = new value. 
*/ - addr = get_address(s, 0, b2, d2); - t_r1 = tcg_const_i32(r1); - t_r3 = tcg_const_i32(r3); - if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); - } else if (HAVE_CMPXCHG128) { - gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); - } else { - gen_helper_exit_atomic(cpu_env); - ret = DISAS_NORETURN; - } - tcg_temp_free_i64(addr); - tcg_temp_free_i32(t_r1); - tcg_temp_free_i32(t_r3); + o->out_128 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]); - set_cc_static(s); - return ret; + /* Note out (R1:R1+1) = expected value and in2 (R3:R3+1) = new value. */ + tcg_gen_atomic_cmpxchg_i128(o->out_128, o->addr1, o->out_128, o->in2_128, + get_mem_index(s), MO_BE | MO_128 | MO_ALIGN); + + /* + * Extract result into cc_dst:cc_src, compare vs the expected value + * in the as yet unmodified input registers, then update CC_OP. + */ + tcg_gen_extr_i128_i64(cc_src, cc_dst, o->out_128); + tcg_gen_xor_i64(cc_dst, cc_dst, regs[r1]); + tcg_gen_xor_i64(cc_src, cc_src, regs[r1 + 1]); + tcg_gen_or_i64(cc_dst, cc_dst, cc_src); + set_cc_nz_u64(s, cc_dst); + + return DISAS_NEXT; } static DisasJumpType op_csst(DisasContext *s, DisasOps *o) @@ -5488,6 +5482,13 @@ static void wout_r1_D32(DisasContext *s, DisasOps *o) } #define SPEC_wout_r1_D32 SPEC_r1_even +static void wout_r1_D64(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + tcg_gen_extr_i128_i64(regs[r1 + 1], regs[r1], o->out_128); +} +#define SPEC_wout_r1_D64 SPEC_r1_even + static void wout_r3_P32(DisasContext *s, DisasOps *o) { int r3 = get_field(s, r3); @@ -5935,6 +5936,14 @@ static void in2_r3(DisasContext *s, DisasOps *o) } #define SPEC_in2_r3 0 +static void in2_r3_D64(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + o->in2_128 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(o->in2_128, regs[r3 + 1], regs[r3]); +} +#define SPEC_in2_r3_D64 SPEC_r3_even + static void in2_r3_sr32(DisasContext *s, DisasOps *o) { o->in2 = 
tcg_temp_new_i64();
```
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
Cc: David Hildenbrand <david@redhat.com>
Cc: Ilya Leoshkevich <iii@linux.ibm.com>
---
 target/s390x/helper.h            |  2 --
 target/s390x/tcg/insn-data.h.inc |  2 +-
 target/s390x/tcg/mem_helper.c    | 52 ------------------------------
 target/s390x/tcg/translate.c     | 55 +++++++++++++++++++-------------
 4 files changed, 33 insertions(+), 78 deletions(-)
```