@@ -1587,8 +1587,8 @@ typedef struct CPUArchState {
float_status mmx_status; /* for 3DNow! float ops */
float_status sse_status;
uint32_t mxcsr;
- ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32];
- ZMMReg xmm_t0;
+ ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32] QEMU_ALIGNED(16);
+ ZMMReg xmm_t0 QEMU_ALIGNED(16);
MMXReg mmx_t0;
uint64_t opmask_regs[NB_OPMASK_REGS];
@@ -23,6 +23,7 @@
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"
@@ -2753,10 +2754,8 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
{
- tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
- tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
- tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
- tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
+ int xmm_ofs = offsetof(ZMMReg, ZMM_X(0)); /* byte offset of ZMM_X(0) inside a ZMMReg */
+ tcg_gen_gvec_mov(MO_64, d_offset + xmm_ofs, s_offset + xmm_ofs, 16, 16); /* single 16-byte gvec move from s_offset to d_offset, replacing the two 64-bit load/store pairs */
}
static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
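Low-hanging fruit: use a single gvec move in gen_op_movo to copy the 16 bytes instead of two 64-bit load/store pairs, and mark xmm_regs and xmm_t0 in CPUArchState as QEMU_ALIGNED(16). Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/i386/cpu.h | 4 ++-- target/i386/tcg/translate.c | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-)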