@@ -1119,11 +1119,15 @@ LD_zpri 1010010 .. nreg:2 0.... 111 ... ..... ..... @rpri_load_msz
# SVE load and broadcast quadword (scalar plus scalar)
LD1RQ_zprr 1010010 .. 00 ..... 000 ... ..... ..... \
@rprr_load_msz nreg=0
+LD1RO_zprr 1010010 .. 01 ..... 000 ... ..... ..... \
+ @rprr_load_msz nreg=0
# SVE load and broadcast quadword (scalar plus immediate)
# LD1RQB, LD1RQH, LD1RQS, LD1RQD
LD1RQ_zpri 1010010 .. 00 0.... 001 ... ..... ..... \
@rpri_load_msz nreg=0
+LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
+ @rpri_load_msz nreg=0
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
@@ -5641,6 +5641,103 @@ static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
return true;
}
+static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned vsz_r32;
+ TCGv_ptr t_pg;
+ TCGv_i32 t_desc;
+ int desc, poff, doff;
+
+ if (vsz < 32) {
+ /*
+ * Note that this UNDEFINED check comes after CheckSVEEnabled()
+ * in the ARM pseudocode, which is the sve_access_check() done
+ * in our caller. We should not now return false from the caller.
+ */
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* Load the first octaword using the normal predicated load helpers. */
+
+ poff = pred_full_reg_offset(s, pg);
+ if (vsz > 32) {
+ /*
+ * Zero-extend the first 32 bits of the predicate into a temporary.
+ * This avoids triggering an assert making sure we don't have bits
+ * set within a predicate beyond VQ, but we have lowered VQ to 2
+ * for this load operation.
+ */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+#ifdef HOST_WORDS_BIGENDIAN
+ poff += 4;
+#endif
+ tcg_gen_ld32u_i64(tmp, cpu_env, poff);
+
+ poff = offsetof(CPUARMState, vfp.preg_tmp);
+ tcg_gen_st_i64(tmp, cpu_env, poff);
+ tcg_temp_free_i64(tmp);
+ }
+
+ t_pg = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(t_pg, cpu_env, poff);
+
+ desc = simd_desc(32, 32, zt);
+ t_desc = tcg_const_i32(desc);
+
+ gen_helper_gvec_mem *fn
+ = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
+ fn(cpu_env, t_pg, addr, t_desc);
+
+ tcg_temp_free_ptr(t_pg);
+ tcg_temp_free_i32(t_desc);
+
+ /*
+ * Replicate that first octaword.
+ * The replication happens in units of 32; if the full vector size
+ * is not a multiple of 32, the final bits are zeroed.
+ */
+ doff = vec_full_reg_offset(s, zt);
+ vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
+ if (vsz >= 64) {
+ tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz - 32);
+ } else if (vsz > vsz_r32) {
+ /* Nop move, with side effect of clearing the tail. */
+ tcg_gen_gvec_mov(MO_64, doff, doff, vsz_r32, vsz);
+ }
+}
+
+static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
+{
+ if (!dc_isar_feature(aa64_sve2_f64mm, s)) {
+ return false;
+ }
+ if (a->rm == 31) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 addr = new_tmp_a64(s);
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
+ do_ldro(s, a->rd, a->pg, addr, a->dtype);
+ }
+ return true;
+}
+
+static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
+{
+ if (!dc_isar_feature(aa64_sve2_f64mm, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 addr = new_tmp_a64(s);
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
+ do_ldro(s, a->rd, a->pg, addr, a->dtype);
+ }
+ return true;
+}
+
/* Load and broadcast element. */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/sve.decode | 4 ++ target/arm/translate-sve.c | 97 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+)