Message ID: 20240510091251.7975-1-richard.henderson@linaro.org
State:      Superseded
Series:     tcg/loongarch64: Fill out tcg_out_{ld,st} for vector regs
On 2024/5/10 at 5:12 PM, Richard Henderson wrote:
> TCG register spill/fill uses tcg_out_ld/st with all types,
> not necessarily going through INDEX_op_{ld,st}_vec.
>
> Cc: qemu-stable@nongnu.org
> Fixes: 16288ded944 ("tcg/loongarch64: Lower basic tcg vec ops to LSX")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2336
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/loongarch64/tcg-target.c.inc | 103 ++++++++++++++++++++++++-------
>  1 file changed, 80 insertions(+), 23 deletions(-)

Tested-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Song Gao <gaosong@loongson.cn>

Thanks.
Song Gao
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 69c5b8ac4f..06ca1ab11c 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -808,18 +808,88 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
     }
 }
 
-static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
-                       TCGReg arg1, intptr_t arg2)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg dest,
+                       TCGReg base, intptr_t offset)
 {
-    bool is_32bit = type == TCG_TYPE_I32;
-    tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2);
+    switch (type) {
+    case TCG_TYPE_I32:
+        if (dest < TCG_REG_V0) {
+            tcg_out_ldst(s, OPC_LD_W, dest, base, offset);
+        } else {
+            tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_32, dest, base, offset);
+        }
+        break;
+    case TCG_TYPE_I64:
+        if (dest < TCG_REG_V0) {
+            tcg_out_ldst(s, OPC_LD_D, dest, base, offset);
+        } else {
+            tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_64, dest, base, offset);
+        }
+        break;
+    case TCG_TYPE_V128:
+        if (-0x800 <= offset && offset <= 0x7ff) {
+            tcg_out_opc_vld(s, dest, base, offset);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+            tcg_out_opc_vldx(s, dest, base, TCG_REG_TMP0);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
 }
 
-static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
-                       TCGReg arg1, intptr_t arg2)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
+                       TCGReg base, intptr_t offset)
 {
-    bool is_32bit = type == TCG_TYPE_I32;
-    tcg_out_ldst(s, is_32bit ? OPC_ST_W : OPC_ST_D, arg, arg1, arg2);
+    switch (type) {
+    case TCG_TYPE_I32:
+        if (src < TCG_REG_V0) {
+            tcg_out_ldst(s, OPC_ST_W, src, base, offset);
+        } else {
+            /* TODO: Could use fst_s, fstx_s */
+            if (offset < -0x100 || offset > 0xff || (offset & 3)) {
+                if (-0x800 <= offset && offset <= 0x7ff) {
+                    tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
+                } else {
+                    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+                    tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
+                }
+                base = TCG_REG_TMP0;
+                offset = 0;
+            }
+            tcg_out_opc_vstelm_w(s, src, base, offset, 0);
+        }
+        break;
+    case TCG_TYPE_I64:
+        if (src < TCG_REG_V0) {
+            tcg_out_ldst(s, OPC_ST_D, src, base, offset);
+        } else {
+            /* TODO: Could use fst_d, fstx_d */
+            if (offset < -0x100 || offset > 0xff || (offset & 7)) {
+                if (-0x800 <= offset && offset <= 0x7ff) {
+                    tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
+                } else {
+                    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+                    tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
+                }
+                base = TCG_REG_TMP0;
+                offset = 0;
+            }
+            tcg_out_opc_vstelm_d(s, src, base, offset, 0);
+        }
+        break;
+    case TCG_TYPE_V128:
+        if (-0x800 <= offset && offset <= 0x7ff) {
+            tcg_out_opc_vst(s, src, base, offset);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+            tcg_out_opc_vstx(s, src, base, TCG_REG_TMP0);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
 }
 
 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -1740,7 +1810,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 {
     TCGType type = vecl + TCG_TYPE_V64;
     TCGArg a0, a1, a2, a3;
-    TCGReg temp = TCG_REG_TMP0;
     TCGReg temp_vec = TCG_VEC_TMP0;
 
     static const LoongArchInsn cmp_vec_insn[16][4] = {
@@ -1820,22 +1889,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 
     switch (opc) {
     case INDEX_op_st_vec:
-        /* Try to fit vst imm */
-        if (-0x800 <= a2 && a2 <= 0x7ff) {
-            tcg_out_opc_vst(s, a0, a1, a2);
-        } else {
-            tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
-            tcg_out_opc_vstx(s, a0, a1, temp);
-        }
+        tcg_out_st(s, type, a0, a1, a2);
         break;
     case INDEX_op_ld_vec:
-        /* Try to fit vld imm */
-        if (-0x800 <= a2 && a2 <= 0x7ff) {
-            tcg_out_opc_vld(s, a0, a1, a2);
-        } else {
-            tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
-            tcg_out_opc_vldx(s, a0, a1, temp);
-        }
+        tcg_out_ld(s, type, a0, a1, a2);
         break;
     case INDEX_op_and_vec:
         tcg_out_opc_vand_v(s, a0, a1, a2);
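A note on the offset handling above: the -0x800..0x7ff checks correspond to the signed 12-bit byte offset accepted by the vld/vst immediate forms; when a spill-slot offset does not fit, the patch materializes the offset in TCG_REG_TMP0 and uses the register-indexed vldx/vstx, or folds the offset into the base address before vstelm. The following is a standalone sketch of that range test, not QEMU code; fits_simm and the printed messages are invented for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Does 'offset' fit a signed immediate field that is 'bits' wide? */
static bool fits_simm(intptr_t offset, unsigned bits)
{
    intptr_t lo = -((intptr_t)1 << (bits - 1));
    intptr_t hi = ((intptr_t)1 << (bits - 1)) - 1;
    return lo <= offset && offset <= hi;
}

int main(void)
{
    printf("0x7ff fits si12: %d\n", fits_simm(0x7ff, 12));  /* 1: vst immediate form ok */
    printf("0x800 fits si12: %d\n", fits_simm(0x800, 12));  /* 0: fall back to vstx */
    return 0;
}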
TCG register spill/fill uses tcg_out_ld/st with all types,
not necessarily going through INDEX_op_{ld,st}_vec.

Cc: qemu-stable@nongnu.org
Fixes: 16288ded944 ("tcg/loongarch64: Lower basic tcg vec ops to LSX")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2336
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 103 ++++++++++++++++++++++++-------
 1 file changed, 80 insertions(+), 23 deletions(-)
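The point of the commit message is that the register allocator may hold an I32 or I64 temp in an LSX vector register and later spill or reload it through the generic tcg_out_ld/st hooks, so those hooks must dispatch on both the TCG type and the register class rather than assume a GPR. Below is a self-contained toy program that mirrors only the shape of that dispatch; it is not QEMU code, and every name (demo_store, DEMO_REG_V0, the enum) is invented for the example.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum demo_type { DEMO_I32, DEMO_I64, DEMO_V128 };

/* Pretend register numbering: 0..31 are GPRs, 32..63 are vector registers. */
#define DEMO_REG_V0 32

static void demo_store(enum demo_type type, int src, int base, intptr_t offset)
{
    switch (type) {
    case DEMO_I32:
    case DEMO_I64:
        if (src < DEMO_REG_V0) {
            /* scalar temp in a GPR: plain integer store */
            printf("st.%c      r%d, r%d, %ld\n",
                   type == DEMO_I32 ? 'w' : 'd', src, base, (long)offset);
        } else {
            /* scalar temp spilled from a vector register: store element 0 */
            printf("vstelm.%c  v%d, r%d, %ld, 0\n",
                   type == DEMO_I32 ? 'w' : 'd', src - DEMO_REG_V0,
                   base, (long)offset);
        }
        break;
    case DEMO_V128:
        /* full 128-bit vector store */
        printf("vst       v%d, r%d, %ld\n",
               src - DEMO_REG_V0, base, (long)offset);
        break;
    default:
        assert(0);
    }
}

int main(void)
{
    demo_store(DEMO_I64, 12, 3, 16);              /* integer temp in a GPR */
    demo_store(DEMO_I64, DEMO_REG_V0 + 5, 3, 16); /* integer temp living in v5 */
    demo_store(DEMO_V128, DEMO_REG_V0 + 5, 3, 16);
    return 0;
}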