diff mbox series

[2/7] tcg/ppc: Use PADDI in tcg_out_movi

Message ID 20230804213355.294443-3-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg/ppc: Support power10 prefixed instructions | expand

Commit Message

Richard Henderson Aug. 4, 2023, 9:33 p.m. UTC
PADDI can load 34-bit immediates and 34-bit pc-relative addresses.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 47 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

Comments

Jordan Niethe Aug. 7, 2023, 3:53 a.m. UTC | #1
On Sat, Aug 5, 2023 at 7:33 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> PADDI can load 34-bit immediates and 34-bit pc-relative addresses.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/ppc/tcg-target.c.inc | 47 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 47 insertions(+)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 642d0fd128..7fa2a2500b 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -707,6 +707,33 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
>      return true;
>  }
>
> +/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
> +static bool tcg_out_need_prefix_align(TCGContext *s)
> +{
> +    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
> +}
> +
> +static void tcg_out_prefix_align(TCGContext *s)
> +{
> +    if (tcg_out_need_prefix_align(s)) {
> +        tcg_out32(s, NOP);
> +    }
> +}
> +
> +/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
> +static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
> +                          unsigned ra, tcg_target_long imm, bool r)
> +{
> +    tcg_insn_unit p, i;
> +
> +    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
> +    i = opc | TAI(rt, ra, imm);
> +
> +    tcg_out_prefix_align(s);
> +    tcg_out32(s, p);
> +    tcg_out32(s, i);
> +}
> +
>  static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
>                               TCGReg base, tcg_target_long offset);
>
> @@ -992,6 +1019,26 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
>          return;
>      }
>
> +    /*
> +     * Load values up to 34 bits, and pc-relative addresses,
> +     * with one prefixed insn.
> +     */
> +    if (have_isa_3_10) {
> +        if (arg == sextract64(arg, 0, 34)) {
> +            /* pli ret,value = paddi ret,0,value,0 */
> +            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
> +            return;
> +        }
> +
> +        tmp = tcg_out_need_prefix_align(s) * 4;

tcg_out_need_prefix_align() returns a bool, optionally might prefer

tmp = tcg_out_need_prefix_align(s) ? 4 : 0;


> +        tmp = tcg_pcrel_diff(s, (void *)arg) - tmp;
> +        if (tmp == sextract64(tmp, 0, 34)) {
> +            /* pla ret,value = paddi ret,0,value,1 */
> +            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
> +            return;
> +        }
> +    }
> +
>      /* Load 32-bit immediates with two insns.  Note that we've already
>         eliminated bare ADDIS, so we know both insns are required.  */
>      if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
> --
> 2.34.1
>

Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
Richard Henderson Aug. 7, 2023, 9:26 p.m. UTC | #2
On 8/6/23 20:53, Jordan Niethe wrote:
>> +        tmp = tcg_out_need_prefix_align(s) * 4;
> 
> tcg_out_need_prefix_align() returns a bool, optionally might prefer
> 
> tmp = tcg_out_need_prefix_align(s) ? 4 : 0;

I suppose.  C type promotion rules make the multiplication just the same though.

That said, I've merged back Nick's tcg_pcrel_diff_for_prefix function using ?:.


r~
diff mbox series

Patch

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 642d0fd128..7fa2a2500b 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -707,6 +707,33 @@  static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
     return true;
 }
 
+/* True if an 8-byte prefixed insn emitted here would cross 64 bytes. */
+static bool tcg_out_need_prefix_align(TCGContext *s)
+{
+    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c; /* last word of 64 */
+}
+
+static void tcg_out_prefix_align(TCGContext *s)
+{
+    if (tcg_out_need_prefix_align(s)) { /* at offset 0x3c of a 64-byte block */
+        tcg_out32(s, NOP); /* pad one word so the next insn starts aligned */
+    }
+}
+
+/* Output MLS:D (Modified Load/Store) type 10 prefix, then suffix insn opc. */
+static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
+                          unsigned ra, tcg_target_long imm, bool r)
+{
+    tcg_insn_unit p, i; /* p = prefix word, i = suffix word */
+
+    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
+    i = opc | TAI(rt, ra, imm); /* presumably low 16 bits of imm; see TAI */
+
+    tcg_out_prefix_align(s); /* NOP pad if the pair would cross 64 bytes */
+    tcg_out32(s, p); /* prefix: type 2, R = r, 18 bits of imm from bit 16 up */
+    tcg_out32(s, i);
+}
+
 static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                              TCGReg base, tcg_target_long offset);
 
@@ -992,6 +1019,26 @@  static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
         return;
     }
 
+    /*
+     * Load values up to 34 bits, and pc-relative addresses,
+     * with one prefixed insn.
+     */
+    if (have_isa_3_10) {
+        if (arg == sextract64(arg, 0, 34)) {
+            /* pli ret,value = paddi ret,0,value,0 */
+            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
+            return;
+        }
+
+        tmp = tcg_out_need_prefix_align(s) * 4;
+        tmp = tcg_pcrel_diff(s, (void *)arg) - tmp;
+        if (tmp == sextract64(tmp, 0, 34)) {
+            /* pla ret,value = paddi ret,0,value,1 */
+            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
+            return;
+        }
+    }
+
     /* Load 32-bit immediates with two insns.  Note that we've already
        eliminated bare ADDIS, so we know both insns are required.  */
     if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {