diff mbox series

[2/3] target/hppa: Optimize UADDCM with no condition

Message ID 20240325030448.52110-3-richard.henderson@linaro.org
State Superseded
Headers show
Series target/hppa: Fix DCOR, UADDCM conditions | expand

Commit Message

Richard Henderson March 25, 2024, 3:04 a.m. UTC
UADDCM with r1 as zero is by far its most common usage, being the easiest
way to invert a register.  The compiler does occasionally use the
addition step as well, and we can simplify that to avoid a temp
and write directly into the destination.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hppa/translate.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

Comments

Helge Deller March 25, 2024, 9:48 a.m. UTC | #1
On 3/25/24 04:04, Richard Henderson wrote:
> With r1 as zero is by far the only usage of UADDCM, as the easiest
> way to invert a register.  The compiler does occasionally use the
> addition step as well, and we can simplify that to avoid a temp
> and write directly into the destination.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Helge Deller <deller@gmx.de>
Tested-by: Helge Deller <deller@gmx.de>

Helge


> ---
>   target/hppa/translate.c | 24 ++++++++++++++++++++++--
>   1 file changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/target/hppa/translate.c b/target/hppa/translate.c
> index a3f425d861..3fc3e7754c 100644
> --- a/target/hppa/translate.c
> +++ b/target/hppa/translate.c
> @@ -2763,9 +2763,29 @@ static bool do_uaddcm(DisasContext *ctx, arg_rrr_cf_d *a, bool is_tc)
>   {
>       TCGv_i64 tcg_r1, tcg_r2, tmp;
>
> -    if (a->cf) {
> -        nullify_over(ctx);
> +    if (a->cf == 0) {
> +        tcg_r2 = load_gpr(ctx, a->r2);
> +        tmp = dest_gpr(ctx, a->t);
> +
> +        if (a->r1 == 0) {
> +            /* UADDCM r0,src,dst is the common idiom for dst = ~src. */
> +            tcg_gen_not_i64(tmp, tcg_r2);
> +        } else {
> +            /*
> +             * Recall that r1 - r2 == r1 + ~r2 + 1.
> +             * Thus r1 + ~r2 == r1 - r2 - 1,
> +             * which does not require an extra temporary.
> +             */
> +            tcg_r1 = load_gpr(ctx, a->r1);
> +            tcg_gen_sub_i64(tmp, tcg_r1, tcg_r2);
> +            tcg_gen_subi_i64(tmp, tmp, 1);
> +        }
> +        save_gpr(ctx, a->t, tmp);
> +        cond_free(&ctx->null_cond);
> +        return true;
>       }
> +
> +    nullify_over(ctx);
>       tcg_r1 = load_gpr(ctx, a->r1);
>       tcg_r2 = load_gpr(ctx, a->r2);
>       tmp = tcg_temp_new_i64();
diff mbox series

Patch

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index a3f425d861..3fc3e7754c 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -2763,9 +2763,29 @@  static bool do_uaddcm(DisasContext *ctx, arg_rrr_cf_d *a, bool is_tc)
 {
     TCGv_i64 tcg_r1, tcg_r2, tmp;
 
-    if (a->cf) {
-        nullify_over(ctx);
+    if (a->cf == 0) {
+        tcg_r2 = load_gpr(ctx, a->r2);
+        tmp = dest_gpr(ctx, a->t);
+
+        if (a->r1 == 0) {
+            /* UADDCM r0,src,dst is the common idiom for dst = ~src. */
+            tcg_gen_not_i64(tmp, tcg_r2);
+        } else {
+            /*
+             * Recall that r1 - r2 == r1 + ~r2 + 1.
+             * Thus r1 + ~r2 == r1 - r2 - 1,
+             * which does not require an extra temporary.
+             */
+            tcg_r1 = load_gpr(ctx, a->r1);
+            tcg_gen_sub_i64(tmp, tcg_r1, tcg_r2);
+            tcg_gen_subi_i64(tmp, tmp, 1);
+        }
+        save_gpr(ctx, a->t, tmp);
+        cond_free(&ctx->null_cond);
+        return true;
     }
+
+    nullify_over(ctx);
     tcg_r1 = load_gpr(ctx, a->r1);
     tcg_r2 = load_gpr(ctx, a->r2);
     tmp = tcg_temp_new_i64();