diff mbox

[PR70920] transform (intptr_t) x eq/ne CST to x eq/ne (typeof x) cst

Message ID CAAgBjMng-muqz1Gi2ALk8EjDRBYdoz0L80dd+yjbC16TYUDTLA@mail.gmail.com
State New
Headers show

Commit Message

Prathamesh Kulkarni July 25, 2016, 4:46 p.m. UTC
On 25 July 2016 at 14:32, Richard Biener <rguenther@suse.de> wrote:
> On Mon, 25 Jul 2016, Prathamesh Kulkarni wrote:

>

>> Hi Richard,

>> The attached patch tries to fix PR70920.

>> It adds your pattern from comment 1 in the PR

>> (with additional gating on INTEGRAL_TYPE_P to avoid regressing finalize_18.f90)

>> and second pattern, which is reverse of the first transform.

>> I needed to update ssa-dom-branch-1.c because with patch applied,

>> jump threading removed the second if (i != 0B) block.

>> The dumps with and without patch for ssa-dom-branch-1.c start

>> to differ with forwprop1:

>>

>> before:

>>  <bb 3>:

>>   _1 = temp_16(D)->code;

>>   _2 = _1 == 42;

>>   _3 = (int) _2;

>>   _4 = (long int) _3;

>>   temp_17 = (struct rtx_def *) _4;

>>   if (temp_17 != 0B)

>>     goto <bb 4>;

>>   else

>>     goto <bb 8>;

>>

>> after:

>> <bb 3>:

>>   _1 = temp_16(D)->code;

>>   _2 = _1 == 42;

>>   _3 = (int) _2;

>>   _4 = (long int) _2;

>>   temp_17 = (struct rtx_def *) _4;

>>   if (_1 == 42)

>>     goto <bb 4>;

>>   else

>>     goto <bb 8>;

>>

>> I suppose the transform is correct for above test-case ?

>>

>> Then vrp dump shows:

>>  Threaded jump 5 --> 9 to 13

>>   Threaded jump 8 --> 9 to 13

>>   Threaded jump 3 --> 9 to 13

>>   Threaded jump 12 --> 9 to 14

>> Removing basic block 9

>> basic block 9, loop depth 0

>>  pred:

>> if (i1_10(D) != 0B)

>>   goto <bb 10>;

>> else

>>   goto <bb 11>;

>>  succ:       10

>>              11

>>

>> So there remained two instances of if (i1_10 (D) != 0B) in dom2 dump file,

>> and hence needed to update the test-case.

>>

>> Bootstrapped and tested on x86_64-unknown-linux-gnu.

>> OK to commit ?

>

> --- a/gcc/match.pd

> +++ b/gcc/match.pd

> @@ -3408,3 +3408,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)

>          { CONSTRUCTOR_ELT (ctor, idx / k)->value; })

>         (BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; }

>                        @1 { bitsize_int ((idx % k) * width); })))))))))

> +

> +/* PR70920: Transform (intptr_t)x eq/ne CST to x eq/ne (typeof x) CST.

> */

> +

> +(for cmp (ne eq)

> + (simplify

> +  (cmp (convert@2 @0) INTEGER_CST@1)

> +  (if (POINTER_TYPE_P (TREE_TYPE (@0))

> +       && INTEGRAL_TYPE_P (TREE_TYPE (@2)))

>

> you can use @1 here and omit @2.

>

> +   (cmp @0 (convert @1)))))

> +

> +/* Reverse of the above case:

> +   x has integral_type, CST is a pointer constant.

> +   Transform (typeof CST)x eq/ne CST to x eq/ne (typeof x) CST.  */

> +

> +(for cmp (ne eq)

> + (simplify

> +  (cmp (convert @0) @1)

> +  (if (POINTER_TYPE_P (TREE_TYPE (@1))

> +       && INTEGRAL_TYPE_P (TREE_TYPE (@0)))

> +    (cmp @0 (convert @1)))))

>

> The second pattern lacks the INTEGER_CST on @1 so it doesn't match

> its comment.  Please do not add vertical space between pattern

> comment and pattern.

>

> Please place patterns not at the end of match.pd but where similar

> transforms are done.  Like after

>

> /* Simplify pointer equality compares using PTA.  */

> (for neeq (ne eq)

>  (simplify

>   (neeq @0 @1)

>   (if (POINTER_TYPE_P (TREE_TYPE (@0))

>        && ptrs_compare_unequal (@0, @1))

>    { neeq == EQ_EXPR ? boolean_false_node : boolean_true_node; })))

>

> please also share the (for ...) for both patterns or merge them

> by changing the condition to

>

>   (if ((POINTER_TYPE_P (TREE_TYPE (@0))

>         && INTEGRAL_TYPE_P (TREE_TYPE (@1)))

>        || (INTEGRAL_TYPE_P (TREE_TYPE (@0))

>            && POINTER_TYPE_P (TREE_TYPE (@1))))

>

Hi,
Done suggested changes in this version.
pr70920-4.c (test-case in patch) is now folded during  ccp instead of
forwprop after merging the
two patterns.
Passes bootstrap+test on x86_64-unknown-linux-gnu.
OK for trunk ?

Thanks,
Prathamesh
> Richard.

>

>> PS: Writing changelog entries for match.pd is a bit tedious.

>> Should we add optional names for pattern so we can refer to them by names

>> in the ChangeLog for the more complicated ones ?

>> Or maybe just use comments:

>> (simplify /* name */ ... ) -;)

>

> That will add the fun of inventing names ;)

>

>> Thanks,

>> Prathamesh

>>

>

> --

> Richard Biener <rguenther@suse.de>

> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

Comments

Prathamesh Kulkarni July 26, 2016, 1:16 p.m. UTC | #1
On 26 July 2016 at 17:28, Richard Biener <rguenther@suse.de> wrote:
> On Mon, 25 Jul 2016, Prathamesh Kulkarni wrote:

>

>> On 25 July 2016 at 14:32, Richard Biener <rguenther@suse.de> wrote:

>> > On Mon, 25 Jul 2016, Prathamesh Kulkarni wrote:

>> >

>> >> Hi Richard,

>> >> The attached patch tries to fix PR70920.

>> >> It adds your pattern from comment 1 in the PR

>> >> (with additional gating on INTEGRAL_TYPE_P to avoid regressing finalize_18.f90)

>> >> and second pattern, which is reverse of the first transform.

>> >> I needed to update ssa-dom-branch-1.c because with patch applied,

>> >> jump threading removed the second if (i != 0B) block.

>> >> The dumps with and without patch for ssa-dom-branch-1.c start

>> >> to differ with forwprop1:

>> >>

>> >> before:

>> >>  <bb 3>:

>> >>   _1 = temp_16(D)->code;

>> >>   _2 = _1 == 42;

>> >>   _3 = (int) _2;

>> >>   _4 = (long int) _3;

>> >>   temp_17 = (struct rtx_def *) _4;

>> >>   if (temp_17 != 0B)

>> >>     goto <bb 4>;

>> >>   else

>> >>     goto <bb 8>;

>> >>

>> >> after:

>> >> <bb 3>:

>> >>   _1 = temp_16(D)->code;

>> >>   _2 = _1 == 42;

>> >>   _3 = (int) _2;

>> >>   _4 = (long int) _2;

>> >>   temp_17 = (struct rtx_def *) _4;

>> >>   if (_1 == 42)

>> >>     goto <bb 4>;

>> >>   else

>> >>     goto <bb 8>;

>> >>

>> >> I suppose the transform is correct for above test-case ?

>> >>

>> >> Then vrp dump shows:

>> >>  Threaded jump 5 --> 9 to 13

>> >>   Threaded jump 8 --> 9 to 13

>> >>   Threaded jump 3 --> 9 to 13

>> >>   Threaded jump 12 --> 9 to 14

>> >> Removing basic block 9

>> >> basic block 9, loop depth 0

>> >>  pred:

>> >> if (i1_10(D) != 0B)

>> >>   goto <bb 10>;

>> >> else

>> >>   goto <bb 11>;

>> >>  succ:       10

>> >>              11

>> >>

>> >> So there remained two instances of if (i1_10 (D) != 0B) in dom2 dump file,

>> >> and hence needed to update the test-case.

>> >>

>> >> Bootstrapped and tested on x86_64-unknown-linux-gnu.

>> >> OK to commit ?

>> >

>> > --- a/gcc/match.pd

>> > +++ b/gcc/match.pd

>> > @@ -3408,3 +3408,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)

>> >          { CONSTRUCTOR_ELT (ctor, idx / k)->value; })

>> >         (BIT_FIELD_REF { CONSTRUCTOR_ELT (ctor, idx / k)->value; }

>> >                        @1 { bitsize_int ((idx % k) * width); })))))))))

>> > +

>> > +/* PR70920: Transform (intptr_t)x eq/ne CST to x eq/ne (typeof x) CST.

>> > */

>> > +

>> > +(for cmp (ne eq)

>> > + (simplify

>> > +  (cmp (convert@2 @0) INTEGER_CST@1)

>> > +  (if (POINTER_TYPE_P (TREE_TYPE (@0))

>> > +       && INTEGRAL_TYPE_P (TREE_TYPE (@2)))

>> >

>> > you can use @1 here and omit @2.

>> >

>> > +   (cmp @0 (convert @1)))))

>> > +

>> > +/* Reverse of the above case:

>> > +   x has integral_type, CST is a pointer constant.

>> > +   Transform (typeof CST)x eq/ne CST to x eq/ne (typeof x) CST.  */

>> > +

>> > +(for cmp (ne eq)

>> > + (simplify

>> > +  (cmp (convert @0) @1)

>> > +  (if (POINTER_TYPE_P (TREE_TYPE (@1))

>> > +       && INTEGRAL_TYPE_P (TREE_TYPE (@0)))

>> > +    (cmp @0 (convert @1)))))

>> >

>> > The second pattern lacks the INTEGER_CST on @1 so it doesn't match

>> > its comment.  Please do not add vertical space between pattern

>> > comment and pattern.

>> >

>> > Please place patterns not at the end of match.pd but where similar

>> > transforms are done.  Like after

>> >

>> > /* Simplify pointer equality compares using PTA.  */

>> > (for neeq (ne eq)

>> >  (simplify

>> >   (neeq @0 @1)

>> >   (if (POINTER_TYPE_P (TREE_TYPE (@0))

>> >        && ptrs_compare_unequal (@0, @1))

>> >    { neeq == EQ_EXPR ? boolean_false_node : boolean_true_node; })))

>> >

>> > please also share the (for ...) for both patterns or merge them

>> > by changing the condition to

>> >

>> >   (if ((POINTER_TYPE_P (TREE_TYPE (@0))

>> >         && INTEGRAL_TYPE_P (TREE_TYPE (@1)))

>> >        || (INTEGRAL_TYPE_P (TREE_TYPE (@0))

>> >            && POINTER_TYPE_P (TREE_TYPE (@1))))

>> >

>> Hi,

>> Done suggested changes in this version.

>> pr70920-4.c (test-case in patch) is now folded during  ccp instead of

>> forwprop after merging the

>> two patterns.

>> Passes bootstrap+test on x86_64-unknown-linux-gnu.

>> OK for trunk ?

>

> (please paste in ChangeLog entries rather than attaching them).

Will do henceforth.
>

> In gcc.dg/tree-ssa/ssa-dom-branch-1.c you need to adjust the comment

> before the dump-scan you adjust.

>

> Ok with that change.

Thanks, committed as r238754 after adjusting the comment in ssa-dom-branch-1.c.

Thanks,
Prathamesh
>

> Thanks,

> Richard.
Prathamesh Kulkarni July 28, 2016, 12:51 p.m. UTC | #2
On 28 July 2016 at 15:58, Andreas Schwab <schwab@suse.de> wrote:
> On Mo, Jul 25 2016, Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> wrote:

>

>> diff --git a/gcc/testsuite/gcc.dg/pr70920-4.c b/gcc/testsuite/gcc.dg/pr70920-4.c

>> new file mode 100644

>> index 0000000..dedb895

>> --- /dev/null

>> +++ b/gcc/testsuite/gcc.dg/pr70920-4.c

>> @@ -0,0 +1,21 @@

>> +/* { dg-do compile } */

>> +/* { dg-options "-O2 -fdump-tree-ccp-details -Wno-int-to-pointer-cast" } */

>> +

>> +#include <stdint.h>

>> +

>> +void f1();

>> +void f2();

>> +

>> +void

>> +foo (int a)

>> +{

>> +  void *cst = 0;

>> +  if ((int *) a == cst)

>> +    {

>> +      f1 ();

>> +      if (a)

>> +     f2 ();

>> +    }

>> +}

>> +

>> +/* { dg-final { scan-tree-dump "gimple_simplified to if \\(_\[0-9\]* == 0\\)" "ccp1" } } */

>

> This fails on all ilp32 platforms.

Oops, sorry for the breakage.
With -m32, the pattern is applied during forwprop1 rather than ccp1.
I wonder though why ccp1 fails to fold the pattern with -m32 ?
Looking at the dumps:

without -m32:
input to ccp1 pass:
  <bb 2>:
  cst_4 = 0B;
  _1 = (long int) a_5(D);
  _2 = (void *) _1;
  if (cst_4 == _2)
    goto <bb 3>;
  else
    goto <bb 5>;

cc1 pass dump shows:
Substituting values and folding statements

Folding statement: _1 = (long int) a_5(D);
Not folded
Folding statement: _2 = (void *) _1;
Not folded
Folding statement: if (cst_4 == _2)
which is likely CONSTANT
Applying pattern match.pd:2537, gimple-match.c:6530
gimple_simplified to if (_1 == 0)
Folded into: if (_1 == 0)

with -m32:
input to ccp1 pass:
 <bb 2>:
  cst_3 = 0B;
  a.0_1 = (void *) a_4(D);
  if (cst_3 == a.0_1)
    goto <bb 3>;
  else
    goto <bb 5>;

ccp1 pass dump shows:
Substituting values and folding statements

Folding statement: a.0_1 = (void *) a_4(D);
Not folded
Folding statement: if (cst_3 == a.0_1)
which is likely CONSTANT
Folded into: if (a.0_1 == 0B)

I am not able to understand why it doesn't fold it to
if (a_4(D) == 0) ?
forwprop1 folds a.0_1 == 0B to a_4(D) == 0.

I suppose the test-case would need to scan ccp1 for non-ilp targets
and forwprop1 for
ilp targets. How do update the test-case to reflect this ?

Thanks,
Prathamesh
>

> Andreas.

>

> --

> Andreas Schwab, SUSE Labs, schwab@suse.de

> GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7

> "And now for something completely different."
diff mbox

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 21bf617..6c2ec82 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2513,6 +2513,15 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        && ptrs_compare_unequal (@0, @1))
    { neeq == EQ_EXPR ? boolean_false_node : boolean_true_node; })))
 
+/* PR70920: Transform (intptr_t)x eq/ne CST to x eq/ne (typeof x) CST.
+   and (typeof ptr_cst) x eq/ne ptr_cst to x eq/ne (typeof x) CST */
+(for cmp (ne eq)
+ (simplify
+  (cmp (convert @0) INTEGER_CST@1)
+  (if ((POINTER_TYPE_P (TREE_TYPE (@0)) && INTEGRAL_TYPE_P (TREE_TYPE (@1)))
+        || (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && POINTER_TYPE_P (TREE_TYPE (@1))))
+   (cmp @0 (convert @1)))))
+
 /* Non-equality compare simplifications from fold_binary  */
 (for cmp (lt gt le ge)
  /* Comparisons with the highest or lowest possible integer of
diff --git a/gcc/testsuite/gcc.dg/pr70920-1.c b/gcc/testsuite/gcc.dg/pr70920-1.c
new file mode 100644
index 0000000..9b7e2d0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr70920-1.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple" } */
+
+#include <stdint.h>
+
+void f1();
+void f2();
+
+void
+foo (int *a)
+{
+  if ((intptr_t) a == 0)
+    {
+      f1 ();
+      if (a)
+	f2 (); 
+    }
+}
+
+/* { dg-final { scan-tree-dump "if \\(a == 0B\\)" "gimple" } } */
diff --git a/gcc/testsuite/gcc.dg/pr70920-2.c b/gcc/testsuite/gcc.dg/pr70920-2.c
new file mode 100644
index 0000000..2db9897
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr70920-2.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop-details" } */
+
+#include <stdint.h>
+
+void f1();
+void f2();
+
+void
+foo (int *a)
+{
+  int cst = 0;
+  if ((intptr_t) a == cst)
+    {
+      f1 ();
+      if (a) 
+	f2 (); 
+    }
+}
+
+/* { dg-final { scan-tree-dump "gimple_simplified to if \\(a_\[0-9\]*\\(D\\) == 0B\\)" "forwprop1" } } */
diff --git a/gcc/testsuite/gcc.dg/pr70920-3.c b/gcc/testsuite/gcc.dg/pr70920-3.c
new file mode 100644
index 0000000..8b24cbc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr70920-3.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple -Wno-int-to-pointer-cast" } */
+
+#include <stdint.h>
+
+void f1();
+void f2();
+
+void
+foo (int a)
+{
+  if ((int *) a == 0)
+    {
+      f1 ();
+      if (a)
+	f2 (); 
+    }
+}
+
+/* { dg-final { scan-tree-dump "if \\(a == 0\\)" "gimple" } } */
diff --git a/gcc/testsuite/gcc.dg/pr70920-4.c b/gcc/testsuite/gcc.dg/pr70920-4.c
new file mode 100644
index 0000000..dedb895
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr70920-4.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ccp-details -Wno-int-to-pointer-cast" } */
+
+#include <stdint.h>
+
+void f1();
+void f2();
+
+void
+foo (int a)
+{
+  void *cst = 0; 
+  if ((int *) a == cst)
+    {
+      f1 ();
+      if (a) 
+	f2 (); 
+    }
+}
+
+/* { dg-final { scan-tree-dump "gimple_simplified to if \\(_\[0-9\]* == 0\\)" "ccp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-branch-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-branch-1.c
index 18f9041..d38e3a8 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-branch-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-branch-1.c
@@ -21,7 +21,7 @@  try_combine (rtx i1, rtx newpat)
 
 /* There should be three tests against i1.  Two from the hash table
    dumps, one in the code itself.  */
-/* { dg-final { scan-tree-dump-times "if .i1_" 3 "dom2"} } */
+/* { dg-final { scan-tree-dump-times "if .i1_" 2 "dom2"} } */
 
 /* There should be no actual jump threads realized by DOM.  The
    legitimize jump threads are handled in VRP and those discovered