diff mbox

[vectorizer] Fix PR tree-optimization/48765

Message ID OF93A25F4C.68B007ED-ONC2257880.004195B4-C2257880.0045C3D6@il.ibm.com
State Accepted
Headers show

Commit Message

Ira Rosen April 28, 2011, 12:42 p.m. UTC
Hi,

Sometimes the loop vectorization factor changes during the analysis, while
statement analysis depends on it. This patch moves the update of the
vectorization factor before statement analysis, avoiding the current
difference between the analysis and transformation phases that caused the
problem described in the PR.

Bootstrapped and now testing on powerpc64-suse-linux.
I'll commit the patch once the testing completes.

Ira

ChangeLog:

	PR tree-optimization/48765
	* tree-vect-loop.c (vect_analyze_loop_operations): Scan the statements
	and update the vectorization factor according to the type of
	vectorization before statement analysis.
	(vectorizable_reduction): Set number of copies to 1 in case of pure SLP
	statement.
	* tree-vect-stmts.c (vectorizable_conversion, vectorizable_assignment,
	vectorizable_shift, vectorizable_operation, vectorizable_type_demotion,
	vectorizable_type_promotion, vectorizable_store, vectorizable_load):
	Likewise.
	(vectorizable_condition): Move the check that it is not SLP
	vectorization before the number of copies check.

testsuite/ChangeLog:

	PR tree-optimization/48765
	* gcc.dg/vect/pr48765.c: New.


+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff mbox

Patch

Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c    (revision 173018)
+++ tree-vect-loop.c    (working copy)
@@ -1167,7 +1167,38 @@  vect_analyze_loop_operations (loop_vec_info loop_v

   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+     vectorization factor of the loop is the unrolling factor required by
the
+     SLP instances.  If that unrolling factor is 1, we say, that we
perform
+     pure SLP on loop - cross iteration parallelism is not exploited.  */
+  for (i = 0; i < nbbs; i++)
+    {
+      basic_block bb = bbs[i];
+      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+        {
+          gimple stmt = gsi_stmt (si);
+          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+          gcc_assert (stmt_info);
+          if ((STMT_VINFO_RELEVANT_P (stmt_info)
+               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE
(stmt_info)))
+              && !PURE_SLP_STMT (stmt_info))
+            /* STMT needs both SLP and loop-based vectorization.  */
+            only_slp_in_loop = false;
+        }
+    }

+  if (only_slp_in_loop)
+    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+  else
+    vectorization_factor = least_common_multiple (vectorization_factor,
+                                LOOP_VINFO_SLP_UNROLLING_FACTOR
(loop_vinfo));
+
+  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "Updating vectorization factor to %d ",
+             vectorization_factor);
+
   for (i = 0; i < nbbs; i++)
     {
       basic_block bb = bbs[i];
@@ -1272,18 +1303,8 @@  vect_analyze_loop_operations (loop_vec_info loop_v
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         {
           gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
-          gcc_assert (stmt_info);
-
          if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
-
-          if ((STMT_VINFO_RELEVANT_P (stmt_info)
-               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE
(stmt_info)))
-              && !PURE_SLP_STMT (stmt_info))
-            /* STMT needs both SLP and loop-based vectorization.  */
-            only_slp_in_loop = false;
         }
     } /* bbs */

@@ -1303,18 +1324,6 @@  vect_analyze_loop_operations (loop_vec_info loop_v
       return false;
     }

-  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-     vectorization factor of the loop is the unrolling factor required by
the
-     SLP instances.  If that unrolling factor is 1, we say, that we
perform
-     pure SLP on loop - cross iteration parallelism is not exploited.  */
-  if (only_slp_in_loop)
-    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-  else
-    vectorization_factor = least_common_multiple (vectorization_factor,
-                                LOOP_VINFO_SLP_UNROLLING_FACTOR
(loop_vinfo));
-
-  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump,
@@ -4136,7 +4145,7 @@  vectorizable_reduction (gimple stmt, gimple_stmt_i
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;

-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c   (revision 173018)
+++ tree-vect-stmts.c   (working copy)
@@ -1747,7 +1747,7 @@  vectorizable_conversion (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;

   /* Sanity check: make sure that at least one copy of the vectorized stmt
@@ -1940,7 +1940,7 @@  vectorizable_assignment (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2149,7 +2149,7 @@  vectorizable_shift (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2497,7 +2497,7 @@  vectorizable_operation (gimple stmt, gimple_stmt_i
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2895,7 +2895,7 @@  vectorizable_type_demotion (gimple stmt, gimple_st
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@@ -3175,7 +3175,7 @@  vectorizable_type_promotion (gimple stmt, gimple_s
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -3358,7 +3358,7 @@  vectorizable_store (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -3851,7 +3851,7 @@  vectorizable_load (gimple stmt, gimple_stmt_iterat
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -4457,6 +4457,10 @@  vectorizable_condition (gimple stmt, gimple_stmt_i
   /* FORNOW: unsupported in basic block SLP.  */
   gcc_assert (loop_vinfo);
+  /* FORNOW: SLP not supported.  */
+  if (STMT_SLP_TYPE (stmt_info))
+    return false;
+
   gcc_assert (ncopies >= 1);
   if (reduc_index && ncopies > 1)
     return false; /* FORNOW */
@@ -4469,10 +4473,6 @@  vectorizable_condition (gimple stmt, gimple_stmt_i
            && reduc_def))
     return false;

-  /* FORNOW: SLP not supported.  */
-  if (STMT_SLP_TYPE (stmt_info))
-    return false;
-
   /* FORNOW: not yet supported.  */
   if (STMT_VINFO_LIVE_P (stmt_info))
     {
Index: testsuite/gcc.dg/vect/pr48765.c
===================================================================
--- testsuite/gcc.dg/vect/pr48765.c     (revision 0)
+++ testsuite/gcc.dg/vect/pr48765.c     (revision 0)
@@ -0,0 +1,82 @@ 
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-m64 -O3 -mcpu=power6" } */
+
+enum reg_class
+{
+  NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS,
+    LIM_REG_CLASSES
+};
+enum machine_mode
+{
+  VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode,
OImode,
+    QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode,
DCmode,
+    XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode,
+    BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE
+};
+typedef struct rtx_def
+{
+  int mode:8;
+}
+ *rtx;
+extern rtx *regno_reg_rtx;
+typedef unsigned int HARD_REG_ELT_TYPE;
+typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)];
+extern int reg_alloc_order[64];
+extern int max_regno;
+extern int *reg_n_calls_crossed;
+extern short *reg_renumber;
+static int *reg_where_dead;
+static int *reg_where_born;
+static int *reg_order;
+static char *regs_change_size;
+static HARD_REG_SET *after_insn_hard_regs;
+static int stupid_find_reg (int, enum reg_class, enum machine_mode, int,
int,
+                           int);
+void
+stupid_life_analysis (f, nregs, file)
+     rtx f;
+{
+  register int i;
+  for (i = (((64)) + 3) + 1; i < max_regno; i++)
+    {
+      register int r = reg_order[i];
+      if ((int) LIM_REG_CLASSES > 1)
+       reg_renumber[r] =
+         stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r),
+                          ((regno_reg_rtx[r])->mode), reg_where_born[r],
+                          reg_where_dead[r], regs_change_size[r]);
+    }
+}
+
+static int
+stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
+                changes_size)
+     int call_preserved;
+     enum reg_class class;
+     enum machine_mode mode;
+{
+  register int i, ins;
+  HARD_REG_SET used, this_reg;
+  for (ins = born_insn; ins < dead_insn; ins++)
+    do
+      {
+       register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ =
+         (after_insn_hard_regs[ins]);
+       for (i = 0; i < ((64 + 32 - 1) / 32); i++)
+         *scan_tp_++ |= *scan_fp_++;
+      }
+    while (0);
+  for (i = 0; i < 64; i++)
+    {
+      int regno = reg_alloc_order[i];
+      if (((used)[(regno) / ((unsigned) 32)] &
+          (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
+       {
+         register int j;
+         if (j == regno)
+           return regno;
+       }
+    }