===================================================================
@@ -1167,7 +1167,38 @@ vect_analyze_loop_operations (loop_vec_info loop_v
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ vectorization factor of the loop is the unrolling factor required by the
+ SLP instances. If that unrolling factor is 1, we say, that we perform
+ pure SLP on loop - cross iteration parallelism is not exploited. */
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ gcc_assert (stmt_info);
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+ && !PURE_SLP_STMT (stmt_info))
+ /* STMT needs both SLP and loop-based vectorization. */
+ only_slp_in_loop = false;
+ }
+ }
+ if (only_slp_in_loop)
+ vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ else
+ vectorization_factor = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Updating vectorization factor to %d ",
+ vectorization_factor);
+
for (i = 0; i < nbbs; i++)
{
basic_block bb = bbs[i];
@@ -1272,18 +1303,8 @@ vect_analyze_loop_operations (loop_vec_info loop_v
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- gcc_assert (stmt_info);
-
if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
-
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
}
} /* bbs */
@@ -1303,18 +1324,6 @@ vect_analyze_loop_operations (loop_vec_info loop_v
return false;
}
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by the
- SLP instances. If that unrolling factor is 1, we say, that we perform
- pure SLP on loop - cross iteration parallelism is not exploited. */
- if (only_slp_in_loop)
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- else
- vectorization_factor = least_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
@@ -4136,7 +4145,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
===================================================================
@@ -1747,7 +1747,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
/* Sanity check: make sure that at least one copy of the vectorized stmt
@@ -1940,7 +1940,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2149,7 +2149,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_itera
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2497,7 +2497,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_i
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2895,7 +2895,7 @@ vectorizable_type_demotion (gimple stmt, gimple_st
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@@ -3175,7 +3175,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -3358,7 +3358,7 @@ vectorizable_store (gimple stmt, gimple_stmt_itera
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp)
+ if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -3851,7 +3851,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterat
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp)
+ if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -4457,6 +4457,10 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
/* FORNOW: unsupported in basic block SLP. */
gcc_assert (loop_vinfo);
+ /* FORNOW: SLP not supported. */
+ if (STMT_SLP_TYPE (stmt_info))
+ return false;
+
gcc_assert (ncopies >= 1);
if (reduc_index && ncopies > 1)
return false; /* FORNOW */
@@ -4469,10 +4473,6 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
&& reduc_def))
return false;
- /* FORNOW: SLP not supported. */
- if (STMT_SLP_TYPE (stmt_info))
- return false;
-
/* FORNOW: not yet supported. */
if (STMT_VINFO_LIVE_P (stmt_info))
{
===================================================================
@@ -0,0 +1,82 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-m64 -O3 -mcpu=power6" } */
+
+enum reg_class
+{
+ NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS,
+ LIM_REG_CLASSES
+};
+enum machine_mode
+{
+ VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode, OImode,
+ QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode, DCmode,
+ XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode,
+ BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE
+};
+typedef struct rtx_def
+{
+ int mode:8;
+}
+ *rtx;
+extern rtx *regno_reg_rtx;
+typedef unsigned int HARD_REG_ELT_TYPE;
+typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)];
+extern int reg_alloc_order[64];
+extern int max_regno;
+extern int *reg_n_calls_crossed;
+extern short *reg_renumber;
+static int *reg_where_dead;
+static int *reg_where_born;
+static int *reg_order;
+static char *regs_change_size;
+static HARD_REG_SET *after_insn_hard_regs;
+static int stupid_find_reg (int, enum reg_class, enum machine_mode, int, int,
+ int);
+void
+stupid_life_analysis (f, nregs, file)
+ rtx f;
+{
+ register int i;
+ for (i = (((64)) + 3) + 1; i < max_regno; i++)
+ {
+ register int r = reg_order[i];
+ if ((int) LIM_REG_CLASSES > 1)
+ reg_renumber[r] =
+ stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r),
+ ((regno_reg_rtx[r])->mode), reg_where_born[r],
+ reg_where_dead[r], regs_change_size[r]);
+ }
+}
+
+static int
+stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
+ changes_size)
+ int call_preserved;
+ enum reg_class class;
+ enum machine_mode mode;
+{
+ register int i, ins;
+ HARD_REG_SET used, this_reg;
+ for (ins = born_insn; ins < dead_insn; ins++)
+ do
+ {
+ register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ =
+ (after_insn_hard_regs[ins]);
+ for (i = 0; i < ((64 + 32 - 1) / 32); i++)
+ *scan_tp_++ |= *scan_fp_++;
+ }
+ while (0);
+ for (i = 0; i < 64; i++)
+ {
+ int regno = reg_alloc_order[i];
+ if (((used)[(regno) / ((unsigned) 32)] &
+ (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
+ {
+ register int j;
+ if (j == regno)
+ return regno;
+ }
+ }