===================================================================
@@ -0,0 +1,64 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+/* Modified rgb to rgb conversion from FFmpeg. */
+__attribute__ ((noinline)) void
+foo (unsigned char *src, unsigned char *dst)
+{
+ unsigned char *s = src;
+ unsigned short *d = (unsigned short *)dst;
+ int i;
+
+ for (i = 0; i < N/4; i++)
+ {
+ const int b = *s++;
+ const int g = *s++;
+ const int r = *s++;
+ const int a = *s++;
+ *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
+ d++;
+ }
+
+ s = src;
+ d = (unsigned short *)dst;
+ for (i = 0; i < N/4; i++)
+ {
+ const int b = *s++;
+ const int g = *s++;
+ const int r = *s++;
+ const int a = *s++;
+ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
+ abort ();
+ d++;
+ }
+}
+
+int main (void)
+{
+ int i;
+ unsigned char in[N], out[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in[i] = i;
+ out[i] = 255;
+ __asm__ volatile ("");
+ }
+
+ foo (in, out);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
===================================================================
@@ -0,0 +1,65 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+/* Modified rgb to rgb conversion from FFmpeg. */
+__attribute__ ((noinline)) void
+foo (unsigned char *src, unsigned char *dst)
+{
+ unsigned char *s = src;
+ int *d = (int *)dst;
+ int i;
+
+ for (i = 0; i < N/4; i++)
+ {
+ const int b = *s++;
+ const int g = *s++;
+ const int r = *s++;
+ const int a = *s++;
+ *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
+ d++;
+ }
+
+ s = src;
+ d = (int *)dst;
+ for (i = 0; i < N/4; i++)
+ {
+ const int b = *s++;
+ const int g = *s++;
+ const int r = *s++;
+ const int a = *s++;
+ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
+ abort ();
+ d++;
+ }
+}
+
+int main (void)
+{
+ int i;
+ unsigned char in[N], out[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in[i] = i;
+ out[i] = 255;
+ __asm__ volatile ("");
+ }
+
+ foo (in, out);
+
+ return 0;
+}
+
+/* Final value stays in int, so no over-widening is detected at the moment. */
+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
===================================================================
@@ -0,0 +1,64 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+/* Modified rgb to rgb conversion from FFmpeg. */
+__attribute__ ((noinline)) void
+foo (unsigned char *src, unsigned char *dst)
+{
+ unsigned char *s = src;
+ unsigned short *d = (unsigned short *)dst;
+ int i;
+
+ for (i = 0; i < N/4; i++)
+ {
+ const int b = *s++;
+ const int g = *s++;
+ const int r = *s++;
+ const int a = *s++;
+ *d = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9));
+ d++;
+ }
+
+ s = src;
+ d = (unsigned short *)dst;
+ for (i = 0; i < N/4; i++)
+ {
+ const int b = *s++;
+ const int g = *s++;
+ const int r = *s++;
+ const int a = *s++;
+ if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)))
+ abort ();
+ d++;
+ }
+}
+
+int main (void)
+{
+ int i;
+ unsigned char in[N], out[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in[i] = i;
+ out[i] = 255;
+ __asm__ volatile ("");
+ }
+
+ foo (in, out);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
===================================================================
@@ -0,0 +1,68 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+/* Modified rgb to rgb conversion from FFmpeg. */
+__attribute__ ((noinline)) int
+foo (unsigned char *src, unsigned char *dst)
+{
+ unsigned char *s = src;
+ unsigned short *d = (unsigned short *)dst, res;
+ int i, result = 0;
+
+ for (i = 0; i < N/4; i++)
+ {
+ const int b = *s++;
+ const int g = *s++;
+ const int r = *s++;
+ const int a = *s++;
+ res = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
+ *d = res;
+ result += res;
+ d++;
+ }
+
+ s = src;
+ d = (unsigned short *)dst;
+ for (i = 0; i < N/4; i++)
+ {
+ const int b = *s++;
+ const int g = *s++;
+ const int r = *s++;
+ const int a = *s++;
+ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
+ abort ();
+ d++;
+ }
+
+ return result;
+}
+
+int main (void)
+{
+ int i;
+ unsigned char in[N], out[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in[i] = i;
+ out[i] = 255;
+ __asm__ volatile ("");
+ }
+
+ foo (in, out);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
===================================================================
@@ -469,6 +469,9 @@ typedef struct _stmt_vec_info {
pattern). */
gimple related_stmt;
+ /* Used to keep a def stmt of a pattern stmt if such exists. */
+ gimple pattern_def_stmt;
+
/* List of datarefs that are known to have the same alignment as the dataref
of this stmt. */
VEC(dr_p,heap) *same_align_refs;
@@ -536,6 +539,7 @@ typedef struct _stmt_vec_info {
#define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
+#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt
#define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type
#define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element
@@ -819,6 +823,7 @@ extern bool vectorizable_condition (gimple, gimple
extern void vect_get_load_cost (struct data_reference *, int, bool,
unsigned int *, unsigned int *);
extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
+extern bool vect_supportable_shift (enum tree_code, tree);
/* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
@@ -897,7 +902,7 @@ extern void vect_slp_transform_bb (basic_block);
Additional pattern recognition functions can (and will) be added
in the future. */
typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
-#define NUM_PATTERNS 4
+#define NUM_PATTERNS 5
void vect_pattern_recog (loop_vec_info);
/* In tree-vectorizer.c. */
===================================================================
@@ -181,8 +181,8 @@ vect_determine_vectorization_factor (loop_vec_info
stmt_vec_info stmt_info;
int i;
HOST_WIDE_INT dummy;
- gimple stmt, pattern_stmt = NULL;
- bool analyze_pattern_stmt = false;
+ gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL;
+ bool analyze_pattern_stmt = false, pattern_def = false;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
@@ -296,6 +296,29 @@ vect_determine_vectorization_factor (loop_vec_info
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
analyze_pattern_stmt = true;
+ /* If a pattern statement has a def stmt, analyze it too. */
+ if (is_pattern_stmt_p (stmt_info)
+ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
+ {
+ if (pattern_def)
+ pattern_def = false;
+ else
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "==> examining pattern def stmt: ");
+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+ TDF_SLIM);
+ }
+
+ pattern_def = true;
+ stmt = pattern_def_stmt;
+ stmt_info = vinfo_for_stmt (stmt);
+ }
+ }
+
if (gimple_get_lhs (stmt) == NULL_TREE)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
@@ -400,7 +423,7 @@ vect_determine_vectorization_factor (loop_vec_info
|| (nunits > vectorization_factor))
vectorization_factor = nunits;
- if (!analyze_pattern_stmt)
+ if (!analyze_pattern_stmt && !pattern_def)
gsi_next (&si);
}
}
@@ -5078,8 +5101,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
tree cond_expr = NULL_TREE;
gimple_seq cond_expr_stmt_list = NULL;
bool do_peeling_for_loop_bound;
- gimple stmt, pattern_stmt;
- bool transform_pattern_stmt = false;
+ gimple stmt, pattern_stmt, pattern_def_stmt;
+ bool transform_pattern_stmt = false, pattern_def = false;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ===");
@@ -5223,6 +5246,30 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
transform_pattern_stmt = true;
+ /* If pattern statement has a def stmt, vectorize it too. */
+ if (is_pattern_stmt_p (stmt_info)
+ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
+ {
+ if (pattern_def)
+ pattern_def = false;
+ else
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "==> vectorizing pattern def"
+ " stmt: ");
+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+ TDF_SLIM);
+ }
+
+ pattern_def = true;
+ stmt = pattern_def_stmt;
+ stmt_info = vinfo_for_stmt (stmt);
+ }
+ }
+
gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
STMT_VINFO_VECTYPE (stmt_info));
@@ -5250,7 +5297,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
/* Hybrid SLP stmts must be vectorized in addition to SLP. */
if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
{
- if (!transform_pattern_stmt)
+ if (!transform_pattern_stmt && !pattern_def)
gsi_next (&si);
continue;
}
@@ -5282,7 +5329,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
}
}
- if (!transform_pattern_stmt)
+ if (!transform_pattern_stmt && !pattern_def)
gsi_next (&si);
} /* stmts in BB */
} /* BBs in loop */
===================================================================
@@ -47,11 +47,14 @@ static gimple vect_recog_widen_mult_pattern (VEC (
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
+static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
+ tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
vect_recog_widen_mult_pattern,
vect_recog_widen_sum_pattern,
vect_recog_dot_prod_pattern,
- vect_recog_pow_pattern};
+ vect_recog_pow_pattern,
+ vect_recog_over_widening_pattern};
/* Function widened_name_p
@@ -827,6 +830,442 @@ vect_recog_widen_sum_pattern (VEC (gimple, heap) *
}
+/* Return TRUE if the operation in STMT can be performed on a smaller type.
+
+ Input:
+ STMT - a statement to check.
+ DEF - we support operations with two operands, one of which is constant.
+ The other operand can be defined by a demotion operation, or by a
+ previous statement in a sequence of over-promoted operations. In the
+ later case DEF is used to replace that operand. (It is defined by a
+ pattern statement we created for the previous statement in the
+ sequence).
+
+ Input/output:
+ NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
+ NULL, it's the type of DEF.
+ STMTS - additional pattern statements. If a pattern statement (type
+ conversion) is created in this function, its original statement is
+ added to STMTS.
+
+ Output:
+ OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
+ operands to use in the new pattern statement for STMT (will be created
+ in vect_recog_over_widening_pattern ()).
+ NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
+ statements for STMT: the first one is a type promotion and the second
+ one is the operation itself. We return the type promotion statement
+ in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
+ the second pattern statement. */
+
+static bool
+vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
+ tree *op0, tree *op1, gimple *new_def_stmt,
+ VEC (gimple, heap) **stmts)
+{
+ enum tree_code code;
+ tree oprnd0, oprnd1, const_oprnd, oprnd;
+ tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
+ gimple def_stmt, new_stmt;
+ bool first = false;
+
+ *new_def_stmt = NULL;
+
+ if (!is_gimple_assign (stmt))
+ return false;
+
+ code = gimple_assign_rhs_code (stmt);
+ if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
+ && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
+ return false;
+
+ oprnd0 = gimple_assign_rhs1 (stmt);
+ oprnd1 = gimple_assign_rhs2 (stmt);
+ type = gimple_expr_type (stmt);
+ if (!types_compatible_p (TREE_TYPE (oprnd0), type)
+ || !types_compatible_p (TREE_TYPE (oprnd1), type)
+ || (TREE_CODE (oprnd0) != INTEGER_CST
+ && TREE_CODE (oprnd1) != INTEGER_CST))
+ return false;
+
+ if (TREE_CODE (oprnd0) == INTEGER_CST)
+ {
+ if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
+ return false;
+
+ const_oprnd = oprnd0;
+ oprnd = oprnd1;
+ }
+ else
+ {
+ const_oprnd = oprnd1;
+ oprnd = oprnd0;
+ }
+
+ /* If we are in the middle of a sequence, we use DEF from a previous
+ statement. Otherwise, OPRND has to be a result of type promotion. */
+ if (*new_type)
+ {
+ half_type = *new_type;
+ oprnd = def;
+ }
+ else
+ {
+ first = true;
+ if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false))
+ return false;
+ }
+
+ /* Can we perform the operation on a smaller type? */
+ switch (code)
+ {
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ case BIT_AND_EXPR:
+ if (!int_fits_type_p (const_oprnd, half_type))
+ {
+ /* HALF_TYPE is not enough. Try a bigger type if possible. */
+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
+ return false;
+
+ interm_type = build_nonstandard_integer_type (
+ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
+ if (!int_fits_type_p (const_oprnd, interm_type))
+ return false;
+ }
+
+ break;
+
+ case LSHIFT_EXPR:
+ /* Try intermediate type - HALF_TYPE is not enough for sure. */
+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
+ return false;
+
+ /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
+ (e.g., if the original value was char, the shift amount is at most 8
+ if we want to use short). */
+ if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
+ return false;
+
+ interm_type = build_nonstandard_integer_type (
+ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
+
+ if (!vect_supportable_shift (code, interm_type))
+ return false;
+
+ break;
+
+ case RSHIFT_EXPR:
+ if (vect_supportable_shift (code, half_type))
+ break;
+
+ /* Try intermediate type - HALF_TYPE is not supported. */
+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
+ return false;
+
+ interm_type = build_nonstandard_integer_type (
+ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
+
+ if (!vect_supportable_shift (code, interm_type))
+ return false;
+
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* There are four possible cases:
+ 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
+ the first statement in the sequence)
+ a. The original, HALF_TYPE, is not enough - we replace the promotion
+ from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
+ b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
+ promotion.
+ 2. OPRND is defined by a pattern statement we created.
+ a. Its type is not sufficient for the operation, we create a new stmt:
+ a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
+ this statement in NEW_DEF_STMT, and it is later put in
+ STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
+ b. OPRND is good to use in the new statement. */
+ if (first)
+ {
+ if (interm_type)
+ {
+ /* Replace the original type conversion HALF_TYPE->TYPE with
+ HALF_TYPE->INTERM_TYPE. */
+ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
+ {
+ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
+ /* Check if the already created pattern stmt is what we need. */
+ if (!is_gimple_assign (new_stmt)
+ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
+ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
+ return false;
+
+ oprnd = gimple_assign_lhs (new_stmt);
+ }
+ else
+ {
+ /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
+ oprnd = gimple_assign_rhs1 (def_stmt);
+ tmp = create_tmp_var (interm_type, NULL);
+ add_referenced_var (tmp);
+ new_oprnd = make_ssa_name (tmp, NULL);
+ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
+ oprnd, NULL_TREE);
+ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
+ oprnd = new_oprnd;
+ }
+ }
+ else
+ {
+ /* Retrieve the operand before the type promotion. */
+ oprnd = gimple_assign_rhs1 (def_stmt);
+ }
+ }
+ else
+ {
+ if (interm_type)
+ {
+ /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
+ tmp = create_tmp_var (interm_type, NULL);
+ add_referenced_var (tmp);
+ new_oprnd = make_ssa_name (tmp, NULL);
+ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
+ oprnd, NULL_TREE);
+ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
+ oprnd = new_oprnd;
+ *new_def_stmt = new_stmt;
+ }
+
+ /* Otherwise, OPRND is already set. */
+ }
+
+ if (interm_type)
+ *new_type = interm_type;
+ else
+ *new_type = half_type;
+
+ if (TREE_CODE (oprnd0) == INTEGER_CST)
+ {
+ *op0 = fold_convert (*new_type, const_oprnd);
+ *op1 = oprnd;
+ }
+ else
+ {
+ *op1 = fold_convert (*new_type, const_oprnd);
+ *op0 = oprnd;
+ }
+
+ return true;
+}
+
+
+/* Try to find a statement or a sequence of statements that can be performed
+ on a smaller type:
+
+ type x_t;
+ TYPE x_T, res0_T, res1_T;
+ loop:
+ S1 x_t = *p;
+ S2 x_T = (TYPE) x_t;
+ S3 res0_T = op (x_T, C0);
+ S4 res1_T = op (res0_T, C1);
+ S5 ... = () res1_T; - type demotion
+
+ where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
+ constants.
+ Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
+ be 'type' or some intermediate type. For now, we expect S5 to be a type
+ demotion operation. We also check that S3 and S4 have only one use.
+.
+
+*/
+static gimple
+vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
+ tree *type_in, tree *type_out)
+{
+ gimple stmt = VEC_pop (gimple, *stmts);
+ gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
+ tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ int nuses = 0;
+ tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
+ bool first;
+ struct loop *loop = (gimple_bb (stmt))->loop_father;
+
+ first = true;
+ while (1)
+ {
+ if (!vinfo_for_stmt (stmt)
+ || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
+ return NULL;
+
+ new_def_stmt = NULL;
+ if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
+ &op0, &op1, &new_def_stmt,
+ stmts))
+ {
+ if (first)
+ return NULL;
+ else
+ break;
+ }
+
+ /* STMT can be performed on a smaller type. Check its uses. */
+ lhs = gimple_assign_lhs (stmt);
+ nuses = 0;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+ {
+ if (is_gimple_debug (USE_STMT (use_p)))
+ continue;
+ use_stmt = USE_STMT (use_p);
+ nuses++;
+ }
+
+ if (nuses != 1 || !is_gimple_assign (use_stmt)
+ || !gimple_bb (use_stmt)
+ || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+ return NULL;
+
+ /* Create pattern statement for STMT. */
+ vectype = get_vectype_for_scalar_type (new_type);
+ if (!vectype)
+ return NULL;
+
+ /* We want to collect all the statements for which we create pattern
+         statements, except for the case when the last statement in the
+ sequence doesn't have a corresponding pattern statement. In such
+ case we associate the last pattern statement with the last statement
+         in the sequence.  Therefore, we only add an original statement to
+ the list if we know that it is not the last. */
+ if (prev_stmt)
+ VEC_safe_push (gimple, heap, *stmts, prev_stmt);
+
+ var = vect_recog_temp_ssa_var (new_type, NULL);
+ pattern_stmt = gimple_build_assign_with_ops (
+ gimple_assign_rhs_code (stmt), var, op0, op1);
+ SSA_NAME_DEF_STMT (var) = pattern_stmt;
+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
+ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "created pattern stmt: ");
+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
+ }
+
+ prev_stmt = stmt;
+ stmt = use_stmt;
+
+ first = false;
+ }
+
+ /* We got a sequence. We expect it to end with a type demotion operation.
+ Otherwise, we quit (for now). There are three possible cases: the
+ conversion is to NEW_TYPE (we don't do anything), the conversion is to
+ a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
+ NEW_TYPE differs (we create a new conversion statement). */
+ if (gimple_assign_rhs_code (use_stmt) == CONVERT_EXPR)
+ {
+ use_lhs = gimple_assign_lhs (use_stmt);
+ use_type = TREE_TYPE (use_lhs);
+      /* Support only type promotion or signedness change.  */
+ if (!INTEGRAL_TYPE_P (use_type)
+ || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
+ return NULL;
+
+ if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
+ || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
+ {
+ /* Create NEW_TYPE->USE_TYPE conversion. */
+ tmp = create_tmp_var (use_type, NULL);
+ add_referenced_var (tmp);
+ new_oprnd = make_ssa_name (tmp, NULL);
+ pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
+ var, NULL_TREE);
+ SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt;
+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
+
+ *type_in = get_vectype_for_scalar_type (new_type);
+ *type_out = get_vectype_for_scalar_type (use_type);
+
+ /* We created a pattern statement for the last statement in the
+ sequence, so we don't need to associate it with the pattern
+ statement created for PREV_STMT. Therefore, we add PREV_STMT
+ to the list in order to mark it later in vect_pattern_recog_1. */
+ if (prev_stmt)
+ VEC_safe_push (gimple, heap, *stmts, prev_stmt);
+ }
+ else
+ {
+ if (prev_stmt)
+ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
+ = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
+
+ *type_in = vectype;
+ *type_out = NULL_TREE;
+ }
+
+ VEC_safe_push (gimple, heap, *stmts, use_stmt);
+ }
+ else
+ /* TODO: support general case, create a conversion to the correct type. */
+ return NULL;
+
+ /* Pattern detected. */
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
+ }
+
+ return pattern_stmt;
+}
+
+
+/* Mark statements that are involved in a pattern. */
+
+static inline void
+vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
+ tree pattern_vectype)
+{
+ stmt_vec_info pattern_stmt_info, def_stmt_info;
+ stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
+ gimple def_stmt;
+
+ set_vinfo_for_stmt (pattern_stmt,
+ new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
+ gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
+ pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
+
+ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
+ STMT_VINFO_DEF_TYPE (pattern_stmt_info)
+ = STMT_VINFO_DEF_TYPE (orig_stmt_info);
+ STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
+ STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
+ STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
+ STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
+ = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
+ if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
+ {
+ def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
+ set_vinfo_for_stmt (def_stmt,
+ new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
+ gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
+ def_stmt_info = vinfo_for_stmt (def_stmt);
+ STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
+ STMT_VINFO_DEF_TYPE (def_stmt_info)
+ = STMT_VINFO_DEF_TYPE (orig_stmt_info);
+ STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
+ }
+}
+
/* Function vect_pattern_recog_1
Input:
@@ -856,7 +1295,6 @@ vect_pattern_recog_1 (
{
gimple stmt = gsi_stmt (si), pattern_stmt;
stmt_vec_info stmt_info;
- stmt_vec_info pattern_stmt_info;
loop_vec_info loop_vinfo;
tree pattern_vectype;
tree type_in, type_out;
@@ -924,26 +1362,17 @@ vect_pattern_recog_1 (
}
/* Mark the stmts that are involved in the pattern. */
- set_vinfo_for_stmt (pattern_stmt,
- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
- gimple_set_bb (pattern_stmt, gimple_bb (stmt));
- pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
+ vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
- STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
- STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
- STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
- STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;
-
/* Patterns cannot be vectorized using SLP, because they change the order of
computation. */
FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
if (next == stmt)
VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
- /* In case of widen-mult by a constant, it is possible that an additional
- pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a
- stmt_info for it, and mark the relevant statements. */
+  /* It is possible that additional pattern stmts are created and inserted in
+ STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
+ relevant statements. */
for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
&& (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
i++)
@@ -956,16 +1385,7 @@ vect_pattern_recog_1 (
print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
}
- set_vinfo_for_stmt (pattern_stmt,
- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
- gimple_set_bb (pattern_stmt, gimple_bb (stmt));
- pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
-
- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
- STMT_VINFO_DEF_TYPE (pattern_stmt_info)
- = STMT_VINFO_DEF_TYPE (stmt_info);
- STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info);
- STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
+ vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
}
VEC_free (gimple, heap, stmts_to_replace);
===================================================================
@@ -2212,6 +2212,42 @@ vectorizable_assignment (gimple stmt, gimple_stmt_
}
+/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
+ either as shift by a scalar or by a vector. */
+
+bool
+vect_supportable_shift (enum tree_code code, tree scalar_type)
+{
+
+ enum machine_mode vec_mode;
+ optab optab;
+ int icode;
+ tree vectype;
+
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ if (!vectype)
+ return false;
+
+ optab = optab_for_tree_code (code, vectype, optab_scalar);
+ if (!optab
+ || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+ {
+ optab = optab_for_tree_code (code, vectype, optab_vector);
+ if (!optab
+ || (optab_handler (optab, TYPE_MODE (vectype))
+ == CODE_FOR_nothing))
+ return false;
+ }
+
+ vec_mode = TYPE_MODE (vectype);
+ icode = (int) optab_handler (optab, vec_mode);
+ if (icode == CODE_FOR_nothing)
+ return false;
+
+ return true;
+}
+
+
/* Function vectorizable_shift.
Check if STMT performs a shift operation that can be vectorized.
@@ -4890,7 +4926,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vect
enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
bool ok;
tree scalar_type, vectype;
- gimple pattern_stmt;
+ gimple pattern_stmt, pattern_def_stmt;
if (vect_print_dump_info (REPORT_DETAILS))
{
@@ -4960,6 +4996,23 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vect
return false;
}
+ if (is_pattern_stmt_p (stmt_info)
+ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
+ {
+ /* Analyze def stmt of STMT if it's a pattern stmt. */
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "==> examining pattern def statement: ");
+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
+ }
+
+ if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
+ return false;
+ }
+
+
switch (STMT_VINFO_DEF_TYPE (stmt_info))
{
case vect_internal_def:
@@ -5280,6 +5333,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop
STMT_VINFO_VECTORIZABLE (res) = true;
STMT_VINFO_IN_PATTERN_P (res) = false;
STMT_VINFO_RELATED_STMT (res) = NULL;
+ STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
STMT_VINFO_DATA_REF (res) = NULL;
STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;