From patchwork Wed Jun 1 09:23:09 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Rosen X-Patchwork-Id: 1702 Return-Path: Delivered-To: unknown Received: from imap.gmail.com (74.125.159.109) by localhost6.localdomain6 with IMAP4-SSL; 08 Jun 2011 14:54:31 -0000 Delivered-To: patches@linaro.org Received: by 10.52.181.10 with SMTP id ds10cs296452vdc; Wed, 1 Jun 2011 02:23:10 -0700 (PDT) Received: by 10.229.111.98 with SMTP id r34mr5300729qcp.3.1306920189759; Wed, 01 Jun 2011 02:23:09 -0700 (PDT) Received: from mail-qy0-f178.google.com (mail-qy0-f178.google.com [209.85.216.178]) by mx.google.com with ESMTPS id e26si1906894qcs.31.2011.06.01.02.23.09 (version=TLSv1/SSLv3 cipher=OTHER); Wed, 01 Jun 2011 02:23:09 -0700 (PDT) Received-SPF: neutral (google.com: 209.85.216.178 is neither permitted nor denied by best guess record for domain of ira.rosen@linaro.org) client-ip=209.85.216.178; Authentication-Results: mx.google.com; spf=neutral (google.com: 209.85.216.178 is neither permitted nor denied by best guess record for domain of ira.rosen@linaro.org) smtp.mail=ira.rosen@linaro.org Received: by qyk2 with SMTP id 2so3355683qyk.16 for ; Wed, 01 Jun 2011 02:23:09 -0700 (PDT) MIME-Version: 1.0 Received: by 10.224.135.133 with SMTP id n5mr5078545qat.392.1306920189365; Wed, 01 Jun 2011 02:23:09 -0700 (PDT) Received: by 10.224.60.74 with HTTP; Wed, 1 Jun 2011 02:23:09 -0700 (PDT) Date: Wed, 1 Jun 2011 12:23:09 +0300 Message-ID: Subject: [patch] Improve detection of widening multiplication in the vectorizer From: Ira Rosen To: gcc-patches@gcc.gnu.org Cc: Patch Tracking Hi, The vectorizer expects widening multiplication pattern to be: type a_t, b_t; TYPE a_T, b_T, prod_T; a_T = (TYPE) a_t; b_T = (TYPE) b_t; prod_T = a_T * b_T; where type 'TYPE' is double the size of type 'type'. This works fine when the types are signed. 
For the unsigned types the code looks like: unsigned type a_t, b_t; unsigned TYPE u_prod_T; TYPE a_T, b_T, prod_T; a_T = (TYPE) a_t; b_T = (TYPE) b_t; prod_T = a_T * b_T; u_prod_T = (unsigned TYPE) prod_T; i.e., the multiplication is done on signed types, followed by a cast to unsigned. This patch adds support for such patterns and generates WIDEN_MULT_EXPR for the unsigned type. Another unsupported case is multiplication by a constant (e.g., b_T is a constant). This patch checks that the constant fits the smaller type 'type' and recognizes such cases as widening multiplication. Bootstrapped and tested on powerpc64-suse-linux. Tested the vectorization testsuite on arm-linux-gnueabi. I'll commit the patch shortly if there are no comments/objections. Ira ChangeLog: * tree-vectorizer.h (vect_recog_func_ptr): Make last argument to be a pointer. * tree-vect-patterns.c (vect_recog_widen_sum_pattern, vect_recog_widen_mult_pattern, vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise. (vect_pattern_recog_1): Remove declaration. (widened_name_p): Remove declaration. Add new argument to specify whether to check that both types are either signed or unsigned. (vect_recog_widen_mult_pattern): Update documentation. Handle unsigned patterns and multiplication by constants. (vect_pattern_recog_1): Update vect_recog_func references. Use statement information from the statement returned from pattern detection functions. (vect_pattern_recog): Update vect_recog_func reference. * tree-vect-stmts.c (vectorizable_type_promotion): For widening multiplication by a constant use the type of the other operand. testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_vect_widen_mult_qi_to_hi): Add NEON as supporting target. (check_effective_target_vect_widen_mult_hi_to_si): Likewise. (check_effective_target_vect_widen_mult_qi_to_hi_pattern): New. (check_effective_target_vect_widen_mult_hi_to_si_pattern): New. 
* gcc.dg/vect/vect-widen-mult-u8.c: Expect to be vectorized using widening multiplication on targets that support it. * gcc.dg/vect/vect-widen-mult-u16.c: Likewise. * gcc.dg/vect/vect-widen-mult-const-s16.c: New test. * gcc.dg/vect/vect-widen-mult-const-u16.c: New test. Index: testsuite/lib/target-supports.exp =================================================================== --- testsuite/lib/target-supports.exp (revision 174475) +++ testsuite/lib/target-supports.exp (working copy) @@ -2668,7 +2668,8 @@ proc check_effective_target_vect_widen_mult_qi_to_ } else { set et_vect_widen_mult_qi_to_hi_saved 0 } - if { [istarget powerpc*-*-*] } { + if { [istarget powerpc*-*-*] + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { set et_vect_widen_mult_qi_to_hi_saved 1 } } @@ -2701,7 +2702,8 @@ proc check_effective_target_vect_widen_mult_hi_to_ || [istarget spu-*-*] || [istarget ia64-*-*] || [istarget i?86-*-*] - || [istarget x86_64-*-*] } { + || [istarget x86_64-*-*] + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { set et_vect_widen_mult_hi_to_si_saved 1 } } @@ -2710,6 +2712,52 @@ proc check_effective_target_vect_widen_mult_hi_to_ } # Return 1 if the target plus current options supports a vector +# widening multiplication of *char* args into *short* result, 0 otherwise. +# +# This won't change for different subtargets so cache the result. 
+ +proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } { + global et_vect_widen_mult_qi_to_hi_pattern + + if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] { + verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2 + } else { + set et_vect_widen_mult_qi_to_hi_pattern_saved 0 + if { [istarget powerpc*-*-*] + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { + set et_vect_widen_mult_qi_to_hi_pattern_saved 1 + } + } + verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2 + return $et_vect_widen_mult_qi_to_hi_pattern_saved +} + +# Return 1 if the target plus current options supports a vector +# widening multiplication of *short* args into *int* result, 0 otherwise. +# +# This won't change for different subtargets so cache the result. + +proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } { + global et_vect_widen_mult_hi_to_si_pattern + + if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] { + verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2 + } else { + set et_vect_widen_mult_hi_to_si_pattern_saved 0 + if { [istarget powerpc*-*-*] + || [istarget spu-*-*] + || [istarget ia64-*-*] + || [istarget i?86-*-*] + || [istarget x86_64-*-*] + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { + set et_vect_widen_mult_hi_to_si_pattern_saved 1 + } + } + verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2 + return $et_vect_widen_mult_hi_to_si_pattern_saved +} + +# Return 1 if the target plus current options supports a vector # dot-product of signed chars, 0 otherwise. # # This won't change for different subtargets so cache the result. 
Index: testsuite/gcc.dg/vect/vect-widen-mult-u8.c =================================================================== --- testsuite/gcc.dg/vect/vect-widen-mult-u8.c (revision 174475) +++ testsuite/gcc.dg/vect/vect-widen-mult-u8.c (working copy) @@ -9,7 +9,7 @@ unsigned char X[N] __attribute__ ((__aligned__(__B unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); unsigned short result[N]; -/* char->short widening-mult */ +/* unsigned char-> unsigned short widening-mult. */ __attribute__ ((noinline)) int foo1(int len) { int i; @@ -28,8 +28,7 @@ int main (void) for (i=0; i + +#define N 32 + +__attribute__ ((noinline)) void +foo (int *__restrict a, + short *__restrict b, + int n) +{ + int i; + + for (i = 0; i < n; i++) + a[i] = b[i] * 2333; + + for (i = 0; i < n; i++) + if (a[i] != b[i] * 2333) + abort (); +} + +__attribute__ ((noinline)) void +bar (int *__restrict a, + short *__restrict b, + int n) +{ + int i; + + for (i = 0; i < n; i++) + a[i] = b[i] * (short) 2333; + + for (i = 0; i < n; i++) + if (a[i] != b[i] * (short) 2333) + abort (); +} + +int main (void) +{ + int i; + int a[N]; + short b[N]; + + for (i = 0; i < N; i++) + { + a[i] = 0; + b[i] = i; + __asm__ volatile (""); + } + + foo (a, b, N); + bar (a, b, N); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + Index: testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c =================================================================== --- testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c (revision 0) +++ testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c (revision 0) @@ -0,0 +1,77 @@ +/* { 
dg-require-effective-target vect_int } */ + +#include "tree-vect.h" +#include + +#define N 32 + +__attribute__ ((noinline)) void +foo (unsigned int *__restrict a, + unsigned short *__restrict b, + int n) +{ + int i; + + for (i = 0; i < n; i++) + a[i] = b[i] * 2333; + + for (i = 0; i < n; i++) + if (a[i] != b[i] * 2333) + abort (); +} + +__attribute__ ((noinline)) void +bar (unsigned int *__restrict a, + unsigned short *__restrict b, + int n) +{ + int i; + + for (i = 0; i < n; i++) + a[i] = (unsigned short) 2333 * b[i]; + + for (i = 0; i < n; i++) + if (a[i] != b[i] * (unsigned short) 2333) + abort (); +} + +__attribute__ ((noinline)) void +baz (unsigned int *__restrict a, + unsigned short *__restrict b, + int n) +{ + int i; + + for (i = 0; i < n; i++) + a[i] = b[i] * 233333333; + + for (i = 0; i < n; i++) + if (a[i] != b[i] * 233333333) + abort (); +} + + +int main (void) +{ + int i; + unsigned int a[N]; + unsigned short b[N]; + + for (i = 0; i < N; i++) + { + a[i] = 0; + b[i] = i; + __asm__ volatile (""); + } + + foo (a, b, N); + bar (a, b, N); + baz (a, b, N); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target vect_widen_mult_hi_to_si } } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + Index: testsuite/gcc.dg/vect/vect-widen-mult-u16.c =================================================================== --- testsuite/gcc.dg/vect/vect-widen-mult-u16.c (revision 174475) +++ testsuite/gcc.dg/vect/vect-widen-mult-u16.c (working copy) @@ -9,13 +9,11 @@ unsigned short X[N] __attribute__ ((__aligned__(__ unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); unsigned int result[N]; -/* short->int widening-mult */ +/* unsigned short->unsigned int 
widening-mult. */ __attribute__ ((noinline)) int foo1(int len) { int i; - /* Not vectorized because X[i] and Y[i] are casted to 'int' - so the widening multiplication pattern is not recognized. */ for (i=0; i */ static gimple -vect_recog_widen_mult_pattern (gimple last_stmt, +vect_recog_widen_mult_pattern (gimple *last_stmt, tree *type_in, tree *type_out) { @@ -367,40 +387,111 @@ static gimple tree oprnd0, oprnd1; tree type, half_type0, half_type1; gimple pattern_stmt; - tree vectype, vectype_out; + tree vectype, vectype_out = NULL_TREE; tree dummy; tree var; enum tree_code dummy_code; int dummy_int; VEC (tree, heap) *dummy_vec; + bool op0_ok, op1_ok; - if (!is_gimple_assign (last_stmt)) + if (!is_gimple_assign (*last_stmt)) return NULL; - type = gimple_expr_type (last_stmt); + type = gimple_expr_type (*last_stmt); /* Starting from LAST_STMT, follow the defs of its uses in search of the above pattern. */ - if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) + if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) return NULL; - oprnd0 = gimple_assign_rhs1 (last_stmt); - oprnd1 = gimple_assign_rhs2 (last_stmt); + oprnd0 = gimple_assign_rhs1 (*last_stmt); + oprnd1 = gimple_assign_rhs2 (*last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) || !types_compatible_p (TREE_TYPE (oprnd1), type)) return NULL; - /* Check argument 0 */ - if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0)) - return NULL; - oprnd0 = gimple_assign_rhs1 (def_stmt0); + /* Check argument 0. */ + op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); + /* Check argument 1. */ + op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); - /* Check argument 1 */ - if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1)) + /* In case of multiplication by a constant one of the operands may not match + the pattern, but not both. 
*/ + if (!op0_ok && !op1_ok) return NULL; - oprnd1 = gimple_assign_rhs1 (def_stmt1); + if (op0_ok && op1_ok) + { + oprnd0 = gimple_assign_rhs1 (def_stmt0); + oprnd1 = gimple_assign_rhs1 (def_stmt1); + } + else if (!op0_ok) + { + if (CONSTANT_CLASS_P (oprnd0) + && TREE_CODE (half_type1) == INTEGER_TYPE + && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) + && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) + { + /* OPRND0 is a constant of HALF_TYPE1. */ + half_type0 = half_type1; + oprnd1 = gimple_assign_rhs1 (def_stmt1); + } + else + return NULL; + } + else if (!op1_ok) + { + if (CONSTANT_CLASS_P (oprnd1) + && TREE_CODE (half_type0) == INTEGER_TYPE + && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) + && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) + { + /* OPRND1 is a constant of HALF_TYPE0. */ + half_type1 = half_type0; + oprnd0 = gimple_assign_rhs1 (def_stmt0); + } + else + return NULL; + } + + /* Handle unsigned case. Look for + S6 u_prod_T = (unsigned TYPE) prod_T; + Use unsigned TYPE as the type for WIDEN_MULT_EXPR. 
*/ + if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) + { + tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; + imm_use_iterator imm_iter; + use_operand_p use_p; + int nuses = 0; + gimple use_stmt = NULL; + tree use_type; + + if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) + return NULL; + + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + use_stmt = USE_STMT (use_p); + nuses++; + } + + if (nuses != 1 || !is_gimple_assign (use_stmt) + || gimple_assign_rhs_code (use_stmt) != NOP_EXPR) + return NULL; + + use_lhs = gimple_assign_lhs (use_stmt); + use_type = TREE_TYPE (use_lhs); + if (!INTEGRAL_TYPE_P (use_type) + || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) + || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) + return NULL; + + type = use_type; + *last_stmt = use_stmt; + } + if (!types_compatible_p (half_type0, half_type1)) return NULL; @@ -413,7 +504,7 @@ static gimple vectype_out = get_vectype_for_scalar_type (type); if (!vectype || !vectype_out - || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, + || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, vectype_out, vectype, &dummy, &dummy, &dummy_code, &dummy_code, &dummy_int, &dummy_vec)) @@ -462,16 +553,16 @@ static gimple */ static gimple -vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out) +vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) { tree fn, base, exp = NULL; gimple stmt; tree var; - if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) + if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) return NULL; - fn = gimple_call_fndecl (last_stmt); + fn = gimple_call_fndecl (*last_stmt); if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) return NULL; @@ -481,8 +572,8 @@ static gimple case BUILT_IN_POWI: case BUILT_IN_POWF: case BUILT_IN_POW: - base = gimple_call_arg (last_stmt, 0); - exp = gimple_call_arg (last_stmt, 1); + base = gimple_call_arg 
(*last_stmt, 0); + exp = gimple_call_arg (*last_stmt, 1); if (TREE_CODE (exp) != REAL_CST && TREE_CODE (exp) != INTEGER_CST) return NULL; @@ -574,21 +665,21 @@ static gimple inner-loop nested in an outer-loop that us being vectorized). */ static gimple -vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out) +vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) { gimple stmt; tree oprnd0, oprnd1; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); tree type, half_type; gimple pattern_stmt; loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_info); tree var; - if (!is_gimple_assign (last_stmt)) + if (!is_gimple_assign (*last_stmt)) return NULL; - type = gimple_expr_type (last_stmt); + type = gimple_expr_type (*last_stmt); /* Look for the following pattern DX = (TYPE) X; @@ -600,25 +691,25 @@ static gimple /* Starting from LAST_STMT, follow the defs of its uses in search of the above pattern. */ - if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) + if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) return NULL; if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) return NULL; - oprnd0 = gimple_assign_rhs1 (last_stmt); - oprnd1 = gimple_assign_rhs2 (last_stmt); + oprnd0 = gimple_assign_rhs1 (*last_stmt); + oprnd1 = gimple_assign_rhs2 (*last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) || !types_compatible_p (TREE_TYPE (oprnd1), type)) return NULL; - /* So far so good. Since last_stmt was detected as a (summation) reduction, + /* So far so good. Since *last_stmt was detected as a (summation) reduction, we know that oprnd1 is the reduction variable (defined by a loop-header phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. Left to check that oprnd0 is defined by a cast from type 'type' to type 'TYPE'. 
*/ - if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt)) + if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) return NULL; oprnd0 = gimple_assign_rhs1 (stmt); @@ -639,7 +730,7 @@ static gimple /* We don't allow changing the order of the computation in the inner-loop when doing outer-loop vectorization. */ - gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); + gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); return pattern_stmt; } @@ -669,23 +760,27 @@ static gimple static void vect_pattern_recog_1 ( - gimple (* vect_recog_func) (gimple, tree *, tree *), + gimple (* vect_recog_func) (gimple *, tree *, tree *), gimple_stmt_iterator si) { gimple stmt = gsi_stmt (si), pattern_stmt; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + stmt_vec_info stmt_info; stmt_vec_info pattern_stmt_info; - loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + loop_vec_info loop_vinfo; tree pattern_vectype; tree type_in, type_out; enum tree_code code; int i; gimple next; - pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out); + pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); if (!pattern_stmt) return; + si = gsi_for_stmt (stmt); + stmt_info = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + if (VECTOR_MODE_P (TYPE_MODE (type_in))) { /* No need to check target support (already checked by the pattern @@ -832,7 +927,7 @@ vect_pattern_recog (loop_vec_info loop_vinfo) unsigned int nbbs = loop->num_nodes; gimple_stmt_iterator si; unsigned int i, j; - gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); + gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_pattern_recog ==="); Index: tree-vect-stmts.c =================================================================== --- tree-vect-stmts.c (revision 174475) +++ tree-vect-stmts.c (working copy) @@ -3232,6 +3232,33 @@ vectorizable_type_promotion (gimple stmt, 
gimple_s fprintf (vect_dump, "use not simple."); return false; } + + op_type = TREE_CODE_LENGTH (code); + if (op_type == binary_op) + { + bool ok; + + op1 = gimple_assign_rhs2 (stmt); + if (code == WIDEN_MULT_EXPR) + { + /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of + OP1. */ + if (CONSTANT_CLASS_P (op0)) + ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL, + &def_stmt, &def, &dt[1], &vectype_in); + else + ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, + &dt[1]); + + if (!ok) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "use not simple."); + return false; + } + } + } + /* If op0 is an external or constant def use a vector type with the same size as the output vector type. */ if (!vectype_in) @@ -3264,18 +3291,6 @@ vectorizable_type_promotion (gimple stmt, gimple_s gcc_assert (ncopies >= 1); - op_type = TREE_CODE_LENGTH (code); - if (op_type == binary_op) - { - op1 = gimple_assign_rhs2 (stmt); - if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1])) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "use not simple."); - return false; - } - } - /* Supportable by target? */ if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in, &decl1, &decl2, &code1, &code2, @@ -3301,6 +3316,14 @@ vectorizable_type_promotion (gimple stmt, gimple_s fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", ncopies); + if (code == WIDEN_MULT_EXPR) + { + if (CONSTANT_CLASS_P (op0)) + op0 = fold_convert (TREE_TYPE (op1), op0); + else if (CONSTANT_CLASS_P (op1)) + op1 = fold_convert (TREE_TYPE (op0), op1); + } + /* Handle def. */ /* In case of multi-step promotion, we first generate promotion operations to the intermediate types, and then from that types to the final one.