From patchwork Sun Sep 25 11:54:23 2011
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Patchwork-Submitter: Ira Rosen <ira.rosen@linaro.org>
X-Patchwork-Id: 4316
Return-Path: <patch+caf_=linaro-patchwork=canonical.com@linaro.org>
X-Original-To: patchwork@peony.canonical.com
Delivered-To: patchwork@peony.canonical.com
Received: from fiordland.canonical.com (fiordland.canonical.com
 [91.189.94.145])
 by peony.canonical.com (Postfix) with ESMTP id EFA3023EF9
 for <patchwork@peony.canonical.com>;
 Sun, 25 Sep 2011 11:54:25 +0000 (UTC)
Received: from mail-fx0-f52.google.com (mail-fx0-f52.google.com
 [209.85.161.52])
 by fiordland.canonical.com (Postfix) with ESMTP id CCA80A182B7
 for <linaro-patchwork@canonical.com>;
 Sun, 25 Sep 2011 11:54:25 +0000 (UTC)
Received: by fxe23 with SMTP id 23so7142457fxe.11
 for <linaro-patchwork@canonical.com>;
 Sun, 25 Sep 2011 04:54:25 -0700 (PDT)
Received: by 10.223.57.17 with SMTP id a17mr3724938fah.65.1316951665598;
 Sun, 25 Sep 2011 04:54:25 -0700 (PDT)
X-Forwarded-To: linaro-patchwork@canonical.com
X-Forwarded-For: patch@linaro.org linaro-patchwork@canonical.com
Delivered-To: patches@linaro.org
Received: by 10.152.3.234 with SMTP id f10cs7332laf;
 Sun, 25 Sep 2011 04:54:25 -0700 (PDT)
Received: by 10.150.139.11 with SMTP id m11mr4973933ybd.7.1316951664119;
 Sun, 25 Sep 2011 04:54:24 -0700 (PDT)
Received: from mail-gw0-f50.google.com (mail-gw0-f50.google.com
 [74.125.83.50]) by mx.google.com with ESMTPS id
 u41si3471289ybu.75.2011.09.25.04.54.23
 (version=TLSv1/SSLv3 cipher=OTHER);
 Sun, 25 Sep 2011 04:54:24 -0700 (PDT)
Received-SPF: neutral (google.com: 74.125.83.50 is neither permitted nor
 denied by best guess record for domain of
 ira.rosen@linaro.org) client-ip=74.125.83.50; 
Authentication-Results: mx.google.com;
 spf=neutral (google.com: 74.125.83.50 is neither
 permitted nor denied by best guess record for domain of
 ira.rosen@linaro.org) smtp.mail=ira.rosen@linaro.org
Received: by gwj19 with SMTP id 19so3662756gwj.37
 for <patches@linaro.org>; Sun, 25 Sep 2011 04:54:23 -0700 (PDT)
MIME-Version: 1.0
Received: by 10.150.65.13 with SMTP id n13mr5339314yba.293.1316951663406;
 Sun, 25 Sep 2011 04:54:23 -0700 (PDT)
Received: by 10.151.113.18 with HTTP; Sun, 25 Sep 2011 04:54:23 -0700 (PDT)
In-Reply-To: <CAFiYyc1WC17=FdqXCVW04nj_b1S-U8Jx_m0G6jAwwCyRxoikbg@mail.gmail.com>
References: <CAKSNEw5jqUqSsmTKQDE18Nkg7BJLCHjOFkHKLoMFDf07UcQbyg@mail.gmail.com>
 <CAFiYyc1WC17=FdqXCVW04nj_b1S-U8Jx_m0G6jAwwCyRxoikbg@mail.gmail.com>
Date: Sun, 25 Sep 2011 14:54:23 +0300
Message-ID: <CAKSNEw5hcEAZe-XH9mGApHj-wEvXLzF6nsv8VYmYAn=DqUMPyg@mail.gmail.com>
Subject: Re: [patch] Support a choice of vector size in SLP
From: Ira Rosen <ira.rosen@linaro.org>
To: Richard Guenther <richard.guenther@gmail.com>
Cc: gcc-patches@gcc.gnu.org, Patch Tracking <patches@linaro.org>

On 25 September 2011 14:45, Richard Guenther <richard.guenther@gmail.com> wrote:
> On Sun, Sep 25, 2011 at 12:59 PM, Ira Rosen <ira.rosen@linaro.org> wrote:
>> Hi,
>>
>> This patch supports an automatic choice of vector size in basic block
>> vectorization similar to the loop vectorization case.
>>
>> I am not sure about the new keyword.
>
> The testsuite one?  I guess we should name them vect128, vect256, etc.,
> as testcases will be looking for an absolute size, not a relative ("half") one.

OK, changing it to vect64 (see the updated target-supports.exp hunk at the end of this message).


Thanks,
Ira

>
> Richard.
>
>> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
>> and arm-linux-gnueabi.
>>
>> Thanks,
>> Ira
>>
>> ChangeLog:
>>
>>        * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
>>        of vect_slp_analyze_bb here.
>>        (vect_slp_analyze_bb): Loop over vector sizes calling
>>        vect_slp_analyze_bb_1.
>>
>> testsuite/ChangeLog:
>>
>>        * lib/target-supports.exp (check_effective_target_vect_half_size): New.
>>        * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
>>        of multiple vector sizes.
>>        * gcc.dg/vect/bb-slp-26.c: New.
>>
>> Index: testsuite/lib/target-supports.exp
>> ===================================================================
>> --- testsuite/lib/target-supports.exp   (revision 179159)
>> +++ testsuite/lib/target-supports.exp   (working copy)
>> @@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
>>     return $et_vect_multiple_sizes_saved
>>  }
>>
>> +# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints.
>> +
>> +proc check_effective_target_vect_half_size { } {
>> +    global et_vect_half_size
>> +
>> +    if [info exists et_vect_half_size_saved] {
>> +        verbose "check_effective_target_vect_half_size: using cached result" 2
>> +    } else {
>> +        set et_vect_half_size_saved 0
>> +        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
>> +           set et_vect_half_size_saved 1
>> +        }
>> +    }
>> +
>> +    verbose "check_effective_target_vect_half_size: returning
>> $et_vect_half_size_saved" 2
>> +    return $et_vect_half_size_saved
>> +}
>> +
>>  # Return 1 if the target supports section-anchors
>>
>>  proc check_effective_target_section_anchors { } {
>> Index: testsuite/gcc.dg/vect/bb-slp-26.c
>> ===================================================================
>> --- testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
>> +++ testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
>> @@ -0,0 +1,59 @@
>> +/* { dg-require-effective-target vect_int } */
>> +
>> +#include <stdarg.h>
>> +#include "tree-vect.h"
>> +
>> +#define A 3
>> +#define B 4
>> +#define N 256
>> +
>> +char src[N], dst[N];
>> +
>> +void foo (char * __restrict__ dst, char * __restrict__ src, int h,
>> int stride, int dummy)
>> +{
>> +  int i;
>> +  h /= 16;
>> +  for (i = 0; i < h; i++)
>> +    {
>> +      dst[0] += A*src[0] + src[stride];
>> +      dst[1] += A*src[1] + src[1+stride];
>> +      dst[2] += A*src[2] + src[2+stride];
>> +      dst[3] += A*src[3] + src[3+stride];
>> +      dst[4] += A*src[4] + src[4+stride];
>> +      dst[5] += A*src[5] + src[5+stride];
>> +      dst[6] += A*src[6] + src[6+stride];
>> +      dst[7] += A*src[7] + src[7+stride];
>> +      dst += 8;
>> +      src += 8;
>> +      if (dummy == 32)
>> +        abort ();
>> +   }
>> +}
>> +
>> +
>> +int main (void)
>> +{
>> +  int i;
>> +
>> +  check_vect ();
>> +
>> +  for (i = 0; i < N; i++)
>> +    {
>> +       dst[i] = 0;
>> +       src[i] = i/8;
>> +    }
>> +
>> +  foo (dst, src, N, 8, 0);
>> +
>> +  for (i = 0; i < N/2; i++)
>> +    {
>> +      if (dst[i] != A * src[i] + src[i+8])
>> +        abort ();
>> +    }
>> +
>> +  return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "basic block vectorized using
>> SLP" 1 "slp" { target vect_half_size } } } */
>> +/* { dg-final { cleanup-tree-dump "slp" } } */
>> +
>> Index: testsuite/gcc.dg/vect/bb-slp-11.c
>> ===================================================================
>> --- testsuite/gcc.dg/vect/bb-slp-11.c   (revision 179159)
>> +++ testsuite/gcc.dg/vect/bb-slp-11.c   (working copy)
>> @@ -49,6 +49,7 @@ int main (void)
>>  }
>>
>>  /* { dg-final { scan-tree-dump-times "basic block vectorized using
>> SLP" 0 "slp" } } */
>> -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
>> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1
>> "slp" { xfail vect_multiple_sizes } } } */
>> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2
>> "slp" { target vect_multiple_sizes } } } */
>>  /* { dg-final { cleanup-tree-dump "slp" } } */
>>
>> Index: tree-vect-slp.c
>> ===================================================================
>> --- tree-vect-slp.c     (revision 179159)
>> +++ tree-vect-slp.c     (working copy)
>> @@ -1694,42 +1694,18 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb
>>
>>  /* Check if the basic block can be vectorized.  */
>>
>> -bb_vec_info
>> -vect_slp_analyze_bb (basic_block bb)
>> +static bb_vec_info
>> +vect_slp_analyze_bb_1 (basic_block bb)
>>  {
>>   bb_vec_info bb_vinfo;
>>   VEC (ddr_p, heap) *ddrs;
>>   VEC (slp_instance, heap) *slp_instances;
>>   slp_instance instance;
>> -  int i, insns = 0;
>> -  gimple_stmt_iterator gsi;
>> +  int i;
>>   int min_vf = 2;
>>   int max_vf = MAX_VECTORIZATION_FACTOR;
>>   bool data_dependence_in_bb = false;
>>
>> -  current_vector_size = 0;
>> -
>> -  if (vect_print_dump_info (REPORT_DETAILS))
>> -    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
>> -
>> -  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> -    {
>> -      gimple stmt = gsi_stmt (gsi);
>> -      if (!is_gimple_debug (stmt)
>> -         && !gimple_nop_p (stmt)
>> -         && gimple_code (stmt) != GIMPLE_LABEL)
>> -       insns++;
>> -    }
>> -
>> -  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
>> -    {
>> -      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
>> -        fprintf (vect_dump, "not vectorized: too many instructions in basic "
>> -                            "block.\n");
>> -
>> -      return NULL;
>> -    }
>> -
>>   bb_vinfo = new_bb_vec_info (bb);
>>   if (!bb_vinfo)
>>     return NULL;
>> @@ -1849,6 +1825,61 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb
>>  }
>>
>>
>> +bb_vec_info
>> +vect_slp_analyze_bb (basic_block bb)
>> +{
>> +  bb_vec_info bb_vinfo;
>> +  int insns = 0;
>> +  gimple_stmt_iterator gsi;
>> +  unsigned int vector_sizes;
>> +
>> +  if (vect_print_dump_info (REPORT_DETAILS))
>> +    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
>> +
>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> +    {
>> +      gimple stmt = gsi_stmt (gsi);
>> +      if (!is_gimple_debug (stmt)
>> +          && !gimple_nop_p (stmt)
>> +          && gimple_code (stmt) != GIMPLE_LABEL)
>> +        insns++;
>> +    }
>> +
>> +  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
>> +    {
>> +      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
>> +        fprintf (vect_dump, "not vectorized: too many instructions in basic "
>> +                            "block.\n");
>> +
>> +      return NULL;
>> +    }
>> +
>> +  /* Autodetect first vector size we try.  */
>> +  current_vector_size = 0;
>> +  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
>> +
>> +  while (1)
>> +    {
>> +      bb_vinfo = vect_slp_analyze_bb_1 (bb);
>> +      if (bb_vinfo)
>> +        return bb_vinfo;
>> +
>> +      destroy_bb_vec_info (bb_vinfo);
>> +
>> +      vector_sizes &= ~current_vector_size;
>> +      if (vector_sizes == 0
>> +          || current_vector_size == 0)
>> +        return NULL;
>> +
>> +      /* Try the next biggest vector size.  */
>> +      current_vector_size = 1 << floor_log2 (vector_sizes);
>> +      if (vect_print_dump_info (REPORT_DETAILS))
>> +        fprintf (vect_dump, "***** Re-trying analysis with "
>> +                 "vector size %d\n", current_vector_size);
>> +    }
>> +}
>> +
>> +
>>  /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
>>    the number of created vector stmts depends on the unrolling factor).
>>    However, the actual number of vector stmts for every SLP node depends on
>>
>

Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp   (revision 179159)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
     return $et_vect_multiple_sizes_saved
 }

+# Return 1 if the target supports vectors of 64 bits; the result is cached.
+
+proc check_effective_target_vect64 { } {
+    global et_vect64_saved
+
+    if [info exists et_vect64_saved] {
+        verbose "check_effective_target_vect64: using cached result" 2
+    } else {
+        set et_vect64_saved 0
+        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+           set et_vect64_saved 1
+        }
+    }
+
+    verbose "check_effective_target_vect64: returning $et_vect64_saved" 2
+    return $et_vect64_saved
+}
+
 # Return 1 if the target supports section-anchors

 proc check_effective_target_section_anchors { } {