From patchwork Sun Sep 25 11:54:23 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Ira Rosen X-Patchwork-Id: 4316 Return-Path: X-Original-To: patchwork@peony.canonical.com Delivered-To: patchwork@peony.canonical.com Received: from fiordland.canonical.com (fiordland.canonical.com [91.189.94.145]) by peony.canonical.com (Postfix) with ESMTP id EFA3023EF9 for ; Sun, 25 Sep 2011 11:54:25 +0000 (UTC) Received: from mail-fx0-f52.google.com (mail-fx0-f52.google.com [209.85.161.52]) by fiordland.canonical.com (Postfix) with ESMTP id CCA80A182B7 for ; Sun, 25 Sep 2011 11:54:25 +0000 (UTC) Received: by fxe23 with SMTP id 23so7142457fxe.11 for ; Sun, 25 Sep 2011 04:54:25 -0700 (PDT) Received: by 10.223.57.17 with SMTP id a17mr3724938fah.65.1316951665598; Sun, 25 Sep 2011 04:54:25 -0700 (PDT) X-Forwarded-To: linaro-patchwork@canonical.com X-Forwarded-For: patch@linaro.org linaro-patchwork@canonical.com Delivered-To: patches@linaro.org Received: by 10.152.3.234 with SMTP id f10cs7332laf; Sun, 25 Sep 2011 04:54:25 -0700 (PDT) Received: by 10.150.139.11 with SMTP id m11mr4973933ybd.7.1316951664119; Sun, 25 Sep 2011 04:54:24 -0700 (PDT) Received: from mail-gw0-f50.google.com (mail-gw0-f50.google.com [74.125.83.50]) by mx.google.com with ESMTPS id u41si3471289ybu.75.2011.09.25.04.54.23 (version=TLSv1/SSLv3 cipher=OTHER); Sun, 25 Sep 2011 04:54:24 -0700 (PDT) Received-SPF: neutral (google.com: 74.125.83.50 is neither permitted nor denied by best guess record for domain of ira.rosen@linaro.org) client-ip=74.125.83.50; Authentication-Results: mx.google.com; spf=neutral (google.com: 74.125.83.50 is neither permitted nor denied by best guess record for domain of ira.rosen@linaro.org) smtp.mail=ira.rosen@linaro.org Received: by gwj19 with SMTP id 19so3662756gwj.37 for ; Sun, 25 Sep 2011 04:54:23 -0700 (PDT) MIME-Version: 1.0 Received: by 10.150.65.13 with SMTP id n13mr5339314yba.293.1316951663406; Sun, 25 Sep 2011 
04:54:23 -0700 (PDT) Received: by 10.151.113.18 with HTTP; Sun, 25 Sep 2011 04:54:23 -0700 (PDT) In-Reply-To: References: Date: Sun, 25 Sep 2011 14:54:23 +0300 Message-ID: Subject: Re: [patch] Support a choice of vector size in SLP From: Ira Rosen To: Richard Guenther Cc: gcc-patches@gcc.gnu.org, Patch Tracking On 25 September 2011 14:45, Richard Guenther wrote: > On Sun, Sep 25, 2011 at 12:59 PM, Ira Rosen wrote: >> Hi, >> >> This patch supports an automatic choice of vector size in basic block >> vectorization similar to the loop vectorization case. >> >> I am not sure about the new keyword. > > The testsuite one?  I guess we should name them vect128, vect256, etc., > as testcases will be looking for an absolute size, not a relative ("half") one. OK, changing it to: Thanks, Ira > > Richard. > >> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux >> and arm-linux-gnueabi. >> >> Thanks, >> Ira >> >> ChangeLog: >> >>        * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part >>        of vect_analyze_bb here. >>        (vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1. >> >> testsuite/ChangeLog: >> >>        * lib/target-supports.exp (check_effective_target_vect_half_size): New. >>        * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case >>        of multiple vector sizes. >>        * gcc.dg/vect/bb-slp-26.c: New. >> >> Index: testsuite/lib/target-supports.exp >> =================================================================== >> --- testsuite/lib/target-supports.exp   (revision 179159) >> +++ testsuite/lib/target-supports.exp   (working copy) >> @@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes { >>     return $et_vect_multiple_sizes_saved >>  } >> >> +# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints. 
>> + >> +proc check_effective_target_vect_half_size { } { >> +    global et_vect_half_size >> + >> +    if [info exists et_vect_half_size_saved] { >> +        verbose "check_effective_target_vect_half_size: using cached result" 2 >> +    } else { >> +        set et_vect_half_size_saved 0 >> +        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { >> +           set et_vect_half_size_saved 1 >> +        } >> +    } >> + >> +    verbose "check_effective_target_vect_half_size: returning >> $et_vect_half_size_saved" 2 >> +    return $et_vect_half_size_saved >> +} >> + >>  # Return 1 if the target supports section-anchors >> >>  proc check_effective_target_section_anchors { } { >> Index: testsuite/gcc.dg/vect/bb-slp-26.c >> =================================================================== >> --- testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0) >> +++ testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0) >> @@ -0,0 +1,59 @@ >> +/* { dg-require-effective-target vect_int } */ >> + >> +#include >> +#include "tree-vect.h" >> + >> +#define A 3 >> +#define B 4 >> +#define N 256 >> + >> +char src[N], dst[N]; >> + >> +void foo (char * __restrict__ dst, char * __restrict__ src, int h, >> int stride, int dummy) >> +{ >> +  int i; >> +  h /= 16; >> +  for (i = 0; i < h; i++) >> +    { >> +      dst[0] += A*src[0] + src[stride]; >> +      dst[1] += A*src[1] + src[1+stride]; >> +      dst[2] += A*src[2] + src[2+stride]; >> +      dst[3] += A*src[3] + src[3+stride]; >> +      dst[4] += A*src[4] + src[4+stride]; >> +      dst[5] += A*src[5] + src[5+stride]; >> +      dst[6] += A*src[6] + src[6+stride]; >> +      dst[7] += A*src[7] + src[7+stride]; >> +      dst += 8; >> +      src += 8; >> +      if (dummy == 32) >> +        abort (); >> +   } >> +} >> + >> + >> +int main (void) >> +{ >> +  int i; >> + >> +  check_vect (); >> + >> +  for (i = 0; i < N; i++) >> +    { >> +       dst[i] = 0; >> +       src[i] = i/8; >> +    } >> + >> +  foo (dst, src, N, 8, 0); >> + 
>> +  for (i = 0; i < N/2; i++) >> +    { >> +      if (dst[i] != A * src[i] + src[i+8]) >> +        abort (); >> +    } >> + >> +  return 0; >> +} >> + >> +/* { dg-final { scan-tree-dump-times "basic block vectorized using >> SLP" 1 "slp" { target vect_half_size } } } */ >> +/* { dg-final { cleanup-tree-dump "slp" } } */ >> + >> Index: testsuite/gcc.dg/vect/bb-slp-11.c >> =================================================================== >> --- testsuite/gcc.dg/vect/bb-slp-11.c   (revision 179159) >> +++ testsuite/gcc.dg/vect/bb-slp-11.c   (working copy) >> @@ -49,6 +49,7 @@ int main (void) >>  } >> >>  /* { dg-final { scan-tree-dump-times "basic block vectorized using >> SLP" 0 "slp" } } */ >> -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */ >> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 >> "slp" { xfail vect_multiple_sizes } } } */ >> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 >> "slp" { target vect_multiple_sizes } } } */ >>  /* { dg-final { cleanup-tree-dump "slp" } } */ >> >> Index: tree-vect-slp.c >> =================================================================== >> --- tree-vect-slp.c     (revision 179159) >> +++ tree-vect-slp.c     (working copy) >> @@ -1694,42 +1694,18 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb >> >>  /* Check if the basic block can be vectorized.  
*/ >> >> -bb_vec_info >> -vect_slp_analyze_bb (basic_block bb) >> +static bb_vec_info >> +vect_slp_analyze_bb_1 (basic_block bb) >>  { >>   bb_vec_info bb_vinfo; >>   VEC (ddr_p, heap) *ddrs; >>   VEC (slp_instance, heap) *slp_instances; >>   slp_instance instance; >> -  int i, insns = 0; >> -  gimple_stmt_iterator gsi; >> +  int i; >>   int min_vf = 2; >>   int max_vf = MAX_VECTORIZATION_FACTOR; >>   bool data_dependence_in_bb = false; >> >> -  current_vector_size = 0; >> - >> -  if (vect_print_dump_info (REPORT_DETAILS)) >> -    fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); >> - >> -  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) >> -    { >> -      gimple stmt = gsi_stmt (gsi); >> -      if (!is_gimple_debug (stmt) >> -         && !gimple_nop_p (stmt) >> -         && gimple_code (stmt) != GIMPLE_LABEL) >> -       insns++; >> -    } >> - >> -  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) >> -    { >> -      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) >> -        fprintf (vect_dump, "not vectorized: too many instructions in basic " >> -                            "block.\n"); >> - >> -      return NULL; >> -    } >> - >>   bb_vinfo = new_bb_vec_info (bb); >>   if (!bb_vinfo) >>     return NULL; >> @@ -1849,6 +1825,61 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb >>  } >> >> >> +bb_vec_info >> +vect_slp_analyze_bb (basic_block bb) >> +{ >> +  bb_vec_info bb_vinfo; >> +  int insns = 0; >> +  gimple_stmt_iterator gsi; >> +  unsigned int vector_sizes; >> + >> +  if (vect_print_dump_info (REPORT_DETAILS)) >> +    fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); >> + >> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) >> +    { >> +      gimple stmt = gsi_stmt (gsi); >> +      if (!is_gimple_debug (stmt) >> +          && !gimple_nop_p (stmt) >> +          && gimple_code (stmt) != GIMPLE_LABEL) >> +        insns++; >> +    } >> + >> +  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) >> +    
{ >> +      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) >> +        fprintf (vect_dump, "not vectorized: too many instructions in basic " >> +                            "block.\n"); >> + >> +      return NULL; >> +    } >> + >> +  /* Autodetect first vector size we try.  */ >> +  current_vector_size = 0; >> +  vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); >> + >> +  while (1) >> +    { >> +      bb_vinfo = vect_slp_analyze_bb_1 (bb); >> +      if (bb_vinfo) >> +        return bb_vinfo; >> + >> +      destroy_bb_vec_info (bb_vinfo); >> + >> +      vector_sizes &= ~current_vector_size; >> +      if (vector_sizes == 0 >> +          || current_vector_size == 0) >> +        return NULL; >> + >> +      /* Try the next biggest vector size.  */ >> +      current_vector_size = 1 << floor_log2 (vector_sizes); >> +      if (vect_print_dump_info (REPORT_DETAILS)) >> +        fprintf (vect_dump, "***** Re-trying analysis with " >> +                 "vector size %d\n", current_vector_size); >> +    } >> +} >> + >> + >>  /* SLP costs are calculated according to SLP instance unrolling factor (i.e., >>    the number of created vector stmts depends on the unrolling factor). >>    However, the actual number of vector stmts for every SLP node depends on >> > Index: testsuite/lib/target-supports.exp =================================================================== --- testsuite/lib/target-supports.exp (revision 179159) +++ testsuite/lib/target-supports.exp (working copy) @@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes { return $et_vect_multiple_sizes_saved } +# Return 1 if the target supports vectors of 64 bits. 
+
+proc check_effective_target_vect64 { } {
+    global et_vect64_saved
+
+    if [info exists et_vect64_saved] {
+        verbose "check_effective_target_vect64: using cached result" 2
+    } else {
+        set et_vect64_saved 0
+        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+           set et_vect64_saved 1
+        }
+    }
+
+    verbose "check_effective_target_vect64: returning $et_vect64_saved" 2
+    return $et_vect64_saved
+}
+
 # Return 1 if the target supports section-anchors
 
 proc check_effective_target_section_anchors { } {