From 71baaf8393dd79a98b4c0216e56d87083caf0177 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
Date: Mon, 12 Feb 2018 10:44:00 +1100
Subject: [PATCH 2/4] tree-loop-unroller
Change-Id: I58c25b5f2e796d4166af3ea4e50a0f4a3078b6c2
---
gcc/Makefile.in | 1 +
gcc/common.opt | 4 +
gcc/passes.def | 1 +
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
gcc/tree-ssa-loop-unroll.c | 268 +++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 276 insertions(+)
create mode 100644 gcc/tree-ssa-loop-unroll.c
@@ -1536,6 +1536,7 @@ OBJS = \
tree-ssa-loop-im.o \
tree-ssa-loop-ivcanon.o \
tree-ssa-loop-ivopts.o \
+ tree-ssa-loop-unroll.o \
tree-ssa-loop-manip.o \
tree-ssa-loop-niter.o \
tree-ssa-loop-prefetch.o \
@@ -1770,6 +1770,10 @@ fivopts
Common Report Var(flag_ivopts) Init(1) Optimization
Optimize induction variables on trees.
+ftree-loop-unroll
+Common Report Var(flag_tree_loop_unroll) Init(1) Optimization
+Perform loop unrolling in gimple.
+
fjump-tables
Common Var(flag_jump_tables) Init(1) Optimization
Use jump tables for sufficiently large switch statements.
@@ -302,6 +302,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_predcom);
NEXT_PASS (pass_complete_unroll);
NEXT_PASS (pass_slp_vectorize);
+ NEXT_PASS (pass_tree_loop_unroll);
NEXT_PASS (pass_loop_prefetch);
/* Run IVOPTs after the last pass that uses data-reference analysis
as that doesn't handle TARGET_MEM_REFs. */
@@ -202,6 +202,7 @@ DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences")
DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
+DEFTIMEVAR (TV_TREE_LOOP_UNROLL , "tree loop unroll")
DEFTIMEVAR (TV_PREDCOM , "predictive commoning")
DEFTIMEVAR (TV_TREE_CH , "tree copy headers")
DEFTIMEVAR (TV_TREE_SSA_UNCPROP , "tree SSA uncprop")
@@ -388,6 +388,7 @@ extern gimple_opt_pass *make_pass_complete_unrolli (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_parallelize_loops (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_loop_prefetch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_tree_loop_unroll (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt);
new file mode 100644
@@ -0,0 +1,268 @@
+
+/* Tree Loop Unroller.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "target.h"
+#include "gimple.h"
+#include "cfgloop.h"
+#include "tree-ssa-loop.h"
+#include "tree-scalar-evolution.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "gimple.h"
+#include "predict.h"
+#include "cfghooks.h"
+#include "tree-inline.h"
+#include "gimple-iterator.h"
+#include "fold-const.h"
+#include "tree-data-ref.h"
+#include "params.h"
+
+/* Count the load memory streams in the LOOP. If the streams are larger
+ (compared to MAX_LOAD_STREAMS), we dont need to compute all of
+ them. This is used to limit the partial unrolling factor to avoid
+ too many memory load streams. */
+
+static signed
+count_mem_load_streams (struct loop *loop, signed max_load_streams)
+{
+ basic_block *bbs = get_loop_body (loop);
+ unsigned nbbs = loop->num_nodes;
+ gimple_stmt_iterator gsi;
+ signed count = 0;
+ hash_set <data_reference_p> *dr_set = new hash_set<data_reference_p> ();
+ vec<data_reference_p> datarefs_vec;
+ datarefs_vec.create (20);
+ unsigned k;
+
+ for (unsigned i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ tree access_fn;
+ vec<tree> access_fns;
+ datarefs_vec.truncate (0);
+
+ if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec))
+ continue;
+
+ for (unsigned j = 0; j < datarefs_vec.length (); ++j)
+ {
+ data_reference_p dr = datarefs_vec[j];
+ if (!DR_IS_READ (dr))
+ continue;
+ access_fns = DR_ACCESS_FNS (dr);
+
+ FOR_EACH_VEC_ELT (access_fns, k, access_fn)
+ {
+ if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
+ {
+ if (!dr_set->add (dr))
+ count++;
+ break;
+ }
+ }
+ }
+
+ if (count > max_load_streams / 2)
+ break;
+ }
+ }
+ free_data_refs (datarefs_vec);
+ free (dr_set);
+ return count;
+}
+
+/* Verify that loop in a form that is suitable for unrolling. */
+
+static bool
+loop_form_ok_p (struct loop *loop)
+{
+ if (!single_exit (loop))
+ return false;
+
+ /* Loop has preheader. */
+ if (!loop_preheader_edge (loop))
+ return false;
+
+ return true;
+}
+
+/* Return false if the cost model does not prefer LOOP to be unrolled.
+ Return true and update the DESC if we decided that the LOOP
+ is to be unrolled. Also, in this case, determine the factor (NUNROLL)
+ by which the LOOP should be unrolled. */
+
+static bool
+decide_unroll_iterations (struct loop *loop,
+ struct tree_niter_desc *desc,
+ signed *nunroll)
+{
+ /* nunroll = total number of copies of the original loop body in
+ unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
+ signed ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+ signed ninsns_outer = tree_num_loop_insns (loop_outer (loop),
+ &eni_size_weights);
+ signed n_unroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
+ signed load_streams = 0;
+ signed max_load_streams = -1;
+ signed n_unroll2;
+
+ if (ninsns_outer >= PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
+ return false;
+ ninsns_outer -= ninsns;
+ n_unroll2 = (PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) - ninsns_outer)/ ninsns;
+ if (n_unroll2 < n_unroll)
+ n_unroll = n_unroll2;
+
+ if (n_unroll > PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
+ n_unroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+
+ /* Check for simple loops. */
+ edge exit = single_exit (loop);
+ if (!number_of_iterations_exit (loop, exit,
+ desc, false, false)
+ || !integer_onep (desc->assumptions)
+ || !integer_zerop (desc->may_be_zero)
+ || !desc->niter)
+ return false;
+
+ /* Now force nunroll to be power of 2. */
+ n_unroll = 1 << (floor_log2 (n_unroll));
+ if (targetm.hw_max_mem_read_streams
+ && (max_load_streams = targetm.hw_max_mem_read_streams ()) > 0)
+ {
+ load_streams = count_mem_load_streams (loop, max_load_streams);
+ if (load_streams > 0)
+ {
+ signed t = 1 << (floor_log2 (max_load_streams / load_streams));
+ if (t < n_unroll)
+ n_unroll = t;
+ }
+ }
+
+ if (!can_unroll_loop_p (loop, n_unroll, desc))
+ return false;
+
+ *nunroll = n_unroll;
+ return true;
+}
+
+/* Unroll LOOP based on the cost model. */
+
+static bool
+unroll_loop (struct loop *loop)
+{
+ struct tree_niter_desc desc;
+ signed n_unroll;
+
+ if (!decide_unroll_iterations (loop, &desc, &n_unroll))
+ return false;
+ if (n_unroll <= 1)
+ return false;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nunrollong loop=%d times=%d\n", loop->num, n_unroll);
+ initialize_original_copy_tables ();
+ tree_unroll_loop (loop, n_unroll,
+ single_dom_exit (loop), &desc);
+ free_original_copy_tables ();
+ return true;
+}
+
+static unsigned
+execute_tree_loop_unroll ()
+{
+ struct loop *loop;
+ bool changed = false;
+ int todo_flags = 0;
+ if (optimize_function_for_size_p (cfun))
+ return 0;
+
+ estimate_numbers_of_iterations (cfun);
+ FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+ {
+ if (!loop_form_ok_p (loop))
+ continue;
+ if (unroll_loop (loop))
+ {
+ changed |= true;
+ free_numbers_of_iterations_estimates (loop);
+ }
+ }
+
+ if (changed)
+ {
+ scev_reset ();
+ todo_flags |= TODO_cleanup_cfg;
+ }
+
+ return todo_flags;
+}
+
+namespace {
+const pass_data pass_data_tree_loop_unroll =
+{
+ GIMPLE_PASS, /* type */
+ "tree-loop-unroll", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_TREE_LOOP_UNROLL, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ (TODO_update_ssa | TODO_verify_all),
+};
+
+class pass_tree_loop_unroll : public gimple_opt_pass
+{
+public:
+ pass_tree_loop_unroll (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_tree_loop_unroll, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ opt_pass * clone () { return new pass_tree_loop_unroll (m_ctxt); }
+ virtual bool gate (function *) { return flag_tree_loop_unroll != 0; }
+ virtual unsigned int execute (function *)
+ {
+ return execute_tree_loop_unroll ();
+ }
+
+}; // class pass_tree_loop_unroll
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_tree_loop_unroll (gcc::context *ctxt)
+{
+ return new pass_tree_loop_unroll (ctxt);
+}
+
--
2.7.4