@@ -1296,6 +1296,7 @@ OBJS = \
gimple-ssa-isolate-paths.o \
gimple-ssa-nonnull-compare.o \
gimple-ssa-split-paths.o \
+ gimple-ssa-store-merging.o \
gimple-ssa-strength-reduction.o \
gimple-ssa-sprintf.o \
gimple-ssa-warn-alloca.o \
@@ -1463,6 +1463,10 @@ fstrict-volatile-bitfields
Common Report Var(flag_strict_volatile_bitfields) Init(-1) Optimization
Force bitfield accesses to match their type width.
+fstore-merging
+Common Report Var(flag_store_merging) Optimization
+Merge adjacent stores.
+
fguess-branch-probability
Common Report Var(flag_guess_branch_prob) Optimization
Enable guessing of branch probabilities.
@@ -405,7 +405,7 @@ Objective-C and Objective-C++ Dialects}.
-fsingle-precision-constant -fsplit-ivs-in-unroller -fsplit-loops@gol
-fsplit-paths @gol
-fsplit-wide-types -fssa-backprop -fssa-phiopt @gol
--fstdarg-opt -fstrict-aliasing @gol
+-fstdarg-opt -fstore-merging -fstrict-aliasing @gol
-fstrict-overflow -fthread-jumps -ftracer -ftree-bit-ccp @gol
-ftree-builtin-call-dce -ftree-ccp -ftree-ch @gol
-ftree-coalesce-vars -ftree-copy-prop -ftree-dce -ftree-dominator-opts @gol
@@ -416,8 +416,8 @@ Objective-C and Objective-C++ Dialects}.
-ftree-loop-vectorize @gol
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
--ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol
--ftree-vectorize -ftree-vrp -funconstrained-commons @gol
+-ftree-switch-conversion -ftree-tail-merge @gol
+-ftree-ter -ftree-vectorize -ftree-vrp -funconstrained-commons @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-math-optimizations -funswitch-loops @gol
-fipa-ra -fvariable-expansion-in-unroller -fvect-cost-model -fvpt @gol
@@ -6752,6 +6752,7 @@ compilation time.
-fsplit-wide-types @gol
-fssa-backprop @gol
-fssa-phiopt @gol
+-fstore-merging @gol
-ftree-bit-ccp @gol
-ftree-ccp @gol
-ftree-ch @gol
@@ -8094,6 +8095,13 @@ Perform scalar replacement of aggregates. This pass replaces structure
references with scalars to prevent committing structures to memory too
early. This flag is enabled by default at @option{-O} and higher.
+@item -fstore-merging
+@opindex fstore-merging
+Perform merging of narrow stores to consecutive memory addresses. This pass
+merges contiguous stores of immediate values narrower than a word into fewer
+wider stores to reduce the number of instructions. This is enabled by default
+at @option{-O2} and higher.
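+For example, a sequence of adjacent byte stores of constants such as
+@smallexample
+p->b = 0; p->c = 0; p->d = 0; p->e = 0;
+@end smallexample
+@noindent
+may be combined into a single wider store when the target allows it.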
+
@item -ftree-ter
@opindex ftree-ter
Perform temporary expression replacement during the SSA->normal phase. Single
@@ -27,6 +27,7 @@ extern int folding_initializer;
/* Convert between trees and native memory representation. */
extern int native_encode_expr (const_tree, unsigned char *, int, int off = -1);
extern tree native_interpret_expr (tree, const unsigned char *, int);
+extern bool can_native_encode_type_p (tree);
/* Fold constants as much as possible in an expression.
Returns the simplified expression.
@@ -7516,6 +7516,26 @@ can_native_interpret_type_p (tree type)
}
}
+/* Return true iff a constant of type TYPE is accepted by
+ native_encode_expr. */
+
+bool
+can_native_encode_type_p (tree type)
+{
+ switch (TREE_CODE (type))
+ {
+ case INTEGER_TYPE:
+ case REAL_TYPE:
+ case FIXED_POINT_TYPE:
+ case COMPLEX_TYPE:
+ case VECTOR_TYPE:
+ case POINTER_TYPE:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* Fold a VIEW_CONVERT_EXPR of a constant expression EXPR to type
TYPE at compile-time. If we're unable to perform the conversion
return NULL_TREE. */
new file mode 100644
@@ -0,0 +1,1471 @@
+/* GIMPLE store merging pass.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* The purpose of this pass is to combine multiple memory stores of
+ constant values to consecutive memory locations into fewer wider stores.
+   For example, if we have a sequence performing four byte stores to
+ consecutive memory locations:
+ [p ] := imm1;
+ [p + 1B] := imm2;
+ [p + 2B] := imm3;
+ [p + 3B] := imm4;
+ we can transform this into a single 4-byte store if the target supports it:
+ [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness.
+
+ The algorithm is applied to each basic block in three phases:
+
+ 1) Scan through the basic block recording constant assignments to
+ destinations that can be expressed as a store to memory of a certain size
+ at a certain bit offset. Record store chains to different bases in a
+ hash_map (m_stores) and make sure to terminate such chains when appropriate
+   (for example when the stored values get used subsequently).
+ These stores can be a result of structure element initializers, array stores
+ etc. A store_immediate_info object is recorded for every such store.
+ Record as many such assignments to a single base as possible until a
+ statement that interferes with the store sequence is encountered.
+
+ 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
+ store_immediate_info objects) and coalesce contiguous stores into
+ merged_store_group objects.
+
+ For example, given the stores:
+ [p ] := 0;
+ [p + 1B] := 1;
+ [p + 3B] := 0;
+ [p + 4B] := 1;
+ [p + 5B] := 0;
+ [p + 6B] := 0;
+ This phase would produce two merged_store_group objects, one recording the
+ two bytes stored in the memory region [p : p + 1] and another
+ recording the four bytes stored in the memory region [p + 3 : p + 6].
+
+ 3) The merged_store_group objects produced in phase 2) are processed
+ to generate the sequence of wider stores that set the contiguous memory
+ regions to the sequence of bytes that correspond to it. This may emit
+ multiple stores per store group to handle contiguous stores that are not
+ of a size that is a power of 2. For example it can try to emit a 40-bit
+ store as a 32-bit store followed by an 8-bit store.
+ We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or
+ SLOW_UNALIGNED_ACCESS rules.
+
+ Note on endianness and example:
+ Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
+ [p ] := 0x1234;
+ [p + 2B] := 0x5678;
+ [p + 4B] := 0xab;
+ [p + 5B] := 0xcd;
+
+ The memory layout for little-endian (LE) and big-endian (BE) must be:
+ p |LE|BE|
+ ---------
+ 0 |34|12|
+ 1 |12|34|
+ 2 |78|56|
+ 3 |56|78|
+ 4 |ab|ab|
+ 5 |cd|cd|
+
+ To merge these into a single 48-bit merged value 'val' in phase 2)
+ on little-endian we insert stores to higher (consecutive) bitpositions
+ into the most significant bits of the merged value.
+ The final merged value would be: 0xcdab56781234
+
+ For big-endian we insert stores to higher bitpositions into the least
+ significant bits of the merged value.
+ The final merged value would be: 0x12345678abcd
+
+ Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
+ followed by a 16-bit store. Again, we must consider endianness when
+ breaking down the 48-bit value 'val' computed above.
+ For little endian we emit:
+ [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
+ [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
+
+ Whereas for big-endian we emit:
+ [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
+ [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
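+/* To continue the example from phase 2) above: assuming a little-endian
+   target that permits the unaligned accesses, phase 3) would emit a single
+   16-bit store of 0x0100 for the [p : p + 1] group and a single 32-bit
+   store of 0x00000100 for the [p + 3 : p + 6] group, replacing the six
+   original byte stores with two wider ones.  */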
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "builtins.h"
+#include "fold-const.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "gimple-pretty-print.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "params.h"
+#include "print-tree.h"
+#include "tree-hash-traits.h"
+#include "gimple-iterator.h"
+#include "gimplify.h"
+#include "stor-layout.h"
+#include "timevar.h"
+#include "tree-cfg.h"
+#include "tree-eh.h"
+#include "target.h"
+
+/* The maximum size (in bits) of the stores this pass should generate. */
+#define MAX_STORE_BITSIZE (BITS_PER_WORD)
+#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
+
+namespace {
+
+/* Struct recording the information about a single store of an immediate
+ to memory. These are created in the first phase and coalesced into
+ merged_store_group objects in the second phase. */
+
+struct store_immediate_info
+{
+ unsigned HOST_WIDE_INT bitsize;
+ unsigned HOST_WIDE_INT bitpos;
+ tree val;
+ tree dest;
+ gimple *stmt;
+ unsigned int order;
+ store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, tree,
+ tree, gimple *, unsigned int);
+};
+
+store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
+ unsigned HOST_WIDE_INT bp, tree v,
+ tree d, gimple *st,
+ unsigned int ord)
+ : bitsize (bs), bitpos (bp), val (v), dest (d), stmt (st), order (ord)
+{
+}
+
+/* Struct representing a group of stores to contiguous memory locations.
+ These are produced by the second phase (coalescing) and consumed in the
+ third phase that outputs the widened stores. */
+
+struct merged_store_group
+{
+ unsigned HOST_WIDE_INT start;
+ unsigned HOST_WIDE_INT width;
+ /* The size of the allocated memory for val. */
+ unsigned HOST_WIDE_INT buf_size;
+
+ unsigned int align;
+ unsigned int first_order;
+ unsigned int last_order;
+
+ auto_vec<struct store_immediate_info *> stores;
+ /* We record the first and last original statements in the sequence because
+ we'll need their vuse/vdef and replacement position. It's easier to keep
+ track of them separately as 'stores' is reordered by apply_stores. */
+ gimple *last_stmt;
+ gimple *first_stmt;
+ unsigned char *val;
+
+ merged_store_group (store_immediate_info *);
+ ~merged_store_group ();
+ void merge_into (store_immediate_info *);
+ void merge_overlapping (store_immediate_info *);
+ bool apply_stores ();
+};
+
+/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
+
+static void
+dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
+{
+ if (!fd)
+ return;
+
+ for (unsigned int i = 0; i < len; i++)
+ fprintf (fd, "%x ", ptr[i]);
+ fprintf (fd, "\n");
+}
+
+/* Fill a byte array PTR of SZ elements with zeroes.  This is used by
+   encode_tree_to_bitpos to zero-initialize arrays that are likely to be
+   small but whose size is not a compile-time constant.  */
+
+static inline void
+zero_char_buf (unsigned char *ptr, unsigned int sz)
+{
+ for (unsigned int i = 0; i < sz; i++)
+ ptr[i] = 0;
+}
+
+/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
+ bits between adjacent elements. AMNT should be within
+ [0, BITS_PER_UNIT).
+ Example, AMNT = 2:
+ 00011111|11100000 << 2 = 01111111|10000000
+ PTR[1] | PTR[0] PTR[1] | PTR[0]. */
+
+static void
+shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
+{
+ if (amnt == 0)
+ return;
+
+ unsigned char carry_over = 0U;
+ unsigned char carry_mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - amnt));
+ unsigned char clear_mask = (~0U) << amnt;
+
+ for (unsigned int i = 0; i < sz; i++)
+ {
+ unsigned prev_carry_over = carry_over;
+ carry_over
+ = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
+
+ ptr[i] <<= amnt;
+ if (i != 0)
+ {
+ ptr[i] &= clear_mask;
+ ptr[i] |= prev_carry_over;
+ }
+ }
+}
+
+/* Like shift_bytes_in_array but for big-endian.
+ Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
+ bits between adjacent elements. AMNT should be within
+ [0, BITS_PER_UNIT).
+ Example, AMNT = 2:
+ 00011111|11100000 >> 2 = 00000111|11111000
+ PTR[0] | PTR[1] PTR[0] | PTR[1]. */
+
+static void
+shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
+ unsigned int amnt)
+{
+ if (amnt == 0)
+ return;
+
+ unsigned char carry_over = 0U;
+ unsigned char carry_mask = ~(~0U << amnt);
+
+ for (unsigned int i = 0; i < sz; i++)
+ {
+ unsigned prev_carry_over = carry_over;
+ carry_over
+ = (ptr[i] & carry_mask);
+
+ carry_over <<= ((unsigned char)BITS_PER_UNIT - amnt);
+ ptr[i] >>= amnt;
+ ptr[i] |= prev_carry_over;
+ }
+}
+
+/* Clear out LEN bits starting from bit START in the byte array
+ PTR. This clears the bits to the *right* from START.
+ START must be within [0, BITS_PER_UNIT) and counts starting from
+ the least significant bit. */
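+/* For illustration, assuming BITS_PER_UNIT == 8:
+   clear_bit_region_be (ptr, 5, 3) clears bits 5, 4 and 3 of ptr[0]
+   (ptr[0] &= ~0x38), while clear_bit_region_be (ptr, 7, 12) clears all of
+   ptr[0] and the four most significant bits of ptr[1].  */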
+
+static void
+clear_bit_region_be (unsigned char *ptr, unsigned int start,
+ unsigned int len)
+{
+ if (len == 0)
+ return;
+ /* Clear len bits to the right of start. */
+ else if (len <= start + 1)
+ {
+ unsigned char mask = (~(~0U << len));
+ mask = mask << (start + 1U - len);
+ ptr[0] &= ~mask;
+ }
+ else if (start != BITS_PER_UNIT - 1)
+ {
+ clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
+ clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
+ len - (start % BITS_PER_UNIT) - 1);
+ }
+ else if (start == BITS_PER_UNIT - 1
+ && len > BITS_PER_UNIT)
+ {
+ unsigned int nbytes = len / BITS_PER_UNIT;
+ for (unsigned int i = 0; i < nbytes; i++)
+ ptr[i] = 0U;
+ if (len % BITS_PER_UNIT != 0)
+ clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
+ len % BITS_PER_UNIT);
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* In the byte array PTR clear the bit region starting at bit
+ START and is LEN bits wide.
+ For regions spanning multiple bytes do this recursively until we reach
+ zero LEN or a region contained within a single byte. */
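+/* For illustration, assuming BITS_PER_UNIT == 8:
+   clear_bit_region (ptr, 2, 4) clears bits 2 through 5 of ptr[0]
+   (ptr[0] &= ~0x3c), and clear_bit_region (ptr, 10, 4) is equivalent to
+   clear_bit_region (ptr + 1, 2, 4).  */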
+
+static void
+clear_bit_region (unsigned char *ptr, unsigned int start,
+ unsigned int len)
+{
+ /* Degenerate base case. */
+ if (len == 0)
+ return;
+ else if (start >= BITS_PER_UNIT)
+ clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
+ /* Second base case. */
+ else if ((start + len) <= BITS_PER_UNIT)
+ {
+ unsigned char mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - len));
+ mask >>= BITS_PER_UNIT - (start + len);
+
+ ptr[0] &= ~mask;
+
+ return;
+ }
+ /* Clear most significant bits in a byte and proceed with the next byte. */
+ else if (start != 0)
+ {
+ clear_bit_region (ptr, start, BITS_PER_UNIT - start);
+      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
+ }
+ /* Whole bytes need to be cleared. */
+ else if (start == 0 && len > BITS_PER_UNIT)
+ {
+ unsigned int nbytes = len / BITS_PER_UNIT;
+ /* We could recurse on each byte but do the loop here to avoid
+ recursing too deep. */
+ for (unsigned int i = 0; i < nbytes; i++)
+ ptr[i] = 0U;
+ /* Clear the remaining sub-byte region if there is one. */
+ if (len % BITS_PER_UNIT != 0)
+ clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* Write BITLEN bits of EXPR to the byte array PTR at
+ bit position BITPOS. PTR should contain TOTAL_BYTES elements.
+ Return true if the operation succeeded. */
+
+static bool
+encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
+ unsigned int total_bytes)
+{
+ unsigned int first_byte = bitpos / BITS_PER_UNIT;
+ tree tmp_int = expr;
+ bool sub_byte_op_p = (bitlen % BITS_PER_UNIT) || (bitpos % BITS_PER_UNIT)
+ || mode_for_size (bitlen, MODE_INT, 0) == BLKmode;
+
+ if (!sub_byte_op_p)
+ return native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
+ != 0;
+
+ /* LITTLE-ENDIAN
+ We are writing a non byte-sized quantity or at a position that is not
+ at a byte boundary.
+ |--------|--------|--------| ptr + first_byte
+ ^ ^
+ xxx xxxxxxxx xxx< bp>
+ |______EXPR____|
+
+     First native_encode_expr EXPR into a temporary buffer and shift each
+ byte in the buffer by 'bp' (carrying the bits over as necessary).
+ |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
+ <------bitlen---->< bp>
+ Then we clear the destination bits:
+ |---00000|00000000|000-----| ptr + first_byte
+ <-------bitlen--->< bp>
+
+ Finally we ORR the bytes of the shifted EXPR into the cleared region:
+ |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
+
+ BIG-ENDIAN
+ We are writing a non byte-sized quantity or at a position that is not
+ at a byte boundary.
+ ptr + first_byte |--------|--------|--------|
+ ^ ^
+ <bp >xxx xxxxxxxx xxx
+ |_____EXPR_____|
+
+     First native_encode_expr EXPR into a temporary buffer and shift each
+     byte in the buffer to the right (carrying the bits over as necessary).
+ We shift by as much as needed to align the most significant bit of EXPR
+ with bitpos:
+ |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
+ <---bitlen----> <bp ><-----bitlen----->
+ Then we clear the destination bits:
+ ptr + first_byte |-----000||00000000||00000---|
+ <bp ><-------bitlen----->
+
+ Finally we ORR the bytes of the shifted EXPR into the cleared region:
+ ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
+ The awkwardness comes from the fact that bitpos is counted from the
+ most significant bit of a byte. */
+
+ /* Allocate an extra byte so that we have space to shift into. */
+ unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
+ unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
+ zero_char_buf (tmpbuf, byte_size);
+ /* The store detection code should only have allowed constants that are
+ accepted by native_encode_expr. */
+ if (native_encode_expr (expr, tmpbuf, byte_size, 0) == 0)
+ gcc_unreachable ();
+
+ /* The native_encode_expr machinery uses TYPE_MODE to determine how many
+ bytes to write. This means it can write more than
+ ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
+ write 8 bytes for a bitlen of 40). Skip the bytes that are not within
+ bitlen and zero out the bits that are not relevant as well (that may
+ contain a sign bit due to sign-extension). */
+ unsigned int padding
+ = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
+ if (BYTES_BIG_ENDIAN)
+ {
+ tmpbuf += padding;
+ byte_size -= padding;
+ if (bitlen % BITS_PER_UNIT != 0)
+ clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
+ BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
+ }
+
+  /* Clear the bit region in PTR into which the bits from TMPBUF will be
+     inserted.  */
+ if (BYTES_BIG_ENDIAN)
+ clear_bit_region_be (ptr + first_byte,
+ BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
+ else
+ clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
+
+ int shift_amnt;
+ int bitlen_mod = bitlen % BITS_PER_UNIT;
+ int bitpos_mod = bitpos % BITS_PER_UNIT;
+
+ bool skip_byte = false;
+ if (BYTES_BIG_ENDIAN)
+ {
+ /* BITPOS and BITLEN are exactly aligned and no shifting
+ is necessary. */
+ if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
+ || (bitpos_mod == 0 && bitlen_mod == 0))
+ shift_amnt = 0;
+ /* |. . . . . . . .|
+ <bp > <blen >.
+ We always shift right for BYTES_BIG_ENDIAN so shift the beginning
+ of the value until it aligns with 'bp' in the next byte over. */
+ else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
+ {
+ shift_amnt = bitlen_mod + bitpos_mod;
+ skip_byte = bitlen_mod != 0;
+ }
+ /* |. . . . . . . .|
+ <----bp--->
+ <---blen---->.
+ Shift the value right within the same byte so it aligns with 'bp'. */
+ else
+ shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
+ }
+ else
+ shift_amnt = bitpos % BITS_PER_UNIT;
+
+ /* Create the shifted version of EXPR. */
+ if (!BYTES_BIG_ENDIAN)
+ shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
+ else
+ {
+ gcc_assert (BYTES_BIG_ENDIAN);
+ shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
+ /* If shifting right forced us to move into the next byte skip the now
+ empty byte. */
+ if (skip_byte)
+ {
+ tmpbuf++;
+ byte_size--;
+ }
+ }
+
+ /* Insert the bits from TMPBUF. */
+ for (unsigned int i = 0; i < byte_size; i++)
+ ptr[first_byte + i] |= tmpbuf[i];
+
+ return true;
+}
+
+/* Sorting function for store_immediate_info objects.
+ Sorts them by bitposition. */
+
+static int
+sort_by_bitpos (const void *x, const void *y)
+{
+ store_immediate_info *const *tmp = (store_immediate_info * const *) x;
+ store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
+
+ if ((*tmp)->bitpos <= (*tmp2)->bitpos)
+ return -1;
+ else if ((*tmp)->bitpos > (*tmp2)->bitpos)
+ return 1;
+
+ gcc_unreachable ();
+}
+
+/* Sorting function for store_immediate_info objects.
+ Sorts them by the order field. */
+
+static int
+sort_by_order (const void *x, const void *y)
+{
+ store_immediate_info *const *tmp = (store_immediate_info * const *) x;
+ store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
+
+ if ((*tmp)->order < (*tmp2)->order)
+ return -1;
+ else if ((*tmp)->order > (*tmp2)->order)
+ return 1;
+
+ gcc_unreachable ();
+}
+
+/* Initialize a merged_store_group object from a store_immediate_info
+ object. */
+
+merged_store_group::merged_store_group (store_immediate_info *info)
+{
+ start = info->bitpos;
+ width = info->bitsize;
+ /* VAL has memory allocated for it in apply_stores once the group
+ width has been finalized. */
+ val = NULL;
+ align = get_object_alignment (info->dest);
+ stores.create (1);
+ stores.safe_push (info);
+ last_stmt = info->stmt;
+ last_order = info->order;
+ first_stmt = last_stmt;
+ first_order = last_order;
+ buf_size = 0;
+}
+
+merged_store_group::~merged_store_group ()
+{
+ if (val)
+ XDELETEVEC (val);
+}
+
+/* Merge a store recorded by INFO into this merged store.
+ The store is not overlapping with the existing recorded
+ stores. */
+
+void
+merged_store_group::merge_into (store_immediate_info *info)
+{
+ unsigned HOST_WIDE_INT wid = info->bitsize;
+ /* Make sure we're inserting in the position we think we're inserting. */
+ gcc_assert (info->bitpos == start + width);
+
+ width += wid;
+ gimple *stmt = info->stmt;
+ stores.safe_push (info);
+ if (info->order > last_order)
+ {
+ last_order = info->order;
+ last_stmt = stmt;
+ }
+ else if (info->order < first_order)
+ {
+ first_order = info->order;
+ first_stmt = stmt;
+ }
+}
+
+/* Merge a store described by INFO into this merged store.
+   INFO overlaps in some way with the current store (the contiguous,
+   non-overlapping case is handled by merged_store_group::merge_into).  */
+
+void
+merged_store_group::merge_overlapping (store_immediate_info *info)
+{
+ gimple *stmt = info->stmt;
+ stores.safe_push (info);
+
+ /* If the store extends the size of the group, extend the width. */
+ if ((info->bitpos + info->bitsize) > (start + width))
+ width += info->bitpos + info->bitsize - (start + width);
+
+ if (info->order > last_order)
+ {
+ last_order = info->order;
+ last_stmt = stmt;
+ }
+ else if (info->order < first_order)
+ {
+ first_order = info->order;
+ first_stmt = stmt;
+ }
+}
+
+/* Go through all the recorded stores in this group in program order and
+ apply their values to the VAL byte array to create the final merged
+ value. Return true if the operation succeeded. */
+
+bool
+merged_store_group::apply_stores ()
+{
+ /* The total width of the stores must add up to a whole number of bytes
+ and start at a byte boundary. We don't support emitting bitfield
+ references for now. Also, make sure we have more than one store
+ in the group, otherwise we cannot merge anything. */
+ if (width % BITS_PER_UNIT != 0
+ || start % BITS_PER_UNIT != 0
+ || stores.length () == 1)
+ return false;
+
+ stores.qsort (sort_by_order);
+ struct store_immediate_info *info;
+ unsigned int i;
+ /* Create a buffer of a size that is 2 times the number of bytes we're
+ storing. That way native_encode_expr can write power-of-2-sized
+ chunks without overrunning. */
+ buf_size
+ = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT);
+ val = XCNEWVEC (unsigned char, buf_size);
+
+ FOR_EACH_VEC_ELT (stores, i, info)
+ {
+ unsigned int pos_in_buffer = info->bitpos - start;
+ bool ret = encode_tree_to_bitpos (info->val, val, info->bitsize,
+ pos_in_buffer, buf_size);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ if (ret)
+ {
+ fprintf (dump_file, "After writing ");
+ print_generic_expr (dump_file, info->val, 0);
+ fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
+ " at position %d the merged region contains:\n",
+ info->bitsize, pos_in_buffer);
+ dump_char_array (dump_file, val, buf_size);
+ }
+ else
+ fprintf (dump_file, "Failed to merge stores\n");
+ }
+ if (!ret)
+ return false;
+ }
+ return true;
+}
+
+/* Structure describing the store chain. */
+
+struct imm_store_chain_info
+{
+ auto_vec<struct store_immediate_info *> m_store_info;
+ auto_vec<merged_store_group *> m_merged_store_groups;
+
+ bool terminate_and_process_chain (tree);
+ bool coalesce_immediate_stores ();
+ bool output_merged_store (tree, merged_store_group *);
+ bool output_merged_stores (tree);
+};
+
+const pass_data pass_data_tree_store_merging = {
+ GIMPLE_PASS, /* type */
+ "store-merging", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_GIMPLE_STORE_MERGING, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_store_merging : public gimple_opt_pass
+{
+public:
+ pass_store_merging (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_tree_store_merging, ctxt)
+ {
+ }
+
+ /* Pass not supported for PDP-endianness. */
+ virtual bool
+ gate (function *)
+ {
+ return flag_store_merging && (WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN);
+ }
+
+ virtual unsigned int execute (function *);
+
+private:
+ hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;
+
+ bool terminate_and_process_all_chains ();
+ bool terminate_all_aliasing_chains (tree, tree, bool, gimple *);
+ bool terminate_and_release_chain (tree);
+}; // class pass_store_merging
+
+/* Terminate and process all recorded chains. Return true if any changes
+ were made. */
+
+bool
+pass_store_merging::terminate_and_process_all_chains ()
+{
+ hash_map<tree_operand_hash, struct imm_store_chain_info *>::iterator iter
+ = m_stores.begin ();
+ bool ret = false;
+ for (; iter != m_stores.end (); ++iter)
+ ret |= terminate_and_release_chain ((*iter).first);
+
+ return ret;
+}
+
+/* Terminate all chains that are affected by the assignment to DEST, which
+   appears in statement STMT and ultimately points to the object BASE.
+   Return true if
+ at least one aliasing chain was terminated. BASE and DEST are allowed to
+ be NULL_TREE. In that case the aliasing checks are performed on the whole
+ statement rather than a particular operand in it. VAR_OFFSET_P signifies
+ whether STMT represents a store to BASE offset by a variable amount.
+ If that is the case we have to terminate any chain anchored at BASE. */
+
+bool
+pass_store_merging::terminate_all_aliasing_chains (tree dest, tree base,
+ bool var_offset_p,
+ gimple *stmt)
+{
+ bool ret = false;
+
+ /* If the statement doesn't touch memory it can't alias. */
+ if (!gimple_vuse (stmt))
+ return false;
+
+ struct imm_store_chain_info **chain_info = NULL;
+
+ /* Check if the assignment destination (BASE) is part of a store chain.
+ This is to catch non-constant stores to destinations that may be part
+ of a chain. */
+ if (base)
+ {
+ chain_info = m_stores.get (base);
+ if (chain_info)
+ {
+ /* We have a chain at BASE and we're writing to [BASE + <variable>].
+ This can interfere with any of the stores so terminate
+ the chain. */
+ if (var_offset_p)
+ {
+ terminate_and_release_chain (base);
+ ret = true;
+ }
+ /* Otherwise go through every store in the chain to see if it
+ aliases with any of them. */
+ else
+ {
+ struct store_immediate_info *info;
+ unsigned int i;
+ FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
+ {
+ if (refs_may_alias_p (info->dest, dest))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "stmt causes chain termination:\n");
+ print_gimple_stmt (dump_file, stmt, 0, 0);
+ }
+ terminate_and_release_chain (base);
+ ret = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ hash_map<tree_operand_hash, struct imm_store_chain_info *>::iterator iter
+ = m_stores.begin ();
+
+ /* Check for aliasing with all other store chains. */
+ for (; iter != m_stores.end (); ++iter)
+ {
+ /* We already checked all the stores in chain_info and terminated the
+ chain if necessary. Skip it here. */
+ if (chain_info && (*chain_info) == (*iter).second)
+ continue;
+
+ tree key = (*iter).first;
+ if (ref_maybe_used_by_stmt_p (stmt, key)
+ || stmt_may_clobber_ref_p (stmt, key))
+ {
+ terminate_and_release_chain (key);
+ ret = true;
+ }
+ }
+
+ return ret;
+}
+
+/* Helper function. Terminate the recorded chain storing to base object
+   BASE. Return true if the merging and output were successful. The m_stores
+ entry is removed after the processing in any case. */
+
+bool
+pass_store_merging::terminate_and_release_chain (tree base)
+{
+ struct imm_store_chain_info **chain_info = m_stores.get (base);
+
+ if (!chain_info)
+ return false;
+
+ gcc_assert (*chain_info);
+
+ bool ret = (*chain_info)->terminate_and_process_chain (base);
+ delete *chain_info;
+ m_stores.remove (base);
+
+ return ret;
+}
+
+/* Go through the candidate stores recorded in m_store_info and merge them
+ into merged_store_group objects recorded into m_merged_store_groups
+ representing the widened stores. Return true if coalescing was successful
+ and the number of widened stores is fewer than the original number
+ of stores. */
+
+bool
+imm_store_chain_info::coalesce_immediate_stores ()
+{
+ /* Anything less can't be processed. */
+ if (m_store_info.length () < 2)
+ return false;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
+ m_store_info.length ());
+
+ store_immediate_info *info;
+ unsigned int i;
+
+ /* Order the stores by the bitposition they write to. */
+ m_store_info.qsort (sort_by_bitpos);
+
+ info = m_store_info[0];
+ merged_store_group *merged_store = new merged_store_group (info);
+
+ FOR_EACH_VEC_ELT (m_store_info, i, info)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
+ " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
+ i, info->bitsize, info->bitpos);
+ print_generic_expr (dump_file, info->val, 0);
+ fprintf (dump_file, "\n------------\n");
+ }
+
+ if (i == 0)
+ continue;
+
+ /* |---store 1---|
+ |---store 2---|
+ Overlapping stores. */
+ unsigned HOST_WIDE_INT start = info->bitpos;
+ if (IN_RANGE (start, merged_store->start,
+ merged_store->start + merged_store->width - 1))
+ {
+ merged_store->merge_overlapping (info);
+ continue;
+ }
+
+ /* |---store 1---| <gap> |---store 2---|.
+ Gap between stores. Start a new group. */
+ if (start != merged_store->start + merged_store->width)
+ {
+ /* Try to apply all the stores recorded for the group to determine
+ the bitpattern they write and discard it if that fails.
+ This will also reject single-store groups. */
+ if (!merged_store->apply_stores ())
+ delete merged_store;
+ else
+ m_merged_store_groups.safe_push (merged_store);
+
+ merged_store = new merged_store_group (info);
+
+ continue;
+ }
+
+ /* |---store 1---||---store 2---|
+ This store is consecutive to the previous one.
+ Merge it into the current store group. */
+ merged_store->merge_into (info);
+ }
+
+ /* Record or discard the last store group. */
+ if (!merged_store->apply_stores ())
+ delete merged_store;
+ else
+ m_merged_store_groups.safe_push (merged_store);
+
+ gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
+ bool success
+ = !m_merged_store_groups.is_empty ()
+ && m_merged_store_groups.length () < m_store_info.length ();
+
+ if (success && dump_file)
+ fprintf (dump_file, "Coalescing successful!\n"
+ "Merged into %u stores\n",
+ m_merged_store_groups.length ());
+
+ return success;
+}
+
+/* Return the type to use for the merged stores described by STMTS.
+ This is needed to get the alias sets right. */
+
+static tree
+get_alias_type_for_stmts (auto_vec<gimple *> &stmts)
+{
+ gimple *stmt;
+ unsigned int i;
+ tree lhs = gimple_assign_lhs (stmts[0]);
+ tree type = reference_alias_ptr_type (lhs);
+
+ FOR_EACH_VEC_ELT (stmts, i, stmt)
+ {
+ if (i == 0)
+ continue;
+
+ lhs = gimple_assign_lhs (stmt);
+ tree type1 = reference_alias_ptr_type (lhs);
+ if (!alias_ptr_types_compatible_p (type, type1))
+ return ptr_type_node;
+ }
+ return type;
+}
+
+/* Return the location_t information we can find among the statements
+ in STMTS. */
+
+static location_t
+get_location_for_stmts (auto_vec<gimple *> &stmts)
+{
+ gimple *stmt;
+ unsigned int i;
+
+ FOR_EACH_VEC_ELT (stmts, i, stmt)
+ if (gimple_has_location (stmt))
+ return gimple_location (stmt);
+
+ return UNKNOWN_LOCATION;
+}
+
+/* Used to describe a store resulting from splitting a wide store into
+   smaller regularly-sized stores in split_group.  */
+
+struct split_store
+{
+ unsigned HOST_WIDE_INT bytepos;
+ unsigned HOST_WIDE_INT size;
+ unsigned HOST_WIDE_INT align;
+ auto_vec<gimple *> orig_stmts;
+ split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT);
+};
+
+/* Simple constructor. */
+
+split_store::split_store (unsigned HOST_WIDE_INT bp,
+ unsigned HOST_WIDE_INT sz,
+ unsigned HOST_WIDE_INT al)
+ : bytepos (bp), size (sz), align (al)
+{
+ orig_stmts.create (0);
+}
+
+/* Record all statements corresponding to stores in GROUP that write to
+   the region starting at bit position BITPOS and of size BITSIZE bits.
+   Record such statements in STMTS.  The stores in GROUP must be sorted by
+   bitposition.  */
+
+static void
+find_constituent_stmts (struct merged_store_group *group,
+ auto_vec<gimple *> &stmts,
+ unsigned HOST_WIDE_INT bitpos,
+ unsigned HOST_WIDE_INT bitsize)
+{
+ struct store_immediate_info *info;
+ unsigned int i;
+ unsigned HOST_WIDE_INT end = bitpos + bitsize;
+ FOR_EACH_VEC_ELT (group->stores, i, info)
+ {
+ unsigned HOST_WIDE_INT stmt_start = info->bitpos;
+ unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
+ if (stmt_end < bitpos)
+ continue;
+ /* The stores in GROUP are ordered by bitposition so if we're past
+ the region for this group return early. */
+ if (stmt_start > end)
+ return;
+
+ if (IN_RANGE (stmt_start, bitpos, bitpos + bitsize)
+ || IN_RANGE (stmt_end, bitpos, end)
+ /* The statement writes a region that completely encloses the region
+ that this group writes. Unlikely to occur but let's
+ handle it. */
+ || IN_RANGE (bitpos, stmt_start, stmt_end))
+ stmts.safe_push (info->stmt);
+ }
+}
+
+/* Split a merged store described by GROUP by populating the SPLIT_STORES
+ vector with split_store structs describing the byte offset (from the base),
+ the bit size and alignment of each store as well as the original statements
+ involved in each such split group.
+ This is to separate the splitting strategy from the statement
+ building/emission/linking done in output_merged_store.
+ At the moment just start with the widest possible size and keep emitting
+ the widest we can until we have emitted all the bytes, halving the size
+ when appropriate. */
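+/* As a sketch of that strategy: for a 48-bit group starting on a byte
+   boundary with MAX_STORE_BITSIZE == 64, a known 32-bit alignment and
+   unaligned stores disallowed, try_size starts at 64, is halved to 32 to
+   satisfy the size and alignment checks, a 32-bit split_store is recorded,
+   and the remaining 16 bits become a second, 16-bit split_store, matching
+   the 32-bit followed by 16-bit example in the comment at the top of this
+   file.  */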
+
+static bool
+split_group (merged_store_group *group,
+ auto_vec<struct split_store *> &split_stores)
+{
+ unsigned HOST_WIDE_INT pos = group->start;
+ unsigned HOST_WIDE_INT size = group->width;
+ unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
+ unsigned HOST_WIDE_INT align = group->align;
+
+ /* We don't handle partial bitfields for now. We shouldn't have
+ reached this far. */
+ gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
+
+ bool allow_unaligned
+ = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
+
+ unsigned int try_size = MAX_STORE_BITSIZE;
+ while (try_size > size
+ || (!allow_unaligned
+ && try_size > align))
+ {
+ try_size /= 2;
+ if (try_size < BITS_PER_UNIT)
+ return false;
+ }
+
+ unsigned HOST_WIDE_INT try_pos = bytepos;
+ group->stores.qsort (sort_by_bitpos);
+
+ while (size > 0)
+ {
+ struct split_store *store = new split_store (try_pos, try_size, align);
+ unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
+ find_constituent_stmts (group, store->orig_stmts, try_bitpos, try_size);
+ split_stores.safe_push (store);
+
+ try_pos += try_size / BITS_PER_UNIT;
+
+ size -= try_size;
+ align = try_size;
+ while (size < try_size)
+ try_size /= 2;
+ }
+ return true;
+}
+
+/* Given a merged store group GROUP output the widened version of it.
+ The store chain is against the base object BASE.
+ Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
+ unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
+ Make sure that the number of statements output is less than the number of
+ original statements. If a better sequence is possible emit it and
+ return true. */
+
+bool
+imm_store_chain_info::output_merged_store (tree base, merged_store_group *group)
+{
+ unsigned HOST_WIDE_INT start_byte_pos = group->start / BITS_PER_UNIT;
+
+ unsigned int orig_num_stmts = group->stores.length ();
+ if (orig_num_stmts < 2)
+ return false;
+
+ auto_vec<struct split_store *> split_stores;
+ split_stores.create (0);
+ if (!split_group (group, split_stores))
+ return false;
+
+ gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
+ gimple_seq seq = NULL;
+ unsigned int num_stmts = 0;
+ tree last_vdef, new_vuse;
+ last_vdef = gimple_vdef (group->last_stmt);
+ new_vuse = gimple_vuse (group->last_stmt);
+
+ gimple *stmt = NULL;
+ /* The new SSA names created. Keep track of them so that we can free them
+ if we decide to not use the new sequence. */
+ auto_vec<tree> new_ssa_names;
+ split_store *split_store;
+ unsigned int i;
+ bool fail = false;
+
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ {
+ unsigned HOST_WIDE_INT try_size = split_store->size;
+ unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
+ unsigned HOST_WIDE_INT align = split_store->align;
+ tree offset_type = get_alias_type_for_stmts (split_store->orig_stmts);
+ location_t loc = get_location_for_stmts (split_store->orig_stmts);
+
+ tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
+ SET_TYPE_ALIGN (int_type, align);
+ tree addr = build_fold_addr_expr (base);
+ tree dest = fold_build2 (MEM_REF, int_type, addr,
+ build_int_cst (offset_type, try_pos));
+
+ tree src = native_interpret_expr (int_type,
+ group->val + try_pos - start_byte_pos,
+ group->buf_size);
+
+ stmt = gimple_build_assign (dest, src);
+ gimple_set_location (stmt, loc);
+ gimple_set_vuse (stmt, new_vuse);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+
+ /* We didn't manage to reduce the number of statements. Bail out. */
+ if (++num_stmts == orig_num_stmts)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Exceeded original number of stmts (%u)."
+ " Not profitable to emit new sequence.\n",
+ orig_num_stmts);
+ }
+ unsigned int ssa_count;
+ tree ssa_name;
+ /* Don't forget to cleanup the temporary SSA names. */
+ FOR_EACH_VEC_ELT (new_ssa_names, ssa_count, ssa_name)
+ release_ssa_name (ssa_name);
+
+ fail = true;
+ break;
+ }
+
+ tree new_vdef;
+ if (i < split_stores.length () - 1)
+ {
+ new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
+ new_ssa_names.safe_push (new_vdef);
+ }
+ else
+ new_vdef = last_vdef;
+
+ gimple_set_vdef (stmt, new_vdef);
+ SSA_NAME_DEF_STMT (new_vdef) = stmt;
+ new_vuse = new_vdef;
+ }
+
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ delete split_store;
+
+ if (fail)
+ return false;
+
+ gcc_assert (seq);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "New sequence of %u stmts to replace old one of %u stmts\n",
+ num_stmts, orig_num_stmts);
+ if (dump_flags & TDF_DETAILS)
+ print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
+ }
+ gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
+
+ return true;
+}
+
+/* Process the merged_store_group objects created in the coalescing phase.
+ The stores are all against the base object BASE.
+ Try to output the widened stores and delete the original statements if
+ successful. Return true iff any changes were made. */
+
+bool
+imm_store_chain_info::output_merged_stores (tree base)
+{
+ unsigned int i;
+ merged_store_group *merged_store;
+ bool ret = false;
+ FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
+ {
+ if (output_merged_store (base, merged_store))
+ {
+ unsigned int j;
+ store_immediate_info *store;
+ FOR_EACH_VEC_ELT (merged_store->stores, j, store)
+ {
+ gimple *stmt = store->stmt;
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ gsi_remove (&gsi, true);
+ if (stmt != merged_store->last_stmt)
+ {
+ unlink_stmt_vdef (stmt);
+ release_defs (stmt);
+ }
+ }
+ ret = true;
+ }
+ }
+ if (ret && dump_file)
+ fprintf (dump_file, "Merging successful!\n");
+
+ return ret;
+}
+
+/* Coalesce the store_immediate_info objects recorded against the base object
+ BASE in the first phase and output them.
+ Delete the allocated structures.
+ Return true if any changes were made. */
+
+bool
+imm_store_chain_info::terminate_and_process_chain (tree base)
+{
+ /* Process store chain. */
+ bool ret = false;
+ if (m_store_info.length () > 1)
+ {
+ ret = coalesce_immediate_stores ();
+ if (ret)
+ ret = output_merged_stores (base);
+ }
+
+ /* Delete all the entries we allocated ourselves. */
+ store_immediate_info *info;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (m_store_info, i, info)
+ delete info;
+
+ merged_store_group *merged_info;
+ FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
+ delete merged_info;
+
+ return ret;
+}
+
+/* Return true iff LHS is a destination potentially interesting for
+ store merging. In practice these are the codes that get_inner_reference
+ can process. */
+
+static bool
+lhs_valid_for_store_merging_p (tree lhs)
+{
+ tree_code code = TREE_CODE (lhs);
+
+ if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
+ || code == COMPONENT_REF || code == BIT_FIELD_REF)
+ return true;
+
+ return false;
+}
+
+/* Return true if the tree RHS is a constant we want to consider
+ during store merging. In practice accept all codes that
+ native_encode_expr accepts. */
+
+static bool
+rhs_valid_for_store_merging_p (tree rhs)
+{
+ tree type = TREE_TYPE (rhs);
+ if (TREE_CODE_CLASS (TREE_CODE (rhs)) != tcc_constant
+ || !can_native_encode_type_p (type))
+ return false;
+
+ return true;
+}
+
+/* Entry point for the pass. Go over each basic block recording chains of
+ immediate stores. Upon encountering a terminating statement (as defined
+ by stmt_terminates_chain_p) process the recorded stores and emit the widened
+ variants. */
+
+unsigned int
+pass_store_merging::execute (function *fun)
+{
+ basic_block bb;
+ hash_set<gimple *> orig_stmts;
+
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ gimple_stmt_iterator gsi;
+ unsigned HOST_WIDE_INT num_statements = 0;
+      /* Count the non-debug statements in the basic block.  Only basic
+	 blocks with at least two such statements are worth processing.  */
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ if (is_gimple_debug (gsi_stmt (gsi)))
+ continue;
+
+ if (++num_statements > 2)
+ break;
+ }
+
+ if (num_statements < 2)
+ continue;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
+
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (gimple_has_volatile_ops (stmt))
+ {
+ /* Terminate all chains. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Volatile access terminates "
+ "all chains\n");
+ terminate_and_process_all_chains ();
+ continue;
+ }
+
+ if (is_gimple_debug (stmt))
+ continue;
+
+ if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
+ && !stmt_can_throw_internal (stmt)
+ && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
+ {
+ tree lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+
+ HOST_WIDE_INT bitsize, bitpos;
+ machine_mode mode;
+ int unsignedp = 0, reversep = 0, volatilep = 0;
+ tree offset, base_addr;
+ base_addr
+ = get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
+ /* As a future enhancement we could handle stores with the same
+ base and offset. */
+ bool invalid = offset || reversep
+ || ((bitsize > MAX_BITSIZE_MODE_ANY_INT)
+ && (TREE_CODE (rhs) != INTEGER_CST))
+ || !rhs_valid_for_store_merging_p (rhs)
+ /* An access may not be volatile itself but base_addr may be
+ a volatile decl i.e. MEM[&volatile-decl]. The hashing for
+ tree_operand_hash won't consider such stores equal to each
+ other so we can't track chains on them. */
+ || TREE_THIS_VOLATILE (base_addr);
+
+ /* In some cases get_inner_reference may return a
+ MEM_REF [ptr + byteoffset]. For the purposes of this pass
+ canonicalize the base_addr to MEM_REF [ptr] and take
+ byteoffset into account in the bitpos. This occurs in
+ PR 23684 and this way we can catch more chains. */
+ if (TREE_CODE (base_addr) == MEM_REF
+ && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (base_addr, 0))))
+ {
+ offset_int bit_off, byte_off = mem_ref_offset (base_addr);
+ bit_off = byte_off << LOG2_BITS_PER_UNIT;
+ bit_off += bitpos;
+ if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
+ {
+ bitpos = bit_off.to_shwi ();
+ base_addr = build2 (MEM_REF, TREE_TYPE (base_addr),
+ TREE_OPERAND (base_addr, 0),
+ build_zero_cst (TREE_TYPE (
+ TREE_OPERAND (base_addr, 1))));
+ }
+ else
+ invalid = true;
+ }
+
+ struct imm_store_chain_info **chain_info
+ = m_stores.get (base_addr);
+
+ if (!invalid)
+ {
+ store_immediate_info *info;
+ if (chain_info)
+ {
+ info = new store_immediate_info (
+ bitsize, bitpos, rhs, lhs, stmt,
+ (*chain_info)->m_store_info.length ());
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Recording immediate store from stmt:\n");
+ print_gimple_stmt (dump_file, stmt, 0, 0);
+ }
+ (*chain_info)->m_store_info.safe_push (info);
+ /* If we reach the limit of stores to merge in a chain
+ terminate and process the chain now. */
+ if ((*chain_info)->m_store_info.length ()
+ == (unsigned int)
+ PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Reached maximum number of statements"
+ " to merge:\n");
+ terminate_and_release_chain (base_addr);
+ }
+ continue;
+ }
+
+ /* Store aliases any existing chain? */
+ terminate_all_aliasing_chains (lhs, base_addr, false, stmt);
+ /* Start a new chain. */
+ struct imm_store_chain_info *new_chain
+ = new imm_store_chain_info;
+ info = new store_immediate_info (bitsize, bitpos, rhs, lhs,
+ stmt, 0);
+ new_chain->m_store_info.safe_push (info);
+ m_stores.put (base_addr, new_chain);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Starting new chain with statement:\n");
+ print_gimple_stmt (dump_file, stmt, 0, 0);
+ fprintf (dump_file, "The base object is:\n");
+ print_generic_expr (dump_file, base_addr, 0);
+ fprintf (dump_file, "\n");
+ }
+ }
+ else
+ terminate_all_aliasing_chains (lhs, base_addr,
+ offset != NULL_TREE, stmt);
+
+ continue;
+ }
+
+ terminate_all_aliasing_chains (NULL_TREE, NULL_TREE, false, stmt);
+ }
+ terminate_and_process_all_chains ();
+ }
+ return 0;
+}
+
+} // anon namespace
+
+/* Construct and return a store merging pass object. */
+
+gimple_opt_pass *
+make_pass_store_merging (gcc::context *ctxt)
+{
+ return new pass_store_merging (ctxt);
+}
@@ -521,6 +521,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_fisolate_erroneous_paths_dereference, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fipa_ra, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_flra_remat, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fstore_merging, NULL, 1 },
/* -O3 optimizations. */
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
@@ -1100,6 +1100,18 @@ DEFPARAM (PARAM_MAX_TAIL_MERGE_COMPARISONS,
"Maximum amount of similar bbs to compare a bb with.",
10, 0, 0)
+DEFPARAM (PARAM_STORE_MERGING_ALLOW_UNALIGNED,
+ "store-merging-allow-unaligned",
+ "Allow the store merging pass to introduce unaligned stores "
+ "if it is legal to do so",
+ 1, 0, 1)
+
+DEFPARAM (PARAM_MAX_STORES_TO_MERGE,
+ "max-stores-to-merge",
+ "Maximum number of constant stores to merge in the"
+ "store merging pass",
+ 64, 2, 0)
+
DEFPARAM (PARAM_MAX_TAIL_MERGE_ITERATIONS,
"max-tail-merge-iterations",
"Maximum amount of iterations of the pass over a function.",
@@ -332,6 +332,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_fold_builtins);
NEXT_PASS (pass_optimize_widening_mul);
+ NEXT_PASS (pass_store_merging);
NEXT_PASS (pass_tail_calls);
/* If DCE is not run before checking for uninitialized uses,
we may get false warnings (e.g., testsuite/gcc.dg/uninit-5.c).
@@ -1,5 +1,5 @@
// { dg-do compile }
-// { dg-options "-O2 -fstrict-aliasing -fdump-tree-optimized" }
+// { dg-options "-O2 -fstrict-aliasing -fno-store-merging -fdump-tree-optimized" }
// Test that placement new does not introduce an unnecessary memory
// barrier.
new file mode 100644
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u;
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v;
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v;
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w;
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u __attribute__((aligned));
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v __attribute__((aligned));
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v __attribute__((aligned));
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w __attribute__((aligned));
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+
+struct bar {
+ int a;
+ char b;
+ char c;
+ char d;
+ char e;
+ char f;
+ char g;
+};
+
+void
+foo1 (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+}
+
+void
+foo2 (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 1;
+ p->d = 0;
+ p->e = 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+
+struct bar
+{
+ int a;
+ unsigned char b;
+ unsigned char c;
+ short d;
+ unsigned char e;
+ unsigned char f;
+ unsigned char g;
+};
+
+__attribute__ ((noinline)) void
+foozero (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+ p->f = 0;
+ p->g = 0;
+}
+
+__attribute__ ((noinline)) void
+foo1 (struct bar *p)
+{
+ p->b = 1;
+ p->a = 2;
+ p->c = 3;
+ p->d = 4;
+ p->e = 5;
+ p->f = 0;
+ p->g = 0xff;
+}
+
+__attribute__ ((noinline)) void
+foo2 (struct bar *p, struct bar *p2)
+{
+ p->b = 0xff;
+ p2->b = 0xa;
+ p->a = 0xfffff;
+ p2->c = 0xc;
+ p->c = 0xff;
+ p2->d = 0xbf;
+ p->d = 0xfff;
+}
+
+int
+main (void)
+{
+ struct bar b1, b2;
+ foozero (&b1);
+ foozero (&b2);
+
+ foo1 (&b1);
+ if (b1.b != 1 || b1.a != 2 || b1.c != 3 || b1.d != 4 || b1.e != 5
+ || b1.f != 0 || b1.g != 0xff)
+ __builtin_abort ();
+
+ foozero (&b1);
+ /* Make sure writes to aliasing struct pointers preserve the
+ correct order. */
+ foo2 (&b1, &b1);
+ if (b1.b != 0xa || b1.a != 0xfffff || b1.c != 0xff || b1.d != 0xfff)
+ __builtin_abort ();
+
+ foozero (&b1);
+ foo2 (&b1, &b2);
+ if (b1.a != 0xfffff || b1.b != 0xff || b1.c != 0xff || b1.d != 0xfff
+ || b2.b != 0xa || b2.c != 0xc || b2.d != 0xbf)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O2 -fdump-tree-store-merging-details" } */
+
+/* Make sure stores to volatile addresses don't get combined with
+ other accesses. */
+
+struct bar
+{
+ int a;
+ char b;
+ char c;
+ volatile short d;
+ char e;
+ char f;
+ char g;
+};
+
+void
+foozero (struct bar *p)
+{
+ p->b = 0xa;
+ p->a = 0xb;
+ p->c = 0xc;
+ p->d = 0;
+ p->e = 0xd;
+ p->f = 0xe;
+ p->g = 0xf;
+}
+
+/* { dg-final { scan-tree-dump "Volatile access terminates all chains" "store-merging" } } */
+/* { dg-final { scan-tree-dump-times "=\{v\} 0;" 1 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+
+/* Check that we can merge interleaving stores that are guaranteed
+ to be non-aliasing. */
+
+struct bar
+{
+ int a;
+ char b;
+ char c;
+ short d;
+ char e;
+ char f;
+ char g;
+};
+
+void
+foozero (struct bar *restrict p, struct bar *restrict p2)
+{
+ p->b = 0xff;
+ p2->b = 0xa;
+ p->a = 0xfffff;
+ p2->a = 0xab;
+ p2->c = 0xc;
+ p->c = 0xff;
+ p2->d = 0xbf;
+ p->d = 0xfff;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+
+/* Make sure that non-aliasing non-constant interspersed stores do not
+ stop chains. */
+
+struct bar {
+ int a;
+ char b;
+ char c;
+ char d;
+ char e;
+ char g;
+};
+
+void
+foo1 (struct bar *p, char tmp)
+{
+ p->a = 0;
+ p->b = 0;
+ p->g = tmp;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 1 "store-merging" } } */
+/* { dg-final { scan-tree-dump-times "MEM\\\[.*\\\]" 1 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+
+/* Check that we can widen accesses to bitfields. */
+
+struct bar {
+ int a : 3;
+ unsigned char b : 4;
+ unsigned char c : 1;
+ char d;
+ char e;
+ char f;
+ char g;
+};
+
+__attribute__ ((noinline)) void
+foozero (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+ p->f = 0;
+ p->g = 0;
+}
+
+__attribute__ ((noinline)) void
+foo1 (struct bar *p)
+{
+ p->b = 3;
+ p->a = 2;
+ p->c = 1;
+ p->d = 4;
+ p->e = 5;
+}
+
+int
+main (void)
+{
+ struct bar p;
+ foozero (&p);
+ foo1 (&p);
+ if (p.a != 2 || p.b != 3 || p.c != 1 || p.d != 4 || p.e != 5
+ || p.f != 0 || p.g != 0)
+ __builtin_abort ();
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+
+/* Check that we can merge consecutive array members through the pointer.
+ PR rtl-optimization/23684. */
+
+void
+foo (char *input)
+{
+ input = __builtin_assume_aligned (input, 8);
+ input[0] = 'H';
+ input[1] = 'e';
+ input[2] = 'l';
+ input[3] = 'l';
+ input[4] = 'o';
+ input[5] = ' ';
+ input[6] = 'w';
+ input[7] = 'o';
+ input[8] = 'r';
+ input[9] = 'l';
+ input[10] = 'd';
+ input[11] = '\0';
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 1 "store-merging" } } */
@@ -3,22 +3,22 @@
int arr[4][4];
void
-foo ()
+foo (int x, int y)
{
- arr[0][1] = 1;
- arr[1][0] = -1;
- arr[2][0] = 1;
- arr[1][1] = -1;
- arr[0][2] = 1;
- arr[0][3] = -1;
- arr[1][2] = 1;
- arr[2][1] = -1;
- arr[3][0] = 1;
- arr[3][1] = -1;
- arr[2][2] = 1;
- arr[1][3] = -1;
- arr[2][3] = 1;
- arr[3][2] = -1;
+ arr[0][1] = x;
+ arr[1][0] = y;
+ arr[2][0] = x;
+ arr[1][1] = y;
+ arr[0][2] = x;
+ arr[0][3] = y;
+ arr[1][2] = x;
+ arr[2][1] = y;
+ arr[3][0] = x;
+ arr[3][1] = y;
+ arr[2][2] = x;
+ arr[1][3] = y;
+ arr[2][3] = x;
+ arr[3][2] = y;
}
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" 7 } } */
@@ -3,22 +3,22 @@
float arr[4][4];
void
-foo ()
+foo (float x, float y)
{
- arr[0][1] = 1;
- arr[1][0] = -1;
- arr[2][0] = 1;
- arr[1][1] = -1;
- arr[0][2] = 1;
- arr[0][3] = -1;
- arr[1][2] = 1;
- arr[2][1] = -1;
- arr[3][0] = 1;
- arr[3][1] = -1;
- arr[2][2] = 1;
- arr[1][3] = -1;
- arr[2][3] = 1;
- arr[3][2] = -1;
+ arr[0][1] = x;
+ arr[1][0] = y;
+ arr[2][0] = x;
+ arr[1][1] = y;
+ arr[0][2] = x;
+ arr[0][3] = y;
+ arr[1][2] = x;
+ arr[2][1] = y;
+ arr[3][0] = x;
+ arr[3][1] = y;
+ arr[2][2] = x;
+ arr[1][3] = y;
+ arr[2][3] = x;
+ arr[3][2] = y;
}
/* { dg-final { scan-assembler-times "stp\ts\[0-9\]+, s\[0-9\]" 7 } } */
new file mode 100644
@@ -0,0 +1,126 @@
+/* PR middle-end/22141 */
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u;
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v;
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v;
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w;
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "67305985\|4030201" 7 } } */
@@ -1,7 +1,7 @@
/* PR rtl-optimization/34012 */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fno-store-merging" } */
void bar (long int *);
void
@@ -282,6 +282,7 @@ DEFTIMEVAR (TV_TREE_UNINIT , "uninit var analysis")
DEFTIMEVAR (TV_PLUGIN_INIT , "plugin initialization")
DEFTIMEVAR (TV_PLUGIN_RUN , "plugin execution")
DEFTIMEVAR (TV_GIMPLE_SLSR , "straight-line strength reduction")
+DEFTIMEVAR (TV_GIMPLE_STORE_MERGING , "store merging")
DEFTIMEVAR (TV_VTABLE_VERIFICATION , "vtable verification")
DEFTIMEVAR (TV_TREE_UBSAN , "tree ubsan")
DEFTIMEVAR (TV_INITIALIZE_RTL , "initialize rtl")
@@ -426,6 +426,7 @@ extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_return (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_noreturn (gcc::context *ctxt);