Message ID | 87indfmrgt.fsf@linaro.org |
---|---|
Headers | show |
Series | Make VEC_PERM_EXPR work for variable-length vectors | expand |
This patch reworks the VEC_PERM_EXPR folding so that more of it works for variable-length vectors. E.g. it means that we can now recognise variable-length permutes that reduce to a single vector, or cases in which a variable-length permute only needs one input. There should be no functional change for fixed-length vectors. 2017-12-09 Richard Sandiford <richard.sandiford@linaro.org> gcc/ * selftest.h (selftest::vec_perm_indices_c_tests): Declare. * selftest-run-tests.c (selftest::run_tests): Call it. * vector-builder.h (vector_builder::operator ==): New function. (vector_builder::operator !=): Likewise. * vec-perm-indices.h (vec_perm_indices::series_p): Declare. (vec_perm_indices::all_from_input_p): New function. * vec-perm-indices.c (vec_perm_indices::series_p): Likewise. (test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise. * fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder instead of reading the VECTOR_CST directly. Detect whether both vector inputs are the same before constructing the vec_perm_indices, and update the number of inputs argument accordingly. Use the utility functions added above. Only construct sel2 if we need to. Index: gcc/selftest.h =================================================================== *** gcc/selftest.h 2017-12-09 23:06:55.002855594 +0000 --- gcc/selftest.h 2017-12-09 23:21:51.517599734 +0000 *************** extern void vec_c_tests (); *** 201,206 **** --- 201,207 ---- extern void wide_int_cc_tests (); extern void predict_c_tests (); extern void simplify_rtx_c_tests (); + extern void vec_perm_indices_c_tests (); extern int num_passes; Index: gcc/selftest-run-tests.c =================================================================== *** gcc/selftest-run-tests.c 2017-12-09 23:06:55.002855594 +0000 --- gcc/selftest-run-tests.c 2017-12-09 23:21:51.517599734 +0000 *************** selftest::run_tests () *** 73,78 **** --- 73,79 ---- /* Mid-level data structures. */ input_c_tests (); + vec_perm_indices_c_tests (); tree_c_tests (); gimple_c_tests (); rtl_tests_c_tests (); Index: gcc/vector-builder.h =================================================================== *** gcc/vector-builder.h 2017-12-09 23:06:55.002855594 +0000 --- gcc/vector-builder.h 2017-12-09 23:21:51.518600090 +0000 *************** #define GCC_VECTOR_BUILDER_H *** 97,102 **** --- 97,105 ---- bool encoded_full_vector_p () const; T elt (unsigned int) const; + bool operator == (const Derived &) const; + bool operator != (const Derived &x) const { return !operator == (x); } + void finalize (); protected: *************** vector_builder<T, Derived>::new_vector ( *** 168,173 **** --- 171,196 ---- this->truncate (0); } + /* Return true if this vector and OTHER have the same elements and + are encoded in the same way. */ + + template<typename T, typename Derived> + bool + vector_builder<T, Derived>::operator == (const Derived &other) const + { + if (m_full_nelts != other.m_full_nelts + || m_npatterns != other.m_npatterns + || m_nelts_per_pattern != other.m_nelts_per_pattern) + return false; + + unsigned int nelts = encoded_nelts (); + for (unsigned int i = 0; i < nelts; ++i) + if (!derived ()->equal_p ((*this)[i], other[i])) + return false; + + return true; + } + /* Return the value of vector element I, which might or might not be encoded explicitly. */ Index: gcc/vec-perm-indices.h =================================================================== *** gcc/vec-perm-indices.h 2017-12-09 23:20:13.233112018 +0000 --- gcc/vec-perm-indices.h 2017-12-09 23:21:51.517599734 +0000 *************** typedef int_vector_builder<HOST_WIDE_INT *** 62,68 **** --- 62,70 ---- element_type clamp (element_type) const; element_type operator[] (unsigned int i) const; + bool series_p (unsigned int, unsigned int, element_type, element_type) const; bool all_in_range_p (element_type, element_type) const; + bool all_from_input_p (unsigned int) const; private: vec_perm_indices (const vec_perm_indices &); *************** vec_perm_indices::operator[] (unsigned i *** 119,122 **** --- 121,133 ---- return clamp (m_encoding.elt (i)); } + /* Return true if the permutation vector only selects elements from + input I. */ + + inline bool + vec_perm_indices::all_from_input_p (unsigned int i) const + { + return all_in_range_p (i * m_nelts_per_input, m_nelts_per_input); + } + #endif Index: gcc/vec-perm-indices.c =================================================================== *** gcc/vec-perm-indices.c 2017-12-09 23:20:13.233112018 +0000 --- gcc/vec-perm-indices.c 2017-12-09 23:21:51.517599734 +0000 *************** Software Foundation; either version 3, o *** 28,33 **** --- 28,34 ---- #include "rtl.h" #include "memmodel.h" #include "emit-rtl.h" + #include "selftest.h" /* Switch to a new permutation vector that selects between NINPUTS vector inputs that have NELTS_PER_INPUT elements each. Take the elements of the *************** vec_perm_indices::rotate_inputs (int del *** 85,90 **** --- 86,139 ---- m_encoding[i] = clamp (m_encoding[i] + element_delta); } + /* Return true if index OUT_BASE + I * OUT_STEP selects input + element IN_BASE + I * IN_STEP. */ + + bool + vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, + element_type in_base, element_type in_step) const + { + /* Check the base value. */ + if (clamp (m_encoding.elt (out_base)) != clamp (in_base)) + return false; + + unsigned int full_nelts = m_encoding.full_nelts (); + unsigned int npatterns = m_encoding.npatterns (); + + /* Calculate which multiple of OUT_STEP elements we need to get + back to the same pattern. */ + unsigned int cycle_length = least_common_multiple (out_step, npatterns); + + /* Check the steps. */ + in_step = clamp (in_step); + out_base += out_step; + unsigned int limit = 0; + for (;;) + { + /* Succeed if we've checked all the elements in the vector. */ + if (out_base >= full_nelts) + return true; + + if (out_base >= npatterns) + { + /* We've got to the end of the "foreground" values. Check + 2 elements from each pattern in the "background" values. */ + if (limit == 0) + limit = out_base + cycle_length * 2; + else if (out_base >= limit) + return true; + } + + element_type v0 = m_encoding.elt (out_base - out_step); + element_type v1 = m_encoding.elt (out_base); + if (clamp (v1 - v0) != in_step) + return false; + + out_base += out_step; + } + return true; + } + /* Return true if all elements of the permutation vector are in the range [START, START + SIZE). */ *************** vec_perm_indices_to_rtx (machine_mode mo *** 180,182 **** --- 229,280 ---- RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode)); return gen_rtx_CONST_VECTOR (mode, v); } + + #if CHECKING_P + + namespace selftest { + + /* Test a 12-element vector. */ + + static void + test_vec_perm_12 (void) + { + vec_perm_builder builder (12, 12, 1); + for (unsigned int i = 0; i < 4; ++i) + { + builder.quick_push (i * 5); + builder.quick_push (3 + i); + builder.quick_push (2 + 3 * i); + } + vec_perm_indices indices (builder, 1, 12); + ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); + ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); + ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); + ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); + ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); + + ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); + ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); + + ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); + ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); + + ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); + ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); + + ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); + ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); + ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); + } + + /* Run selftests for this file. */ + + void + vec_perm_indices_c_tests () + { + test_vec_perm_12 (); + } + + } // namespace selftest + + #endif Index: gcc/fold-const.c =================================================================== *** gcc/fold-const.c 2017-12-09 23:18:12.040041251 +0000 --- gcc/fold-const.c 2017-12-09 23:21:51.517599734 +0000 *************** fold_ternary_loc (location_t loc, enum t *** 11547,11645 **** case VEC_PERM_EXPR: if (TREE_CODE (arg2) == VECTOR_CST) { ! unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; ! bool need_mask_canon = false; ! bool need_mask_canon2 = false; ! bool all_in_vec0 = true; ! bool all_in_vec1 = true; ! bool maybe_identity = true; ! bool single_arg = (op0 == op1); ! bool changed = false; ! ! mask2 = 2 * nelts - 1; ! mask = single_arg ? (nelts - 1) : mask2; ! gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); ! vec_perm_builder sel (nelts, nelts, 1); ! vec_perm_builder sel2 (nelts, nelts, 1); ! for (i = 0; i < nelts; i++) ! { ! tree val = VECTOR_CST_ELT (arg2, i); ! if (TREE_CODE (val) != INTEGER_CST) ! return NULL_TREE; ! ! /* Make sure that the perm value is in an acceptable ! range. */ ! wi::tree_to_wide_ref t = wi::to_wide (val); ! need_mask_canon |= wi::gtu_p (t, mask); ! need_mask_canon2 |= wi::gtu_p (t, mask2); ! unsigned int elt = t.to_uhwi () & mask; ! unsigned int elt2 = t.to_uhwi () & mask2; ! ! if (elt < nelts) ! all_in_vec1 = false; ! else ! all_in_vec0 = false; ! ! if ((elt & (nelts - 1)) != i) ! maybe_identity = false; ! ! sel.quick_push (elt); ! sel2.quick_push (elt2); ! } ! if (maybe_identity) ! { ! if (all_in_vec0) ! return op0; ! if (all_in_vec1) ! return op1; ! } ! if (all_in_vec0) ! op1 = op0; ! else if (all_in_vec1) ! { ! op0 = op1; ! for (i = 0; i < nelts; i++) ! sel[i] -= nelts; ! need_mask_canon = true; } - vec_perm_indices indices (sel, 2, nelts); if ((TREE_CODE (op0) == VECTOR_CST || TREE_CODE (op0) == CONSTRUCTOR) && (TREE_CODE (op1) == VECTOR_CST || TREE_CODE (op1) == CONSTRUCTOR)) { ! tree t = fold_vec_perm (type, op0, op1, indices); if (t != NULL_TREE) return t; } ! if (op0 == op1 && !single_arg) ! changed = true; ! /* Some targets are deficient and fail to expand a single ! argument permutation while still allowing an equivalent ! 2-argument version. */ ! if (need_mask_canon && arg2 == op2 ! && !can_vec_perm_const_p (TYPE_MODE (type), indices, false) ! && can_vec_perm_const_p (TYPE_MODE (type), ! vec_perm_indices (sel2, 2, nelts), ! false)) { ! need_mask_canon = need_mask_canon2; ! sel.truncate (0); ! sel.splice (sel2); ! } ! ! if (need_mask_canon && arg2 == op2) ! { ! tree eltype = TREE_TYPE (TREE_TYPE (arg2)); ! tree_vector_builder tsel (TREE_TYPE (arg2), nelts, 1); ! for (i = 0; i < nelts; i++) ! tsel.quick_push (build_int_cst (eltype, sel[i])); ! op2 = tsel.build (); changed = true; } --- 11547,11611 ---- case VEC_PERM_EXPR: if (TREE_CODE (arg2) == VECTOR_CST) { ! /* Build a vector of integers from the tree mask. */ ! vec_perm_builder builder; ! if (!tree_to_vec_perm_builder (&builder, arg2)) ! return NULL_TREE; ! /* Create a vec_perm_indices for the integer vector. */ ! unsigned int nelts = TYPE_VECTOR_SUBPARTS (type); ! bool single_arg = (op0 == op1); ! vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); ! /* Check for cases that fold to OP0 or OP1 in their original ! element order. */ ! if (sel.series_p (0, 1, 0, 1)) ! return op0; ! if (sel.series_p (0, 1, nelts, 1)) ! return op1; ! ! if (!single_arg) ! { ! if (sel.all_from_input_p (0)) ! op1 = op0; ! else if (sel.all_from_input_p (1)) ! { ! op0 = op1; ! sel.rotate_inputs (1); ! } } if ((TREE_CODE (op0) == VECTOR_CST || TREE_CODE (op0) == CONSTRUCTOR) && (TREE_CODE (op1) == VECTOR_CST || TREE_CODE (op1) == CONSTRUCTOR)) { ! tree t = fold_vec_perm (type, op0, op1, sel); if (t != NULL_TREE) return t; } ! bool changed = (op0 == op1 && !single_arg); ! /* Generate a canonical form of the selector. */ ! if (arg2 == op2 && sel.encoding () != builder) { ! /* Some targets are deficient and fail to expand a single ! argument permutation while still allowing an equivalent ! 2-argument version. */ ! if (sel.ninputs () == 2 ! || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); ! else ! { ! vec_perm_indices sel2 (builder, 2, nelts); ! if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2); ! else ! /* Not directly supported with either encoding, ! so use the preferred form. */ ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); ! } changed = true; }
On Sun, Dec 10, 2017 at 12:06 AM, Richard Sandiford <richard.sandiford@linaro.org> wrote: > This series is a replacement for: > https://gcc.gnu.org/ml/gcc-patches/2017-11/msg00747.html > based on the feedback that using VEC_PERM_EXPR would be better. > > The changes are: > > (1) Remove the restriction that the selector elements have to have the > same width as the data elements, but only for constant selectors. > This lets through the cases we need without also allowing > potentially-expensive ops. Adding support for the variable > case can be done later if it seems useful, but it's not trivial. > > (2) Encode the integer form of constant selectors (vec_perm_indices) > in the same way as the new VECTOR_CST encoding, so that it can > cope with variable-length vectors. > > (3) Remove the vec_perm_const optab and reuse the target hook to emit > code. This avoids the need to create a CONST_VECTOR for the wide > selectors, and hence the need to have a corresponding wide vector > mode (which the target wouldn't otherwise need or support). Hmm. Makes sense I suppose. > (4) When handling the variable vec_perm optab, check that modes can store > all element indices before using them. > > (5) Unconditionally use ssizetype selector elements in permutes created > by the vectoriser. Why specifically _signed_ sizetype? That sounds like an odd choice. But I'll eventually see when looking at the patch. Does that mean we have a VNDImode vector unconditionally for the permute even though a vector matching the width of the data members would work? What happens if the target doesn't have vec_perm_const but vec_perm to handle all constant permutes? Going to look over the patches now. Thanks for (re-)doing the work. Richard. > (6) Make the AArch64 vec_perm_const handling handle variable-length vectors. > > Tested directly on trunk on aarch64-linux-gnu, x86_64-linux-gnu and > powerpc64le-linux-gnu. Also tested by comparing the before and after > assembly output for: > > arm-linux-gnueabi arm-linux-gnueabihf aarch64-linux-gnu > aarch64_be-linux-gnu ia64-linux-gnu i686-pc-linux-gnu > mipsisa64-linux-gnu mipsel-linux-gnu powerpc64-linux-gnu > powerpc64le-linux-gnu powerpc-eabispe x86_64-linux-gnu > sparc64-linux-gnu > > at -O3, which should cover all the ports that defined vec_perm_const. > The only difference was one instance of different RA for ia64-linux-gnu, > caused by using force_reg on a SUBREG that was previously used directly. > > OK to install? > > Thanks, > Richard
Richard Biener <richard.guenther@gmail.com> writes: > On Sun, Dec 10, 2017 at 12:06 AM, Richard Sandiford > <richard.sandiford@linaro.org> wrote: >> This series is a replacement for: >> https://gcc.gnu.org/ml/gcc-patches/2017-11/msg00747.html >> based on the feedback that using VEC_PERM_EXPR would be better. >> >> The changes are: >> >> (1) Remove the restriction that the selector elements have to have the >> same width as the data elements, but only for constant selectors. >> This lets through the cases we need without also allowing >> potentially-expensive ops. Adding support for the variable >> case can be done later if it seems useful, but it's not trivial. >> >> (2) Encode the integer form of constant selectors (vec_perm_indices) >> in the same way as the new VECTOR_CST encoding, so that it can >> cope with variable-length vectors. >> >> (3) Remove the vec_perm_const optab and reuse the target hook to emit >> code. This avoids the need to create a CONST_VECTOR for the wide >> selectors, and hence the need to have a corresponding wide vector >> mode (which the target wouldn't otherwise need or support). > > Hmm. Makes sense I suppose. > >> (4) When handling the variable vec_perm optab, check that modes can store >> all element indices before using them. >> >> (5) Unconditionally use ssizetype selector elements in permutes created >> by the vectoriser. > > Why specifically _signed_ sizetype? That sounds like an odd choice. But I'll > eventually see when looking at the patch. Sorry, should have said. The choice doesn't matter for vector lengths that are a power of 2, but for others, using a signed selector means that -1 always selects the last input element, whereas for unsigned selectors, the element selected by -1 would depend on the selector precision. (And the use of sizetype precision is pretty arbitrary.) > Does that mean we have a VNDImode vector unconditionally for the > permute even though a vector matching the width of the data members > would work? A VECTOR_CST of N DIs, yeah. It only becomes a VNDI at the rtl level if we're selecting 64-bit data elements. > What happens if the target doesn't have vec_perm_const but vec_perm to > handle all constant permutes? We'll try to represent the selector as VN?I, where ? matches the width of the data, but only after checking that that doesn't change the selector values. So for current targets we'll use vec_perm for constant permutes as before, but we wouldn't for 2-input V256QI interleaves. Thanks, Richard
On Tue, Dec 12, 2017 at 4:32 PM, Richard Sandiford <richard.sandiford@linaro.org> wrote: > Richard Biener <richard.guenther@gmail.com> writes: >> On Sun, Dec 10, 2017 at 12:06 AM, Richard Sandiford >> <richard.sandiford@linaro.org> wrote: >>> This series is a replacement for: >>> https://gcc.gnu.org/ml/gcc-patches/2017-11/msg00747.html >>> based on the feedback that using VEC_PERM_EXPR would be better. >>> >>> The changes are: >>> >>> (1) Remove the restriction that the selector elements have to have the >>> same width as the data elements, but only for constant selectors. >>> This lets through the cases we need without also allowing >>> potentially-expensive ops. Adding support for the variable >>> case can be done later if it seems useful, but it's not trivial. >>> >>> (2) Encode the integer form of constant selectors (vec_perm_indices) >>> in the same way as the new VECTOR_CST encoding, so that it can >>> cope with variable-length vectors. >>> >>> (3) Remove the vec_perm_const optab and reuse the target hook to emit >>> code. This avoids the need to create a CONST_VECTOR for the wide >>> selectors, and hence the need to have a corresponding wide vector >>> mode (which the target wouldn't otherwise need or support). >> >> Hmm. Makes sense I suppose. >> >>> (4) When handling the variable vec_perm optab, check that modes can store >>> all element indices before using them. >>> >>> (5) Unconditionally use ssizetype selector elements in permutes created >>> by the vectoriser. >> >> Why specifically _signed_ sizetype? That sounds like an odd choice. But I'll >> eventually see when looking at the patch. > > Sorry, should have said. The choice doesn't matter for vector lengths > that are a power of 2, which are the only ones we support anyway? > but for others, using a signed selector means that > -1 always selects the last input element, whereas for unsigned selectors, > the element selected by -1 would depend on the selector precision. (And the > use of sizetype precision is pretty arbitrary.) hmm, so you are saying that vec_perm <v1, v2, { -1, -2, ... }> is equal to vec_perm <v1, v2, {2*n-1, 2*n-2, ....}? tree.def defines VEC_PERM_EXPR via N = length(mask) foreach i in N: M = mask[i] % (2*N) A = M < N ? v0[M] : v1[M-N] which doesn't reflect this behavior. Does this behavior persist for variable vector permutations? > >> Does that mean we have a VNDImode vector unconditionally for the >> permute even though a vector matching the width of the data members >> would work? > > A VECTOR_CST of N DIs, yeah. It only becomes a VNDI at the rtl level > if we're selecting 64-bit data elements. And on GIMPLE? Do we have a vector type with ssizetype elements unconditionally? >> What happens if the target doesn't have vec_perm_const but vec_perm to >> handle all constant permutes? > > We'll try to represent the selector as VN?I, where ? matches the width > of the data, but only after checking that that doesn't change the > selector values. So for current targets we'll use vec_perm for constant > permutes as before, but we wouldn't for 2-input V256QI interleaves. I see. Richard. > Thanks, > Richard
Richard Biener <richard.guenther@gmail.com> writes: > On Tue, Dec 12, 2017 at 4:32 PM, Richard Sandiford > <richard.sandiford@linaro.org> wrote: >> Richard Biener <richard.guenther@gmail.com> writes: >>> On Sun, Dec 10, 2017 at 12:06 AM, Richard Sandiford >>> <richard.sandiford@linaro.org> wrote: >>>> This series is a replacement for: >>>> https://gcc.gnu.org/ml/gcc-patches/2017-11/msg00747.html >>>> based on the feedback that using VEC_PERM_EXPR would be better. >>>> >>>> The changes are: >>>> >>>> (1) Remove the restriction that the selector elements have to have the >>>> same width as the data elements, but only for constant selectors. >>>> This lets through the cases we need without also allowing >>>> potentially-expensive ops. Adding support for the variable >>>> case can be done later if it seems useful, but it's not trivial. >>>> >>>> (2) Encode the integer form of constant selectors (vec_perm_indices) >>>> in the same way as the new VECTOR_CST encoding, so that it can >>>> cope with variable-length vectors. >>>> >>>> (3) Remove the vec_perm_const optab and reuse the target hook to emit >>>> code. This avoids the need to create a CONST_VECTOR for the wide >>>> selectors, and hence the need to have a corresponding wide vector >>>> mode (which the target wouldn't otherwise need or support). >>> >>> Hmm. Makes sense I suppose. >>> >>>> (4) When handling the variable vec_perm optab, check that modes can store >>>> all element indices before using them. >>>> >>>> (5) Unconditionally use ssizetype selector elements in permutes created >>>> by the vectoriser. >>> >>> Why specifically _signed_ sizetype? That sounds like an odd choice. >>> But I'll >>> eventually see when looking at the patch. >> >> Sorry, should have said. The choice doesn't matter for vector lengths >> that are a power of 2, > > which are the only ones we support anyway? Yeah, for fixed-length at the tree level. The variable-length support allows (2^N)*X vectors for non-power-of-2 X though, and we support non-power-of-2 fixed-length vectors in RTL (e.g. V12QI). >> but for others, using a signed selector means that >> -1 always selects the last input element, whereas for unsigned selectors, >> the element selected by -1 would depend on the selector precision. (And the >> use of sizetype precision is pretty arbitrary.) > > hmm, so you are saying that vec_perm <v1, v2, { -1, -2, ... }> is equal > to vec_perm <v1, v2, {2*n-1, 2*n-2, ....}? Yeah. > tree.def defines VEC_PERM_EXPR via > > N = length(mask) > foreach i in N: > M = mask[i] % (2*N) > A = M < N ? v0[M] : v1[M-N] > > which doesn't reflect this behavior. Does this behavior persist for variable > vector permutations? __builtin_shuffle is defined to wrap though: The elements of the input vectors are numbered in memory ordering of @var{vec0} beginning at 0 and @var{vec1} beginning at @var{N}. The elements of @var{mask} are considered modulo @var{N} in the single-operand case and modulo @math{2*@var{N}} in the two-operand case. I think we need to preserve that for VEC_PERM_EXPR, otherwise we'd need to introduce the masking operation when lowering __builtin_shuffle to VEC_PERM_EXPR. >>> Does that mean we have a VNDImode vector unconditionally for the >>> permute even though a vector matching the width of the data members >>> would work? >> >> A VECTOR_CST of N DIs, yeah. It only becomes a VNDI at the rtl level >> if we're selecting 64-bit data elements. > > And on GIMPLE? Do we have a vector type with ssizetype elements > unconditionally? For autovectorised permutes, yes. Other permutes we keep the existing types. Thanks, Richard
Ping Richard Sandiford <richard.sandiford@linaro.org> writes: > This patch reworks the VEC_PERM_EXPR folding so that more of it works > for variable-length vectors. E.g. it means that we can now recognise > variable-length permutes that reduce to a single vector, or cases in > which a variable-length permute only needs one input. There should be > no functional change for fixed-length vectors. > > > 2017-12-09 Richard Sandiford <richard.sandiford@linaro.org> > > gcc/ > * selftest.h (selftest::vec_perm_indices_c_tests): Declare. > * selftest-run-tests.c (selftest::run_tests): Call it. > * vector-builder.h (vector_builder::operator ==): New function. > (vector_builder::operator !=): Likewise. > * vec-perm-indices.h (vec_perm_indices::series_p): Declare. > (vec_perm_indices::all_from_input_p): New function. > * vec-perm-indices.c (vec_perm_indices::series_p): Likewise. > (test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise. > * fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder > instead of reading the VECTOR_CST directly. Detect whether both > vector inputs are the same before constructing the vec_perm_indices, > and update the number of inputs argument accordingly. Use the > utility functions added above. Only construct sel2 if we need to. > > Index: gcc/selftest.h > =================================================================== > *** gcc/selftest.h 2017-12-09 23:06:55.002855594 +0000 > --- gcc/selftest.h 2017-12-09 23:21:51.517599734 +0000 > *************** extern void vec_c_tests (); > *** 201,206 **** > --- 201,207 ---- > extern void wide_int_cc_tests (); > extern void predict_c_tests (); > extern void simplify_rtx_c_tests (); > + extern void vec_perm_indices_c_tests (); > > extern int num_passes; > > Index: gcc/selftest-run-tests.c > =================================================================== > *** gcc/selftest-run-tests.c 2017-12-09 23:06:55.002855594 +0000 > --- gcc/selftest-run-tests.c 2017-12-09 23:21:51.517599734 +0000 > *************** selftest::run_tests () > *** 73,78 **** > --- 73,79 ---- > > /* Mid-level data structures. */ > input_c_tests (); > + vec_perm_indices_c_tests (); > tree_c_tests (); > gimple_c_tests (); > rtl_tests_c_tests (); > Index: gcc/vector-builder.h > =================================================================== > *** gcc/vector-builder.h 2017-12-09 23:06:55.002855594 +0000 > --- gcc/vector-builder.h 2017-12-09 23:21:51.518600090 +0000 > *************** #define GCC_VECTOR_BUILDER_H > *** 97,102 **** > --- 97,105 ---- > bool encoded_full_vector_p () const; > T elt (unsigned int) const; > > + bool operator == (const Derived &) const; > + bool operator != (const Derived &x) const { return !operator == (x); } > + > void finalize (); > > protected: > *************** vector_builder<T, Derived>::new_vector ( > *** 168,173 **** > --- 171,196 ---- > this->truncate (0); > } > > + /* Return true if this vector and OTHER have the same elements and > + are encoded in the same way. */ > + > + template<typename T, typename Derived> > + bool > + vector_builder<T, Derived>::operator == (const Derived &other) const > + { > + if (m_full_nelts != other.m_full_nelts > + || m_npatterns != other.m_npatterns > + || m_nelts_per_pattern != other.m_nelts_per_pattern) > + return false; > + > + unsigned int nelts = encoded_nelts (); > + for (unsigned int i = 0; i < nelts; ++i) > + if (!derived ()->equal_p ((*this)[i], other[i])) > + return false; > + > + return true; > + } > + > /* Return the value of vector element I, which might or might not be > encoded explicitly. */ > > Index: gcc/vec-perm-indices.h > =================================================================== > *** gcc/vec-perm-indices.h 2017-12-09 23:20:13.233112018 +0000 > --- gcc/vec-perm-indices.h 2017-12-09 23:21:51.517599734 +0000 > *************** typedef int_vector_builder<HOST_WIDE_INT > *** 62,68 **** > --- 62,70 ---- > > element_type clamp (element_type) const; > element_type operator[] (unsigned int i) const; > + bool series_p (unsigned int, unsigned int, element_type, element_type) const; > bool all_in_range_p (element_type, element_type) const; > + bool all_from_input_p (unsigned int) const; > > private: > vec_perm_indices (const vec_perm_indices &); > *************** vec_perm_indices::operator[] (unsigned i > *** 119,122 **** > --- 121,133 ---- > return clamp (m_encoding.elt (i)); > } > > + /* Return true if the permutation vector only selects elements from > + input I. */ > + > + inline bool > + vec_perm_indices::all_from_input_p (unsigned int i) const > + { > + return all_in_range_p (i * m_nelts_per_input, m_nelts_per_input); > + } > + > #endif > Index: gcc/vec-perm-indices.c > =================================================================== > *** gcc/vec-perm-indices.c 2017-12-09 23:20:13.233112018 +0000 > --- gcc/vec-perm-indices.c 2017-12-09 23:21:51.517599734 +0000 > *************** Software Foundation; either version 3, o > *** 28,33 **** > --- 28,34 ---- > #include "rtl.h" > #include "memmodel.h" > #include "emit-rtl.h" > + #include "selftest.h" > > /* Switch to a new permutation vector that selects between NINPUTS vector > inputs that have NELTS_PER_INPUT elements each. Take the elements of the > *************** vec_perm_indices::rotate_inputs (int del > *** 85,90 **** > --- 86,139 ---- > m_encoding[i] = clamp (m_encoding[i] + element_delta); > } > > + /* Return true if index OUT_BASE + I * OUT_STEP selects input > + element IN_BASE + I * IN_STEP. */ > + > + bool > + vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, > + element_type in_base, element_type in_step) const > + { > + /* Check the base value. */ > + if (clamp (m_encoding.elt (out_base)) != clamp (in_base)) > + return false; > + > + unsigned int full_nelts = m_encoding.full_nelts (); > + unsigned int npatterns = m_encoding.npatterns (); > + > + /* Calculate which multiple of OUT_STEP elements we need to get > + back to the same pattern. */ > + unsigned int cycle_length = least_common_multiple (out_step, npatterns); > + > + /* Check the steps. */ > + in_step = clamp (in_step); > + out_base += out_step; > + unsigned int limit = 0; > + for (;;) > + { > + /* Succeed if we've checked all the elements in the vector. */ > + if (out_base >= full_nelts) > + return true; > + > + if (out_base >= npatterns) > + { > + /* We've got to the end of the "foreground" values. Check > + 2 elements from each pattern in the "background" values. */ > + if (limit == 0) > + limit = out_base + cycle_length * 2; > + else if (out_base >= limit) > + return true; > + } > + > + element_type v0 = m_encoding.elt (out_base - out_step); > + element_type v1 = m_encoding.elt (out_base); > + if (clamp (v1 - v0) != in_step) > + return false; > + > + out_base += out_step; > + } > + return true; > + } > + > /* Return true if all elements of the permutation vector are in the range > [START, START + SIZE). */ > > *************** vec_perm_indices_to_rtx (machine_mode mo > *** 180,182 **** > --- 229,280 ---- > RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode)); > return gen_rtx_CONST_VECTOR (mode, v); > } > + > + #if CHECKING_P > + > + namespace selftest { > + > + /* Test a 12-element vector. */ > + > + static void > + test_vec_perm_12 (void) > + { > + vec_perm_builder builder (12, 12, 1); > + for (unsigned int i = 0; i < 4; ++i) > + { > + builder.quick_push (i * 5); > + builder.quick_push (3 + i); > + builder.quick_push (2 + 3 * i); > + } > + vec_perm_indices indices (builder, 1, 12); > + ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); > + ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); > + ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); > + ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); > + ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); > + > + ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); > + ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); > + > + ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); > + ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); > + > + ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); > + ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); > + > + ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); > + ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); > + ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); > + } > + > + /* Run selftests for this file. */ > + > + void > + vec_perm_indices_c_tests () > + { > + test_vec_perm_12 (); > + } > + > + } // namespace selftest > + > + #endif > Index: gcc/fold-const.c > =================================================================== > *** gcc/fold-const.c 2017-12-09 23:18:12.040041251 +0000 > --- gcc/fold-const.c 2017-12-09 23:21:51.517599734 +0000 > *************** fold_ternary_loc (location_t loc, enum t > *** 11547,11645 **** > case VEC_PERM_EXPR: > if (TREE_CODE (arg2) == VECTOR_CST) > { > ! unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; > ! bool need_mask_canon = false; > ! bool need_mask_canon2 = false; > ! bool all_in_vec0 = true; > ! bool all_in_vec1 = true; > ! bool maybe_identity = true; > ! bool single_arg = (op0 == op1); > ! bool changed = false; > ! > ! mask2 = 2 * nelts - 1; > ! mask = single_arg ? (nelts - 1) : mask2; > ! gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); > ! vec_perm_builder sel (nelts, nelts, 1); > ! vec_perm_builder sel2 (nelts, nelts, 1); > ! for (i = 0; i < nelts; i++) > ! { > ! tree val = VECTOR_CST_ELT (arg2, i); > ! if (TREE_CODE (val) != INTEGER_CST) > ! return NULL_TREE; > ! > ! /* Make sure that the perm value is in an acceptable > ! range. */ > ! wi::tree_to_wide_ref t = wi::to_wide (val); > ! need_mask_canon |= wi::gtu_p (t, mask); > ! need_mask_canon2 |= wi::gtu_p (t, mask2); > ! unsigned int elt = t.to_uhwi () & mask; > ! unsigned int elt2 = t.to_uhwi () & mask2; > ! > ! if (elt < nelts) > ! all_in_vec1 = false; > ! else > ! all_in_vec0 = false; > ! > ! if ((elt & (nelts - 1)) != i) > ! maybe_identity = false; > ! > ! sel.quick_push (elt); > ! sel2.quick_push (elt2); > ! } > > ! if (maybe_identity) > ! { > ! if (all_in_vec0) > ! return op0; > ! if (all_in_vec1) > ! return op1; > ! } > > ! if (all_in_vec0) > ! op1 = op0; > ! else if (all_in_vec1) > ! { > ! op0 = op1; > ! for (i = 0; i < nelts; i++) > ! sel[i] -= nelts; > ! need_mask_canon = true; > } > > - vec_perm_indices indices (sel, 2, nelts); > if ((TREE_CODE (op0) == VECTOR_CST > || TREE_CODE (op0) == CONSTRUCTOR) > && (TREE_CODE (op1) == VECTOR_CST > || TREE_CODE (op1) == CONSTRUCTOR)) > { > ! tree t = fold_vec_perm (type, op0, op1, indices); > if (t != NULL_TREE) > return t; > } > > ! if (op0 == op1 && !single_arg) > ! changed = true; > > ! /* Some targets are deficient and fail to expand a single > ! argument permutation while still allowing an equivalent > ! 2-argument version. */ > ! if (need_mask_canon && arg2 == op2 > ! && !can_vec_perm_const_p (TYPE_MODE (type), indices, false) > ! && can_vec_perm_const_p (TYPE_MODE (type), > ! vec_perm_indices (sel2, 2, nelts), > ! false)) > { > ! need_mask_canon = need_mask_canon2; > ! sel.truncate (0); > ! sel.splice (sel2); > ! } > ! > ! if (need_mask_canon && arg2 == op2) > ! { > ! tree eltype = TREE_TYPE (TREE_TYPE (arg2)); > ! tree_vector_builder tsel (TREE_TYPE (arg2), nelts, 1); > ! for (i = 0; i < nelts; i++) > ! tsel.quick_push (build_int_cst (eltype, sel[i])); > ! op2 = tsel.build (); > changed = true; > } > > --- 11547,11611 ---- > case VEC_PERM_EXPR: > if (TREE_CODE (arg2) == VECTOR_CST) > { > ! /* Build a vector of integers from the tree mask. */ > ! vec_perm_builder builder; > ! if (!tree_to_vec_perm_builder (&builder, arg2)) > ! return NULL_TREE; > > ! /* Create a vec_perm_indices for the integer vector. */ > ! unsigned int nelts = TYPE_VECTOR_SUBPARTS (type); > ! bool single_arg = (op0 == op1); > ! vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); > > ! /* Check for cases that fold to OP0 or OP1 in their original > ! element order. */ > ! if (sel.series_p (0, 1, 0, 1)) > ! return op0; > ! if (sel.series_p (0, 1, nelts, 1)) > ! return op1; > ! > ! if (!single_arg) > ! { > ! if (sel.all_from_input_p (0)) > ! op1 = op0; > ! else if (sel.all_from_input_p (1)) > ! { > ! op0 = op1; > ! sel.rotate_inputs (1); > ! } > } > > if ((TREE_CODE (op0) == VECTOR_CST > || TREE_CODE (op0) == CONSTRUCTOR) > && (TREE_CODE (op1) == VECTOR_CST > || TREE_CODE (op1) == CONSTRUCTOR)) > { > ! tree t = fold_vec_perm (type, op0, op1, sel); > if (t != NULL_TREE) > return t; > } > > ! bool changed = (op0 == op1 && !single_arg); > > ! /* Generate a canonical form of the selector. */ > ! if (arg2 == op2 && sel.encoding () != builder) > { > ! /* Some targets are deficient and fail to expand a single > ! argument permutation while still allowing an equivalent > ! 2-argument version. */ > ! if (sel.ninputs () == 2 > ! || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) > ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); > ! else > ! { > ! vec_perm_indices sel2 (builder, 2, nelts); > ! if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) > ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2); > ! else > ! /* Not directly supported with either encoding, > ! so use the preferred form. */ > ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); > ! } > changed = true; > } >
On Sun, Dec 10, 2017 at 12:23 AM, Richard Sandiford <richard.sandiford@linaro.org> wrote: > This patch reworks the VEC_PERM_EXPR folding so that more of it works > for variable-length vectors. E.g. it means that we can now recognise > variable-length permutes that reduce to a single vector, or cases in > which a variable-length permute only needs one input. There should be > no functional change for fixed-length vectors. Ok. Richard. > > 2017-12-09 Richard Sandiford <richard.sandiford@linaro.org> > > gcc/ > * selftest.h (selftest::vec_perm_indices_c_tests): Declare. > * selftest-run-tests.c (selftest::run_tests): Call it. > * vector-builder.h (vector_builder::operator ==): New function. > (vector_builder::operator !=): Likewise. > * vec-perm-indices.h (vec_perm_indices::series_p): Declare. > (vec_perm_indices::all_from_input_p): New function. > * vec-perm-indices.c (vec_perm_indices::series_p): Likewise. > (test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise. > * fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder > instead of reading the VECTOR_CST directly. Detect whether both > vector inputs are the same before constructing the vec_perm_indices, > and update the number of inputs argument accordingly. Use the > utility functions added above. Only construct sel2 if we need to. > > Index: gcc/selftest.h > =================================================================== > *** gcc/selftest.h 2017-12-09 23:06:55.002855594 +0000 > --- gcc/selftest.h 2017-12-09 23:21:51.517599734 +0000 > *************** extern void vec_c_tests (); > *** 201,206 **** > --- 201,207 ---- > extern void wide_int_cc_tests (); > extern void predict_c_tests (); > extern void simplify_rtx_c_tests (); > + extern void vec_perm_indices_c_tests (); > > extern int num_passes; > > Index: gcc/selftest-run-tests.c > =================================================================== > *** gcc/selftest-run-tests.c 2017-12-09 23:06:55.002855594 +0000 > --- gcc/selftest-run-tests.c 2017-12-09 23:21:51.517599734 +0000 > *************** selftest::run_tests () > *** 73,78 **** > --- 73,79 ---- > > /* Mid-level data structures. */ > input_c_tests (); > + vec_perm_indices_c_tests (); > tree_c_tests (); > gimple_c_tests (); > rtl_tests_c_tests (); > Index: gcc/vector-builder.h > =================================================================== > *** gcc/vector-builder.h 2017-12-09 23:06:55.002855594 +0000 > --- gcc/vector-builder.h 2017-12-09 23:21:51.518600090 +0000 > *************** #define GCC_VECTOR_BUILDER_H > *** 97,102 **** > --- 97,105 ---- > bool encoded_full_vector_p () const; > T elt (unsigned int) const; > > + bool operator == (const Derived &) const; > + bool operator != (const Derived &x) const { return !operator == (x); } > + > void finalize (); > > protected: > *************** vector_builder<T, Derived>::new_vector ( > *** 168,173 **** > --- 171,196 ---- > this->truncate (0); > } > > + /* Return true if this vector and OTHER have the same elements and > + are encoded in the same way. */ > + > + template<typename T, typename Derived> > + bool > + vector_builder<T, Derived>::operator == (const Derived &other) const > + { > + if (m_full_nelts != other.m_full_nelts > + || m_npatterns != other.m_npatterns > + || m_nelts_per_pattern != other.m_nelts_per_pattern) > + return false; > + > + unsigned int nelts = encoded_nelts (); > + for (unsigned int i = 0; i < nelts; ++i) > + if (!derived ()->equal_p ((*this)[i], other[i])) > + return false; > + > + return true; > + } > + > /* Return the value of vector element I, which might or might not be > encoded explicitly. */ > > Index: gcc/vec-perm-indices.h > =================================================================== > *** gcc/vec-perm-indices.h 2017-12-09 23:20:13.233112018 +0000 > --- gcc/vec-perm-indices.h 2017-12-09 23:21:51.517599734 +0000 > *************** typedef int_vector_builder<HOST_WIDE_INT > *** 62,68 **** > --- 62,70 ---- > > element_type clamp (element_type) const; > element_type operator[] (unsigned int i) const; > + bool series_p (unsigned int, unsigned int, element_type, element_type) const; > bool all_in_range_p (element_type, element_type) const; > + bool all_from_input_p (unsigned int) const; > > private: > vec_perm_indices (const vec_perm_indices &); > *************** vec_perm_indices::operator[] (unsigned i > *** 119,122 **** > --- 121,133 ---- > return clamp (m_encoding.elt (i)); > } > > + /* Return true if the permutation vector only selects elements from > + input I. */ > + > + inline bool > + vec_perm_indices::all_from_input_p (unsigned int i) const > + { > + return all_in_range_p (i * m_nelts_per_input, m_nelts_per_input); > + } > + > #endif > Index: gcc/vec-perm-indices.c > =================================================================== > *** gcc/vec-perm-indices.c 2017-12-09 23:20:13.233112018 +0000 > --- gcc/vec-perm-indices.c 2017-12-09 23:21:51.517599734 +0000 > *************** Software Foundation; either version 3, o > *** 28,33 **** > --- 28,34 ---- > #include "rtl.h" > #include "memmodel.h" > #include "emit-rtl.h" > + #include "selftest.h" > > /* Switch to a new permutation vector that selects between NINPUTS vector > inputs that have NELTS_PER_INPUT elements each. Take the elements of the > *************** vec_perm_indices::rotate_inputs (int del > *** 85,90 **** > --- 86,139 ---- > m_encoding[i] = clamp (m_encoding[i] + element_delta); > } > > + /* Return true if index OUT_BASE + I * OUT_STEP selects input > + element IN_BASE + I * IN_STEP. */ > + > + bool > + vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, > + element_type in_base, element_type in_step) const > + { > + /* Check the base value. */ > + if (clamp (m_encoding.elt (out_base)) != clamp (in_base)) > + return false; > + > + unsigned int full_nelts = m_encoding.full_nelts (); > + unsigned int npatterns = m_encoding.npatterns (); > + > + /* Calculate which multiple of OUT_STEP elements we need to get > + back to the same pattern. */ > + unsigned int cycle_length = least_common_multiple (out_step, npatterns); > + > + /* Check the steps. */ > + in_step = clamp (in_step); > + out_base += out_step; > + unsigned int limit = 0; > + for (;;) > + { > + /* Succeed if we've checked all the elements in the vector. */ > + if (out_base >= full_nelts) > + return true; > + > + if (out_base >= npatterns) > + { > + /* We've got to the end of the "foreground" values. Check > + 2 elements from each pattern in the "background" values. */ > + if (limit == 0) > + limit = out_base + cycle_length * 2; > + else if (out_base >= limit) > + return true; > + } > + > + element_type v0 = m_encoding.elt (out_base - out_step); > + element_type v1 = m_encoding.elt (out_base); > + if (clamp (v1 - v0) != in_step) > + return false; > + > + out_base += out_step; > + } > + return true; > + } > + > /* Return true if all elements of the permutation vector are in the range > [START, START + SIZE). */ > > *************** vec_perm_indices_to_rtx (machine_mode mo > *** 180,182 **** > --- 229,280 ---- > RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode)); > return gen_rtx_CONST_VECTOR (mode, v); > } > + > + #if CHECKING_P > + > + namespace selftest { > + > + /* Test a 12-element vector. */ > + > + static void > + test_vec_perm_12 (void) > + { > + vec_perm_builder builder (12, 12, 1); > + for (unsigned int i = 0; i < 4; ++i) > + { > + builder.quick_push (i * 5); > + builder.quick_push (3 + i); > + builder.quick_push (2 + 3 * i); > + } > + vec_perm_indices indices (builder, 1, 12); > + ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); > + ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); > + ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); > + ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); > + ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); > + > + ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); > + ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); > + > + ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); > + ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); > + > + ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); > + ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); > + > + ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); > + ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); > + ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); > + } > + > + /* Run selftests for this file. */ > + > + void > + vec_perm_indices_c_tests () > + { > + test_vec_perm_12 (); > + } > + > + } // namespace selftest > + > + #endif > Index: gcc/fold-const.c > =================================================================== > *** gcc/fold-const.c 2017-12-09 23:18:12.040041251 +0000 > --- gcc/fold-const.c 2017-12-09 23:21:51.517599734 +0000 > *************** fold_ternary_loc (location_t loc, enum t > *** 11547,11645 **** > case VEC_PERM_EXPR: > if (TREE_CODE (arg2) == VECTOR_CST) > { > ! unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; > ! bool need_mask_canon = false; > ! bool need_mask_canon2 = false; > ! bool all_in_vec0 = true; > ! bool all_in_vec1 = true; > ! bool maybe_identity = true; > ! bool single_arg = (op0 == op1); > ! bool changed = false; > ! > ! mask2 = 2 * nelts - 1; > ! mask = single_arg ? (nelts - 1) : mask2; > ! gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); > ! vec_perm_builder sel (nelts, nelts, 1); > ! vec_perm_builder sel2 (nelts, nelts, 1); > ! for (i = 0; i < nelts; i++) > ! { > ! tree val = VECTOR_CST_ELT (arg2, i); > ! if (TREE_CODE (val) != INTEGER_CST) > ! return NULL_TREE; > ! > ! /* Make sure that the perm value is in an acceptable > ! range. */ > ! wi::tree_to_wide_ref t = wi::to_wide (val); > ! need_mask_canon |= wi::gtu_p (t, mask); > ! need_mask_canon2 |= wi::gtu_p (t, mask2); > ! unsigned int elt = t.to_uhwi () & mask; > ! unsigned int elt2 = t.to_uhwi () & mask2; > ! > ! if (elt < nelts) > ! all_in_vec1 = false; > ! else > ! all_in_vec0 = false; > ! > ! if ((elt & (nelts - 1)) != i) > ! maybe_identity = false; > ! > ! sel.quick_push (elt); > ! sel2.quick_push (elt2); > ! } > > ! if (maybe_identity) > ! { > ! if (all_in_vec0) > ! return op0; > ! if (all_in_vec1) > ! return op1; > ! } > > ! if (all_in_vec0) > ! op1 = op0; > ! else if (all_in_vec1) > ! { > ! op0 = op1; > ! for (i = 0; i < nelts; i++) > ! sel[i] -= nelts; > ! need_mask_canon = true; > } > > - vec_perm_indices indices (sel, 2, nelts); > if ((TREE_CODE (op0) == VECTOR_CST > || TREE_CODE (op0) == CONSTRUCTOR) > && (TREE_CODE (op1) == VECTOR_CST > || TREE_CODE (op1) == CONSTRUCTOR)) > { > ! tree t = fold_vec_perm (type, op0, op1, indices); > if (t != NULL_TREE) > return t; > } > > ! if (op0 == op1 && !single_arg) > ! changed = true; > > ! /* Some targets are deficient and fail to expand a single > ! argument permutation while still allowing an equivalent > ! 2-argument version. */ > ! if (need_mask_canon && arg2 == op2 > ! && !can_vec_perm_const_p (TYPE_MODE (type), indices, false) > ! && can_vec_perm_const_p (TYPE_MODE (type), > ! vec_perm_indices (sel2, 2, nelts), > ! false)) > { > ! need_mask_canon = need_mask_canon2; > ! sel.truncate (0); > ! sel.splice (sel2); > ! } > ! > ! if (need_mask_canon && arg2 == op2) > ! { > ! tree eltype = TREE_TYPE (TREE_TYPE (arg2)); > ! tree_vector_builder tsel (TREE_TYPE (arg2), nelts, 1); > ! for (i = 0; i < nelts; i++) > ! tsel.quick_push (build_int_cst (eltype, sel[i])); > ! op2 = tsel.build (); > changed = true; > } > > --- 11547,11611 ---- > case VEC_PERM_EXPR: > if (TREE_CODE (arg2) == VECTOR_CST) > { > ! /* Build a vector of integers from the tree mask. */ > ! vec_perm_builder builder; > ! if (!tree_to_vec_perm_builder (&builder, arg2)) > ! return NULL_TREE; > > ! /* Create a vec_perm_indices for the integer vector. */ > ! unsigned int nelts = TYPE_VECTOR_SUBPARTS (type); > ! bool single_arg = (op0 == op1); > ! vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); > > ! /* Check for cases that fold to OP0 or OP1 in their original > ! element order. */ > ! if (sel.series_p (0, 1, 0, 1)) > ! return op0; > ! if (sel.series_p (0, 1, nelts, 1)) > ! return op1; > ! > ! if (!single_arg) > ! { > ! if (sel.all_from_input_p (0)) > ! op1 = op0; > ! else if (sel.all_from_input_p (1)) > ! { > ! op0 = op1; > ! sel.rotate_inputs (1); > ! } > } > > if ((TREE_CODE (op0) == VECTOR_CST > || TREE_CODE (op0) == CONSTRUCTOR) > && (TREE_CODE (op1) == VECTOR_CST > || TREE_CODE (op1) == CONSTRUCTOR)) > { > ! tree t = fold_vec_perm (type, op0, op1, sel); > if (t != NULL_TREE) > return t; > } > > ! bool changed = (op0 == op1 && !single_arg); > > ! /* Generate a canonical form of the selector. */ > ! if (arg2 == op2 && sel.encoding () != builder) > { > ! /* Some targets are deficient and fail to expand a single > ! argument permutation while still allowing an equivalent > ! 2-argument version. */ > ! if (sel.ninputs () == 2 > ! || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) > ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); > ! else > ! { > ! vec_perm_indices sel2 (builder, 2, nelts); > ! if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) > ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2); > ! else > ! /* Not directly supported with either encoding, > ! so use the preferred form. */ > ! op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); > ! } > changed = true; > } >