Message ID | 87a81mr8th.fsf@linaro.org |
---|---|
State | New |
Headers | show |
Series | Update interface to TARGET_VECTORIZE_VEC_PERM_CONST_OK | expand |
On Fri, Sep 22, 2017 at 6:34 PM, Richard Sandiford <richard.sandiford@linaro.org> wrote: > This patch makes TARGET_VECTORIZE_VEC_PERM_CONST_OK take the permute > vector in the form of a vec_perm_indices instead of an unsigned char *. > It follows on from the recent patch that did the same in target-independent > code. > > It was easy to make ARM and AArch64 use vec_perm_indices internally > as well, and converting AArch64 helps with SVE. I did try doing the same > for the other ports, but the surgery needed was much more invasive and > much less obviously correct. > > Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. > Also tested by comparing the testsuite assembly output on at least one > target per CPU directory. OK to install? Ok. Thanks, Richard. > Richard > > > 2017-09-22 Richard Sandiford <richard.sandiford@linaro.org> > > gcc/ > * target.def (vec_perm_const_ok): Change sel parameter to > vec_perm_indices. > * optabs-query.c (can_vec_perm_p): Update accordingly. > * doc/tm.texi: Regenerate. > * config/aarch64/aarch64.c (expand_vec_perm_d): Change perm to > auto_vec_perm_indices and remove separate nelt field. > (aarch64_evpc_trn, aarch64_evpc_uzp, aarch64_evpc_zip) > (aarch64_evpc_ext, aarch64_evpc_rev, aarch64_evpc_dup) > (aarch64_evpc_tbl, aarch64_expand_vec_perm_const_1) > (aarch64_expand_vec_perm_const): Update accordingly. > (aarch64_vectorize_vec_perm_const_ok): Likewise. Change sel > to vec_perm_indices. > * config/arm/arm.c (expand_vec_perm_d): Change perm to > auto_vec_perm_indices and remove separate nelt field. > (arm_evpc_neon_vuzp, arm_evpc_neon_vzip, arm_evpc_neon_vrev) > (arm_evpc_neon_vtrn, arm_evpc_neon_vext, arm_evpc_neon_vtbl) > (arm_expand_vec_perm_const_1, arm_expand_vec_perm_const): Update > accordingly. > (arm_vectorize_vec_perm_const_ok): Likewise. Change sel > to vec_perm_indices. > * config/i386/i386.c (ix86_vectorize_vec_perm_const_ok): Change > sel to vec_perm_indices. 
> * config/ia64/ia64.c (ia64_vectorize_vec_perm_const_ok): Likewise. > * config/mips/mips.c (mips_vectorize_vec_perm_const_ok): Likewise. > * config/powerpcspe/powerpcspe.c (rs6000_vectorize_vec_perm_const_ok): > Likewise. > * config/rs6000/rs6000.c (rs6000_vectorize_vec_perm_const_ok): > Likewise. > > Index: gcc/target.def > =================================================================== > --- gcc/target.def 2017-09-22 17:31:36.935337179 +0100 > +++ gcc/target.def 2017-09-22 17:31:56.428954480 +0100 > @@ -1847,7 +1847,7 @@ DEFHOOK > DEFHOOK > (vec_perm_const_ok, > "Return true if a vector created for @code{vec_perm_const} is valid.", > - bool, (machine_mode, const unsigned char *sel), > + bool, (machine_mode, vec_perm_indices), > NULL) > > /* Return true if the target supports misaligned store/load of a > Index: gcc/optabs-query.c > =================================================================== > --- gcc/optabs-query.c 2017-09-14 17:04:19.080694343 +0100 > +++ gcc/optabs-query.c 2017-09-22 17:31:56.428006577 +0100 > @@ -367,7 +367,7 @@ can_vec_perm_p (machine_mode mode, bool > if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing > && (sel == NULL > || targetm.vectorize.vec_perm_const_ok == NULL > - || targetm.vectorize.vec_perm_const_ok (mode, &(*sel)[0]))) > + || targetm.vectorize.vec_perm_const_ok (mode, *sel))) > return true; > } > > Index: gcc/doc/tm.texi > =================================================================== > --- gcc/doc/tm.texi 2017-09-22 17:31:36.933441374 +0100 > +++ gcc/doc/tm.texi 2017-09-22 17:31:56.428006577 +0100 > @@ -5774,7 +5774,7 @@ correct for most targets. > Return true if vector alignment is reachable (by peeling N iterations) for the given scalar type @var{type}. @var{is_packed} is false if the scalar access using @var{type} is known to be naturally aligned. 
> @end deftypefn > > -@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (machine_mode, const unsigned char *@var{sel}) > +@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (machine_mode, @var{vec_perm_indices}) > Return true if a vector created for @code{vec_perm_const} is valid. > @end deftypefn > > Index: gcc/config/aarch64/aarch64.c > =================================================================== > --- gcc/config/aarch64/aarch64.c 2017-09-21 11:53:16.681759682 +0100 > +++ gcc/config/aarch64/aarch64.c 2017-09-22 17:31:56.412840135 +0100 > @@ -141,8 +141,8 @@ static void aarch64_elf_asm_constructor > static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; > static void aarch64_override_options_after_change (void); > static bool aarch64_vector_mode_supported_p (machine_mode); > -static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel); > +static bool aarch64_vectorize_vec_perm_const_ok (machine_mode, > + vec_perm_indices); > static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool); > static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, > const_tree type, > @@ -13146,9 +13146,8 @@ #define MAX_VECT_LEN 16 > struct expand_vec_perm_d > { > rtx target, op0, op1; > - unsigned char perm[MAX_VECT_LEN]; > + auto_vec_perm_indices perm; > machine_mode vmode; > - unsigned char nelt; > bool one_vector_p; > bool testing_p; > }; > @@ -13231,7 +13230,7 @@ aarch64_expand_vec_perm (rtx target, rtx > static bool > aarch64_evpc_trn (struct expand_vec_perm_d *d) > { > - unsigned int i, odd, mask, nelt = d->nelt; > + unsigned int i, odd, mask, nelt = d->perm.length (); > rtx out, in0, in1, x; > rtx (*gen) (rtx, rtx, rtx); > machine_mode vmode = d->vmode; > @@ -13319,7 +13318,7 @@ aarch64_evpc_trn (struct expand_vec_perm > static bool > aarch64_evpc_uzp (struct expand_vec_perm_d *d) > { > - unsigned int i, odd, mask, nelt = d->nelt; > + unsigned int i, odd, mask, 
nelt = d->perm.length (); > rtx out, in0, in1, x; > rtx (*gen) (rtx, rtx, rtx); > machine_mode vmode = d->vmode; > @@ -13406,7 +13405,7 @@ aarch64_evpc_uzp (struct expand_vec_perm > static bool > aarch64_evpc_zip (struct expand_vec_perm_d *d) > { > - unsigned int i, high, mask, nelt = d->nelt; > + unsigned int i, high, mask, nelt = d->perm.length (); > rtx out, in0, in1, x; > rtx (*gen) (rtx, rtx, rtx); > machine_mode vmode = d->vmode; > @@ -13499,7 +13498,7 @@ aarch64_evpc_zip (struct expand_vec_perm > static bool > aarch64_evpc_ext (struct expand_vec_perm_d *d) > { > - unsigned int i, nelt = d->nelt; > + unsigned int i, nelt = d->perm.length (); > rtx (*gen) (rtx, rtx, rtx, rtx); > rtx offset; > > @@ -13563,7 +13562,7 @@ aarch64_evpc_ext (struct expand_vec_perm > static bool > aarch64_evpc_rev (struct expand_vec_perm_d *d) > { > - unsigned int i, j, diff, nelt = d->nelt; > + unsigned int i, j, diff, nelt = d->perm.length (); > rtx (*gen) (rtx, rtx); > > if (!d->one_vector_p) > @@ -13641,7 +13640,7 @@ aarch64_evpc_dup (struct expand_vec_perm > rtx out = d->target; > rtx in0; > machine_mode vmode = d->vmode; > - unsigned int i, elt, nelt = d->nelt; > + unsigned int i, elt, nelt = d->perm.length (); > rtx lane; > > elt = d->perm[0]; > @@ -13686,7 +13685,7 @@ aarch64_evpc_tbl (struct expand_vec_perm > { > rtx rperm[MAX_VECT_LEN], sel; > machine_mode vmode = d->vmode; > - unsigned int i, nelt = d->nelt; > + unsigned int i, nelt = d->perm.length (); > > if (d->testing_p) > return true; > @@ -13720,12 +13719,11 @@ aarch64_expand_vec_perm_const_1 (struct > /* The pattern matching functions above are written to look for a small > number to begin the sequence (0, 1, N/2). If we begin with an index > from the second operand, we can swap the operands. 
*/ > - if (d->perm[0] >= d->nelt) > + unsigned int nelt = d->perm.length (); > + if (d->perm[0] >= nelt) > { > - unsigned i, nelt = d->nelt; > - > gcc_assert (nelt == (nelt & -nelt)); > - for (i = 0; i < nelt; ++i) > + for (unsigned int i = 0; i < nelt; ++i) > d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */ > > std::swap (d->op0, d->op1); > @@ -13764,15 +13762,16 @@ aarch64_expand_vec_perm_const (rtx targe > > d.vmode = GET_MODE (target); > gcc_assert (VECTOR_MODE_P (d.vmode)); > - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); > d.testing_p = false; > > + nelt = GET_MODE_NUNITS (d.vmode); > + d.perm.reserve (nelt); > for (i = which = 0; i < nelt; ++i) > { > rtx e = XVECEXP (sel, 0, i); > int ei = INTVAL (e) & (2 * nelt - 1); > which |= (ei < nelt ? 1 : 2); > - d.perm[i] = ei; > + d.perm.quick_push (ei); > } > > switch (which) > @@ -13807,19 +13806,18 @@ aarch64_expand_vec_perm_const (rtx targe > } > > static bool > -aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel) > +aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) > { > struct expand_vec_perm_d d; > unsigned int i, nelt, which; > bool ret; > > d.vmode = vmode; > - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); > d.testing_p = true; > - memcpy (d.perm, sel, nelt); > + d.perm.safe_splice (sel); > > /* Calculate whether all elements are in one vector. 
*/ > + nelt = sel.length (); > for (i = which = 0; i < nelt; ++i) > { > unsigned char e = d.perm[i]; > Index: gcc/config/arm/arm.c > =================================================================== > --- gcc/config/arm/arm.c 2017-09-22 17:22:08.191305805 +0100 > +++ gcc/config/arm/arm.c 2017-09-22 17:31:56.414735941 +0100 > @@ -287,8 +287,7 @@ static int arm_cortex_a5_branch_cost (bo > static int arm_cortex_m_branch_cost (bool, bool); > static int arm_cortex_m7_branch_cost (bool, bool); > > -static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel); > +static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices); > > static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*); > > @@ -28657,9 +28656,8 @@ #define MAX_VECT_LEN 16 > struct expand_vec_perm_d > { > rtx target, op0, op1; > - unsigned char perm[MAX_VECT_LEN]; > + auto_vec_perm_indices perm; > machine_mode vmode; > - unsigned char nelt; > bool one_vector_p; > bool testing_p; > }; > @@ -28766,7 +28764,7 @@ neon_pair_endian_lane_map (machine_mode > static bool > arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) > { > - unsigned int i, odd, mask, nelt = d->nelt; > + unsigned int i, odd, mask, nelt = d->perm.length (); > rtx out0, out1, in0, in1; > rtx (*gen)(rtx, rtx, rtx, rtx); > int first_elem; > @@ -28778,7 +28776,7 @@ arm_evpc_neon_vuzp (struct expand_vec_pe > /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the > big endian pattern on 64 bit vectors, so we correct for that. */ > swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p > - && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0; > + && GET_MODE_SIZE (d->vmode) == 8 ? 
nelt : 0; > > first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt; > > @@ -28837,7 +28835,7 @@ arm_evpc_neon_vuzp (struct expand_vec_pe > static bool > arm_evpc_neon_vzip (struct expand_vec_perm_d *d) > { > - unsigned int i, high, mask, nelt = d->nelt; > + unsigned int i, high, mask, nelt = d->perm.length (); > rtx out0, out1, in0, in1; > rtx (*gen)(rtx, rtx, rtx, rtx); > int first_elem; > @@ -28912,7 +28910,7 @@ arm_evpc_neon_vzip (struct expand_vec_pe > static bool > arm_evpc_neon_vrev (struct expand_vec_perm_d *d) > { > - unsigned int i, j, diff, nelt = d->nelt; > + unsigned int i, j, diff, nelt = d->perm.length (); > rtx (*gen)(rtx, rtx); > > if (!d->one_vector_p) > @@ -28988,7 +28986,7 @@ arm_evpc_neon_vrev (struct expand_vec_pe > static bool > arm_evpc_neon_vtrn (struct expand_vec_perm_d *d) > { > - unsigned int i, odd, mask, nelt = d->nelt; > + unsigned int i, odd, mask, nelt = d->perm.length (); > rtx out0, out1, in0, in1; > rtx (*gen)(rtx, rtx, rtx, rtx); > > @@ -29054,7 +29052,7 @@ arm_evpc_neon_vtrn (struct expand_vec_pe > static bool > arm_evpc_neon_vext (struct expand_vec_perm_d *d) > { > - unsigned int i, nelt = d->nelt; > + unsigned int i, nelt = d->perm.length (); > rtx (*gen) (rtx, rtx, rtx, rtx); > rtx offset; > > @@ -29128,7 +29126,7 @@ arm_evpc_neon_vtbl (struct expand_vec_pe > { > rtx rperm[MAX_VECT_LEN], sel; > machine_mode vmode = d->vmode; > - unsigned int i, nelt = d->nelt; > + unsigned int i, nelt = d->perm.length (); > > /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's > numbering of elements for big-endian, we must reverse the order. */ > @@ -29165,11 +29163,10 @@ arm_expand_vec_perm_const_1 (struct expa > /* The pattern matching functions above are written to look for a small > number to begin the sequence (0, 1, N/2). If we begin with an index > from the second operand, we can swap the operands. 
*/ > - if (d->perm[0] >= d->nelt) > + unsigned int nelt = d->perm.length (); > + if (d->perm[0] >= nelt) > { > - unsigned i, nelt = d->nelt; > - > - for (i = 0; i < nelt; ++i) > + for (unsigned int i = 0; i < nelt; ++i) > d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1); > > std::swap (d->op0, d->op1); > @@ -29204,15 +29201,16 @@ arm_expand_vec_perm_const (rtx target, r > > d.vmode = GET_MODE (target); > gcc_assert (VECTOR_MODE_P (d.vmode)); > - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); > d.testing_p = false; > > + nelt = GET_MODE_NUNITS (d.vmode); > + d.perm.reserve (nelt); > for (i = which = 0; i < nelt; ++i) > { > rtx e = XVECEXP (sel, 0, i); > int ei = INTVAL (e) & (2 * nelt - 1); > which |= (ei < nelt ? 1 : 2); > - d.perm[i] = ei; > + d.perm.quick_push (ei); > } > > switch (which) > @@ -29249,19 +29247,18 @@ arm_expand_vec_perm_const (rtx target, r > /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ > > static bool > -arm_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel) > +arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) > { > struct expand_vec_perm_d d; > unsigned int i, nelt, which; > bool ret; > > d.vmode = vmode; > - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); > d.testing_p = true; > - memcpy (d.perm, sel, nelt); > + d.perm.safe_splice (sel); > > /* Categorize the set of elements in the selector. */ > + nelt = GET_MODE_NUNITS (d.vmode); > for (i = which = 0; i < nelt; ++i) > { > unsigned char e = d.perm[i]; > Index: gcc/config/i386/i386.c > =================================================================== > --- gcc/config/i386/i386.c 2017-09-22 17:22:08.149305815 +0100 > +++ gcc/config/i386/i386.c 2017-09-22 17:31:56.418527551 +0100 > @@ -50024,8 +50024,7 @@ ix86_expand_vec_perm_const (rtx operands > /* Implement targetm.vectorize.vec_perm_const_ok. 
*/ > > static bool > -ix86_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel) > +ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) > { > struct expand_vec_perm_d d; > unsigned int i, nelt, which; > @@ -50096,11 +50095,11 @@ ix86_vectorize_vec_perm_const_ok (machin > > /* Extract the values from the vector CST into the permutation > array in D. */ > - memcpy (d.perm, sel, nelt); > for (i = which = 0; i < nelt; ++i) > { > - unsigned char e = d.perm[i]; > + unsigned char e = sel[i]; > gcc_assert (e < 2 * nelt); > + d.perm[i] = e; > which |= (e < nelt ? 1 : 2); > } > > Index: gcc/config/ia64/ia64.c > =================================================================== > --- gcc/config/ia64/ia64.c 2017-09-21 11:53:16.654742357 +0100 > +++ gcc/config/ia64/ia64.c 2017-09-22 17:31:56.419475454 +0100 > @@ -333,8 +333,7 @@ static machine_mode ia64_get_reg_raw_mod > static section * ia64_hpux_function_section (tree, enum node_frequency, > bool, bool); > > -static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel); > +static bool ia64_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices); > > static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode); > static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode); > @@ -11824,8 +11823,7 @@ ia64_expand_vec_perm_const (rtx operands > /* Implement targetm.vectorize.vec_perm_const_ok. */ > > static bool > -ia64_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel) > +ia64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) > { > struct expand_vec_perm_d d; > unsigned int i, nelt, which; > @@ -11837,10 +11835,10 @@ ia64_vectorize_vec_perm_const_ok (machin > > /* Extract the values from the vector CST into the permutation > array in D. 
*/ > - memcpy (d.perm, sel, nelt); > for (i = which = 0; i < nelt; ++i) > { > - unsigned char e = d.perm[i]; > + unsigned char e = sel[i]; > + d.perm[i] = e; > gcc_assert (e < 2 * nelt); > which |= (e < nelt ? 1 : 2); > } > Index: gcc/config/mips/mips.c > =================================================================== > --- gcc/config/mips/mips.c 2017-09-21 11:53:16.776320319 +0100 > +++ gcc/config/mips/mips.c 2017-09-22 17:31:56.421371259 +0100 > @@ -21470,8 +21470,7 @@ mips_sched_reassociation_width (unsigned > /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ > > static bool > -mips_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel) > +mips_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) > { > struct expand_vec_perm_d d; > unsigned int i, nelt, which; > @@ -21480,12 +21479,12 @@ mips_vectorize_vec_perm_const_ok (machin > d.vmode = vmode; > d.nelt = nelt = GET_MODE_NUNITS (d.vmode); > d.testing_p = true; > - memcpy (d.perm, sel, nelt); > > /* Categorize the set of elements in the selector. */ > for (i = which = 0; i < nelt; ++i) > { > - unsigned char e = d.perm[i]; > + unsigned char e = sel[i]; > + d.perm[i] = e; > gcc_assert (e < 2 * nelt); > which |= (e < nelt ? 1 : 2); > } > Index: gcc/config/powerpcspe/powerpcspe.c > =================================================================== > --- gcc/config/powerpcspe/powerpcspe.c 2017-09-21 11:53:16.643935427 +0100 > +++ gcc/config/powerpcspe/powerpcspe.c 2017-09-22 17:31:56.424214967 +0100 > @@ -38731,8 +38731,7 @@ rs6000_expand_vec_perm_const (rtx operan > /* Test whether a constant permutation is supported. */ > > static bool > -rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel) > +rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) > { > /* AltiVec (and thus VSX) can handle arbitrary permutations. 
*/ > if (TARGET_ALTIVEC) > Index: gcc/config/rs6000/rs6000.c > =================================================================== > --- gcc/config/rs6000/rs6000.c 2017-09-21 11:53:16.730390867 +0100 > +++ gcc/config/rs6000/rs6000.c 2017-09-22 17:31:56.427058675 +0100 > @@ -35594,8 +35594,7 @@ rs6000_expand_vec_perm_const (rtx operan > /* Test whether a constant permutation is supported. */ > > static bool > -rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, > - const unsigned char *sel) > +rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) > { > /* AltiVec (and thus VSX) can handle arbitrary permutations. */ > if (TARGET_ALTIVEC)
Index: gcc/target.def =================================================================== --- gcc/target.def 2017-09-22 17:31:36.935337179 +0100 +++ gcc/target.def 2017-09-22 17:31:56.428954480 +0100 @@ -1847,7 +1847,7 @@ DEFHOOK DEFHOOK (vec_perm_const_ok, "Return true if a vector created for @code{vec_perm_const} is valid.", - bool, (machine_mode, const unsigned char *sel), + bool, (machine_mode, vec_perm_indices), NULL) /* Return true if the target supports misaligned store/load of a Index: gcc/optabs-query.c =================================================================== --- gcc/optabs-query.c 2017-09-14 17:04:19.080694343 +0100 +++ gcc/optabs-query.c 2017-09-22 17:31:56.428006577 +0100 @@ -367,7 +367,7 @@ can_vec_perm_p (machine_mode mode, bool if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing && (sel == NULL || targetm.vectorize.vec_perm_const_ok == NULL - || targetm.vectorize.vec_perm_const_ok (mode, &(*sel)[0]))) + || targetm.vectorize.vec_perm_const_ok (mode, *sel))) return true; } Index: gcc/doc/tm.texi =================================================================== --- gcc/doc/tm.texi 2017-09-22 17:31:36.933441374 +0100 +++ gcc/doc/tm.texi 2017-09-22 17:31:56.428006577 +0100 @@ -5774,7 +5774,7 @@ correct for most targets. Return true if vector alignment is reachable (by peeling N iterations) for the given scalar type @var{type}. @var{is_packed} is false if the scalar access using @var{type} is known to be naturally aligned. @end deftypefn -@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (machine_mode, const unsigned char *@var{sel}) +@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (machine_mode, @var{vec_perm_indices}) Return true if a vector created for @code{vec_perm_const} is valid. 
@end deftypefn Index: gcc/config/aarch64/aarch64.c =================================================================== --- gcc/config/aarch64/aarch64.c 2017-09-21 11:53:16.681759682 +0100 +++ gcc/config/aarch64/aarch64.c 2017-09-22 17:31:56.412840135 +0100 @@ -141,8 +141,8 @@ static void aarch64_elf_asm_constructor static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_override_options_after_change (void); static bool aarch64_vector_mode_supported_p (machine_mode); -static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel); +static bool aarch64_vectorize_vec_perm_const_ok (machine_mode, + vec_perm_indices); static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool); static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, const_tree type, @@ -13146,9 +13146,8 @@ #define MAX_VECT_LEN 16 struct expand_vec_perm_d { rtx target, op0, op1; - unsigned char perm[MAX_VECT_LEN]; + auto_vec_perm_indices perm; machine_mode vmode; - unsigned char nelt; bool one_vector_p; bool testing_p; }; @@ -13231,7 +13230,7 @@ aarch64_expand_vec_perm (rtx target, rtx static bool aarch64_evpc_trn (struct expand_vec_perm_d *d) { - unsigned int i, odd, mask, nelt = d->nelt; + unsigned int i, odd, mask, nelt = d->perm.length (); rtx out, in0, in1, x; rtx (*gen) (rtx, rtx, rtx); machine_mode vmode = d->vmode; @@ -13319,7 +13318,7 @@ aarch64_evpc_trn (struct expand_vec_perm static bool aarch64_evpc_uzp (struct expand_vec_perm_d *d) { - unsigned int i, odd, mask, nelt = d->nelt; + unsigned int i, odd, mask, nelt = d->perm.length (); rtx out, in0, in1, x; rtx (*gen) (rtx, rtx, rtx); machine_mode vmode = d->vmode; @@ -13406,7 +13405,7 @@ aarch64_evpc_uzp (struct expand_vec_perm static bool aarch64_evpc_zip (struct expand_vec_perm_d *d) { - unsigned int i, high, mask, nelt = d->nelt; + unsigned int i, high, mask, nelt = d->perm.length (); rtx out, in0, in1, x; rtx (*gen) (rtx, rtx, rtx); 
machine_mode vmode = d->vmode; @@ -13499,7 +13498,7 @@ aarch64_evpc_zip (struct expand_vec_perm static bool aarch64_evpc_ext (struct expand_vec_perm_d *d) { - unsigned int i, nelt = d->nelt; + unsigned int i, nelt = d->perm.length (); rtx (*gen) (rtx, rtx, rtx, rtx); rtx offset; @@ -13563,7 +13562,7 @@ aarch64_evpc_ext (struct expand_vec_perm static bool aarch64_evpc_rev (struct expand_vec_perm_d *d) { - unsigned int i, j, diff, nelt = d->nelt; + unsigned int i, j, diff, nelt = d->perm.length (); rtx (*gen) (rtx, rtx); if (!d->one_vector_p) @@ -13641,7 +13640,7 @@ aarch64_evpc_dup (struct expand_vec_perm rtx out = d->target; rtx in0; machine_mode vmode = d->vmode; - unsigned int i, elt, nelt = d->nelt; + unsigned int i, elt, nelt = d->perm.length (); rtx lane; elt = d->perm[0]; @@ -13686,7 +13685,7 @@ aarch64_evpc_tbl (struct expand_vec_perm { rtx rperm[MAX_VECT_LEN], sel; machine_mode vmode = d->vmode; - unsigned int i, nelt = d->nelt; + unsigned int i, nelt = d->perm.length (); if (d->testing_p) return true; @@ -13720,12 +13719,11 @@ aarch64_expand_vec_perm_const_1 (struct /* The pattern matching functions above are written to look for a small number to begin the sequence (0, 1, N/2). If we begin with an index from the second operand, we can swap the operands. */ - if (d->perm[0] >= d->nelt) + unsigned int nelt = d->perm.length (); + if (d->perm[0] >= nelt) { - unsigned i, nelt = d->nelt; - gcc_assert (nelt == (nelt & -nelt)); - for (i = 0; i < nelt; ++i) + for (unsigned int i = 0; i < nelt; ++i) d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. 
*/ std::swap (d->op0, d->op1); @@ -13764,15 +13762,16 @@ aarch64_expand_vec_perm_const (rtx targe d.vmode = GET_MODE (target); gcc_assert (VECTOR_MODE_P (d.vmode)); - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); d.testing_p = false; + nelt = GET_MODE_NUNITS (d.vmode); + d.perm.reserve (nelt); for (i = which = 0; i < nelt; ++i) { rtx e = XVECEXP (sel, 0, i); int ei = INTVAL (e) & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); - d.perm[i] = ei; + d.perm.quick_push (ei); } switch (which) @@ -13807,19 +13806,18 @@ aarch64_expand_vec_perm_const (rtx targe } static bool -aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel) +aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) { struct expand_vec_perm_d d; unsigned int i, nelt, which; bool ret; d.vmode = vmode; - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); d.testing_p = true; - memcpy (d.perm, sel, nelt); + d.perm.safe_splice (sel); /* Calculate whether all elements are in one vector. */ + nelt = sel.length (); for (i = which = 0; i < nelt; ++i) { unsigned char e = d.perm[i]; Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c 2017-09-22 17:22:08.191305805 +0100 +++ gcc/config/arm/arm.c 2017-09-22 17:31:56.414735941 +0100 @@ -287,8 +287,7 @@ static int arm_cortex_a5_branch_cost (bo static int arm_cortex_m_branch_cost (bool, bool); static int arm_cortex_m7_branch_cost (bool, bool); -static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel); +static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices); static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*); @@ -28657,9 +28656,8 @@ #define MAX_VECT_LEN 16 struct expand_vec_perm_d { rtx target, op0, op1; - unsigned char perm[MAX_VECT_LEN]; + auto_vec_perm_indices perm; machine_mode vmode; - unsigned char nelt; bool one_vector_p; bool testing_p; }; @@ -28766,7 +28764,7 @@ neon_pair_endian_lane_map 
(machine_mode static bool arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) { - unsigned int i, odd, mask, nelt = d->nelt; + unsigned int i, odd, mask, nelt = d->perm.length (); rtx out0, out1, in0, in1; rtx (*gen)(rtx, rtx, rtx, rtx); int first_elem; @@ -28778,7 +28776,7 @@ arm_evpc_neon_vuzp (struct expand_vec_pe /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the big endian pattern on 64 bit vectors, so we correct for that. */ swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p - && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0; + && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0; first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt; @@ -28837,7 +28835,7 @@ arm_evpc_neon_vuzp (struct expand_vec_pe static bool arm_evpc_neon_vzip (struct expand_vec_perm_d *d) { - unsigned int i, high, mask, nelt = d->nelt; + unsigned int i, high, mask, nelt = d->perm.length (); rtx out0, out1, in0, in1; rtx (*gen)(rtx, rtx, rtx, rtx); int first_elem; @@ -28912,7 +28910,7 @@ arm_evpc_neon_vzip (struct expand_vec_pe static bool arm_evpc_neon_vrev (struct expand_vec_perm_d *d) { - unsigned int i, j, diff, nelt = d->nelt; + unsigned int i, j, diff, nelt = d->perm.length (); rtx (*gen)(rtx, rtx); if (!d->one_vector_p) @@ -28988,7 +28986,7 @@ arm_evpc_neon_vrev (struct expand_vec_pe static bool arm_evpc_neon_vtrn (struct expand_vec_perm_d *d) { - unsigned int i, odd, mask, nelt = d->nelt; + unsigned int i, odd, mask, nelt = d->perm.length (); rtx out0, out1, in0, in1; rtx (*gen)(rtx, rtx, rtx, rtx); @@ -29054,7 +29052,7 @@ arm_evpc_neon_vtrn (struct expand_vec_pe static bool arm_evpc_neon_vext (struct expand_vec_perm_d *d) { - unsigned int i, nelt = d->nelt; + unsigned int i, nelt = d->perm.length (); rtx (*gen) (rtx, rtx, rtx, rtx); rtx offset; @@ -29128,7 +29126,7 @@ arm_evpc_neon_vtbl (struct expand_vec_pe { rtx rperm[MAX_VECT_LEN], sel; machine_mode vmode = d->vmode; - unsigned int i, nelt = d->nelt; + unsigned int i, nelt = d->perm.length (); /* TODO: ARM's 
VTBL indexing is little-endian. In order to handle GCC's numbering of elements for big-endian, we must reverse the order. */ @@ -29165,11 +29163,10 @@ arm_expand_vec_perm_const_1 (struct expa /* The pattern matching functions above are written to look for a small number to begin the sequence (0, 1, N/2). If we begin with an index from the second operand, we can swap the operands. */ - if (d->perm[0] >= d->nelt) + unsigned int nelt = d->perm.length (); + if (d->perm[0] >= nelt) { - unsigned i, nelt = d->nelt; - - for (i = 0; i < nelt; ++i) + for (unsigned int i = 0; i < nelt; ++i) d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1); std::swap (d->op0, d->op1); @@ -29204,15 +29201,16 @@ arm_expand_vec_perm_const (rtx target, r d.vmode = GET_MODE (target); gcc_assert (VECTOR_MODE_P (d.vmode)); - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); d.testing_p = false; + nelt = GET_MODE_NUNITS (d.vmode); + d.perm.reserve (nelt); for (i = which = 0; i < nelt; ++i) { rtx e = XVECEXP (sel, 0, i); int ei = INTVAL (e) & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); - d.perm[i] = ei; + d.perm.quick_push (ei); } switch (which) @@ -29249,19 +29247,18 @@ arm_expand_vec_perm_const (rtx target, r /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ static bool -arm_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel) +arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) { struct expand_vec_perm_d d; unsigned int i, nelt, which; bool ret; d.vmode = vmode; - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); d.testing_p = true; - memcpy (d.perm, sel, nelt); + d.perm.safe_splice (sel); /* Categorize the set of elements in the selector. 
*/ + nelt = GET_MODE_NUNITS (d.vmode); for (i = which = 0; i < nelt; ++i) { unsigned char e = d.perm[i]; Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c 2017-09-22 17:22:08.149305815 +0100 +++ gcc/config/i386/i386.c 2017-09-22 17:31:56.418527551 +0100 @@ -50024,8 +50024,7 @@ ix86_expand_vec_perm_const (rtx operands /* Implement targetm.vectorize.vec_perm_const_ok. */ static bool -ix86_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel) +ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) { struct expand_vec_perm_d d; unsigned int i, nelt, which; @@ -50096,11 +50095,11 @@ ix86_vectorize_vec_perm_const_ok (machin /* Extract the values from the vector CST into the permutation array in D. */ - memcpy (d.perm, sel, nelt); for (i = which = 0; i < nelt; ++i) { - unsigned char e = d.perm[i]; + unsigned char e = sel[i]; gcc_assert (e < 2 * nelt); + d.perm[i] = e; which |= (e < nelt ? 1 : 2); } Index: gcc/config/ia64/ia64.c =================================================================== --- gcc/config/ia64/ia64.c 2017-09-21 11:53:16.654742357 +0100 +++ gcc/config/ia64/ia64.c 2017-09-22 17:31:56.419475454 +0100 @@ -333,8 +333,7 @@ static machine_mode ia64_get_reg_raw_mod static section * ia64_hpux_function_section (tree, enum node_frequency, bool, bool); -static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel); +static bool ia64_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices); static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode); static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode); @@ -11824,8 +11823,7 @@ ia64_expand_vec_perm_const (rtx operands /* Implement targetm.vectorize.vec_perm_const_ok. 
*/ static bool -ia64_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel) +ia64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) { struct expand_vec_perm_d d; unsigned int i, nelt, which; @@ -11837,10 +11835,10 @@ ia64_vectorize_vec_perm_const_ok (machin /* Extract the values from the vector CST into the permutation array in D. */ - memcpy (d.perm, sel, nelt); for (i = which = 0; i < nelt; ++i) { - unsigned char e = d.perm[i]; + unsigned char e = sel[i]; + d.perm[i] = e; gcc_assert (e < 2 * nelt); which |= (e < nelt ? 1 : 2); } Index: gcc/config/mips/mips.c =================================================================== --- gcc/config/mips/mips.c 2017-09-21 11:53:16.776320319 +0100 +++ gcc/config/mips/mips.c 2017-09-22 17:31:56.421371259 +0100 @@ -21470,8 +21470,7 @@ mips_sched_reassociation_width (unsigned /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ static bool -mips_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel) +mips_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) { struct expand_vec_perm_d d; unsigned int i, nelt, which; @@ -21480,12 +21479,12 @@ mips_vectorize_vec_perm_const_ok (machin d.vmode = vmode; d.nelt = nelt = GET_MODE_NUNITS (d.vmode); d.testing_p = true; - memcpy (d.perm, sel, nelt); /* Categorize the set of elements in the selector. */ for (i = which = 0; i < nelt; ++i) { - unsigned char e = d.perm[i]; + unsigned char e = sel[i]; + d.perm[i] = e; gcc_assert (e < 2 * nelt); which |= (e < nelt ? 1 : 2); } Index: gcc/config/powerpcspe/powerpcspe.c =================================================================== --- gcc/config/powerpcspe/powerpcspe.c 2017-09-21 11:53:16.643935427 +0100 +++ gcc/config/powerpcspe/powerpcspe.c 2017-09-22 17:31:56.424214967 +0100 @@ -38731,8 +38731,7 @@ rs6000_expand_vec_perm_const (rtx operan /* Test whether a constant permutation is supported. 
*/ static bool -rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel) +rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) { /* AltiVec (and thus VSX) can handle arbitrary permutations. */ if (TARGET_ALTIVEC) Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c 2017-09-21 11:53:16.730390867 +0100 +++ gcc/config/rs6000/rs6000.c 2017-09-22 17:31:56.427058675 +0100 @@ -35594,8 +35594,7 @@ rs6000_expand_vec_perm_const (rtx operan /* Test whether a constant permutation is supported. */ static bool -rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel) +rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) { /* AltiVec (and thus VSX) can handle arbitrary permutations. */ if (TARGET_ALTIVEC)