Message ID | 87zi6rlclc.fsf@linaro.org |
---|---|
State | New |
Headers | show |
Series | Make VEC_PERM_EXPR work for variable-length vectors | expand |
On Sun, Dec 10, 2017 at 12:13 AM, Richard Sandiford <richard.sandiford@linaro.org> wrote: > This patch splits the variable handling out of expand_vec_perm into > a subroutine, so that the next patch can use a different interface > for expanding constant permutes. expand_vec_perm now does all the > CONST_VECTOR handling directly and defers to expand_vec_perm_var > for other rtx codes. Handling CONST_VECTORs includes handling the > fallback to variable permutes. > > The patch also adds an assert for valid optab modes to expand_vec_perm_1, > so that we get it when using optabs for CONST_VECTORs. The MODE_VECTOR_INT > part was previously in expand_vec_perm and the mode_for_int_vector part > is new. > > Most of the patch is just reindentation, so I've attached a -b version. Ok. > > 2017-12-06 Richard Sandiford <richard.sandiford@linaro.org> > > gcc/ > * optabs.c (expand_vec_perm_1): Assert that SEL has an integer > vector mode and that that mode matches the mode of the data > being permuted. > (expand_vec_perm): Split handling of non-CONST_VECTOR selectors > out into expand_vec_perm_var. Do all CONST_VECTOR handling here, > directly using expand_vec_perm_1 when forcing selectors into > registers. > (expand_vec_perm_var): New function, split out from expand_vec_perm. > > Index: gcc/optabs.c > =================================================================== > --- gcc/optabs.c 2017-12-09 22:47:14.731310077 +0000 > +++ gcc/optabs.c 2017-12-09 22:47:23.878315657 +0000 > @@ -5405,6 +5405,8 @@ expand_vec_perm_1 (enum insn_code icode, > machine_mode smode = GET_MODE (sel); > struct expand_operand ops[4]; > > + gcc_assert (GET_MODE_CLASS (smode) == MODE_VECTOR_INT > + || mode_for_int_vector (tmode).require () == smode); > create_output_operand (&ops[0], target, tmode); > create_input_operand (&ops[3], sel, smode); > > @@ -5431,8 +5433,13 @@ expand_vec_perm_1 (enum insn_code icode, > return NULL_RTX; > } > > -/* Generate instructions for vec_perm optab given its mode > - and three operands. */ > +static rtx expand_vec_perm_var (machine_mode, rtx, rtx, rtx, rtx); > + > +/* Implement a permutation of vectors v0 and v1 using the permutation > + vector in SEL and return the result. Use TARGET to hold the result > + if nonnull and convenient. > + > + MODE is the mode of the vectors being permuted (V0 and V1). */ > > rtx > expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) > @@ -5443,6 +5450,9 @@ expand_vec_perm (machine_mode mode, rtx > rtx tmp, sel_qi = NULL; > rtvec vec; > > + if (GET_CODE (sel) != CONST_VECTOR) > + return expand_vec_perm_var (mode, v0, v1, sel, target); > + > if (!target || GET_MODE (target) != mode) > target = gen_reg_rtx (mode); > > @@ -5455,86 +5465,125 @@ expand_vec_perm (machine_mode mode, rtx > if (!qimode_for_vec_perm (mode).exists (&qimode)) > qimode = VOIDmode; > > - /* If the input is a constant, expand it specially. */ > - gcc_assert (GET_MODE_CLASS (GET_MODE (sel)) == MODE_VECTOR_INT); > - if (GET_CODE (sel) == CONST_VECTOR) > - { > - /* See if this can be handled with a vec_shr. We only do this if the > - second vector is all zeroes. */ > - enum insn_code shift_code = optab_handler (vec_shr_optab, mode); > - enum insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode) > - ? optab_handler (vec_shr_optab, qimode) > - : CODE_FOR_nothing); > - rtx shift_amt = NULL_RTX; > - if (v1 == CONST0_RTX (GET_MODE (v1)) > - && (shift_code != CODE_FOR_nothing > - || shift_code_qi != CODE_FOR_nothing)) > + /* See if this can be handled with a vec_shr. We only do this if the > + second vector is all zeroes. */ > + insn_code shift_code = optab_handler (vec_shr_optab, mode); > + insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode) > + ? optab_handler (vec_shr_optab, qimode) > + : CODE_FOR_nothing); > + > + if (v1 == CONST0_RTX (GET_MODE (v1)) > + && (shift_code != CODE_FOR_nothing > + || shift_code_qi != CODE_FOR_nothing)) > + { > + rtx shift_amt = shift_amt_for_vec_perm_mask (sel); > + if (shift_amt) > { > - shift_amt = shift_amt_for_vec_perm_mask (sel); > - if (shift_amt) > + struct expand_operand ops[3]; > + if (shift_code != CODE_FOR_nothing) > { > - struct expand_operand ops[3]; > - if (shift_code != CODE_FOR_nothing) > - { > - create_output_operand (&ops[0], target, mode); > - create_input_operand (&ops[1], v0, mode); > - create_convert_operand_from_type (&ops[2], shift_amt, > - sizetype); > - if (maybe_expand_insn (shift_code, 3, ops)) > - return ops[0].value; > - } > - if (shift_code_qi != CODE_FOR_nothing) > - { > - tmp = gen_reg_rtx (qimode); > - create_output_operand (&ops[0], tmp, qimode); > - create_input_operand (&ops[1], gen_lowpart (qimode, v0), > - qimode); > - create_convert_operand_from_type (&ops[2], shift_amt, > - sizetype); > - if (maybe_expand_insn (shift_code_qi, 3, ops)) > - return gen_lowpart (mode, ops[0].value); > - } > + create_output_operand (&ops[0], target, mode); > + create_input_operand (&ops[1], v0, mode); > + create_convert_operand_from_type (&ops[2], shift_amt, sizetype); > + if (maybe_expand_insn (shift_code, 3, ops)) > + return ops[0].value; > + } > + if (shift_code_qi != CODE_FOR_nothing) > + { > + rtx tmp = gen_reg_rtx (qimode); > + create_output_operand (&ops[0], tmp, qimode); > + create_input_operand (&ops[1], gen_lowpart (qimode, v0), qimode); > + create_convert_operand_from_type (&ops[2], shift_amt, sizetype); > + if (maybe_expand_insn (shift_code_qi, 3, ops)) > + return gen_lowpart (mode, ops[0].value); > } > } > + } > > - icode = direct_optab_handler (vec_perm_const_optab, mode); > - if (icode != CODE_FOR_nothing) > + icode = direct_optab_handler (vec_perm_const_optab, mode); > + if (icode != CODE_FOR_nothing) > + { > + tmp = expand_vec_perm_1 (icode, target, v0, v1, sel); > + if (tmp) > + return tmp; > + } > + > + /* Fall back to a constant byte-based permutation. */ > + if (qimode != VOIDmode) > + { > + vec = rtvec_alloc (w); > + for (i = 0; i < e; ++i) > { > - tmp = expand_vec_perm_1 (icode, target, v0, v1, sel); > - if (tmp) > - return tmp; > + unsigned int j, this_e; > + > + this_e = INTVAL (CONST_VECTOR_ELT (sel, i)); > + this_e &= 2 * e - 1; > + this_e *= u; > + > + for (j = 0; j < u; ++j) > + RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j); > } > + sel_qi = gen_rtx_CONST_VECTOR (qimode, vec); > > - /* Fall back to a constant byte-based permutation. */ > - if (qimode != VOIDmode) > + icode = direct_optab_handler (vec_perm_const_optab, qimode); > + if (icode != CODE_FOR_nothing) > { > - vec = rtvec_alloc (w); > - for (i = 0; i < e; ++i) > - { > - unsigned int j, this_e; > + tmp = gen_reg_rtx (qimode); > + tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0), > + gen_lowpart (qimode, v1), sel_qi); > + if (tmp) > + return gen_lowpart (mode, tmp); > + } > + } > > - this_e = INTVAL (CONST_VECTOR_ELT (sel, i)); > - this_e &= 2 * e - 1; > - this_e *= u; > + /* Otherwise expand as a fully variable permuation. */ > > - for (j = 0; j < u; ++j) > - RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j); > - } > - sel_qi = gen_rtx_CONST_VECTOR (qimode, vec); > + icode = direct_optab_handler (vec_perm_optab, mode); > + if (icode != CODE_FOR_nothing) > + { > + rtx tmp = expand_vec_perm_1 (icode, target, v0, v1, sel); > + if (tmp) > + return tmp; > + } > > - icode = direct_optab_handler (vec_perm_const_optab, qimode); > - if (icode != CODE_FOR_nothing) > - { > - tmp = mode != qimode ? gen_reg_rtx (qimode) : target; > - tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0), > - gen_lowpart (qimode, v1), sel_qi); > - if (tmp) > - return gen_lowpart (mode, tmp); > - } > + if (qimode != VOIDmode) > + { > + icode = direct_optab_handler (vec_perm_optab, qimode); > + if (icode != CODE_FOR_nothing) > + { > + rtx tmp = gen_reg_rtx (qimode); > + tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0), > + gen_lowpart (qimode, v1), sel_qi); > + if (tmp) > + return gen_lowpart (mode, tmp); > } > } > > - /* Otherwise expand as a fully variable permuation. */ > + return NULL_RTX; > +} > + > +/* Implement a permutation of vectors v0 and v1 using the permutation > + vector in SEL and return the result. Use TARGET to hold the result > + if nonnull and convenient. > + > + MODE is the mode of the vectors being permuted (V0 and V1). > + SEL must have the integer equivalent of MODE and is known to be > + unsuitable for permutes with a constant permutation vector. */ > + > +static rtx > +expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) > +{ > + enum insn_code icode; > + unsigned int i, w, u; > + rtx tmp, sel_qi; > + rtvec vec; > + > + w = GET_MODE_SIZE (mode); > + u = GET_MODE_UNIT_SIZE (mode); > + > + if (!target || GET_MODE (target) != mode) > + target = gen_reg_rtx (mode); > + > icode = direct_optab_handler (vec_perm_optab, mode); > if (icode != CODE_FOR_nothing) > { > @@ -5545,50 +5594,47 @@ expand_vec_perm (machine_mode mode, rtx > > /* As a special case to aid several targets, lower the element-based > permutation to a byte-based permutation and try again. */ > - if (qimode == VOIDmode) > + machine_mode qimode; > + if (!qimode_for_vec_perm (mode).exists (&qimode)) > return NULL_RTX; > icode = direct_optab_handler (vec_perm_optab, qimode); > if (icode == CODE_FOR_nothing) > return NULL_RTX; > > - if (sel_qi == NULL) > + /* Multiply each element by its byte size. */ > + machine_mode selmode = GET_MODE (sel); > + if (u == 2) > + sel = expand_simple_binop (selmode, PLUS, sel, sel, > + NULL, 0, OPTAB_DIRECT); > + else > + sel = expand_simple_binop (selmode, ASHIFT, sel, GEN_INT (exact_log2 (u)), > + NULL, 0, OPTAB_DIRECT); > + gcc_assert (sel != NULL); > + > + /* Broadcast the low byte each element into each of its bytes. */ > + vec = rtvec_alloc (w); > + for (i = 0; i < w; ++i) > { > - /* Multiply each element by its byte size. */ > - machine_mode selmode = GET_MODE (sel); > - if (u == 2) > - sel = expand_simple_binop (selmode, PLUS, sel, sel, > - NULL, 0, OPTAB_DIRECT); > - else > - sel = expand_simple_binop (selmode, ASHIFT, sel, > - GEN_INT (exact_log2 (u)), > - NULL, 0, OPTAB_DIRECT); > - gcc_assert (sel != NULL); > - > - /* Broadcast the low byte each element into each of its bytes. */ > - vec = rtvec_alloc (w); > - for (i = 0; i < w; ++i) > - { > - int this_e = i / u * u; > - if (BYTES_BIG_ENDIAN) > - this_e += u - 1; > - RTVEC_ELT (vec, i) = GEN_INT (this_e); > - } > - tmp = gen_rtx_CONST_VECTOR (qimode, vec); > - sel = gen_lowpart (qimode, sel); > - sel = expand_vec_perm (qimode, sel, sel, tmp, NULL); > - gcc_assert (sel != NULL); > - > - /* Add the byte offset to each byte element. */ > - /* Note that the definition of the indicies here is memory ordering, > - so there should be no difference between big and little endian. */ > - vec = rtvec_alloc (w); > - for (i = 0; i < w; ++i) > - RTVEC_ELT (vec, i) = GEN_INT (i % u); > - tmp = gen_rtx_CONST_VECTOR (qimode, vec); > - sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp, > - sel, 0, OPTAB_DIRECT); > - gcc_assert (sel_qi != NULL); > + int this_e = i / u * u; > + if (BYTES_BIG_ENDIAN) > + this_e += u - 1; > + RTVEC_ELT (vec, i) = GEN_INT (this_e); > } > + tmp = gen_rtx_CONST_VECTOR (qimode, vec); > + sel = gen_lowpart (qimode, sel); > + sel = expand_vec_perm (qimode, sel, sel, tmp, NULL); > + gcc_assert (sel != NULL); > + > + /* Add the byte offset to each byte element. */ > + /* Note that the definition of the indicies here is memory ordering, > + so there should be no difference between big and little endian. */ > + vec = rtvec_alloc (w); > + for (i = 0; i < w; ++i) > + RTVEC_ELT (vec, i) = GEN_INT (i % u); > + tmp = gen_rtx_CONST_VECTOR (qimode, vec); > + sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp, > + sel, 0, OPTAB_DIRECT); > + gcc_assert (sel_qi != NULL); > > tmp = mode != qimode ? gen_reg_rtx (qimode) : target; > tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0), >
Index: gcc/optabs.c =================================================================== --- gcc/optabs.c 2017-12-09 23:06:57.167722990 +0000 +++ gcc/optabs.c 2017-12-09 23:11:09.452859833 +0000 @@ -5405,6 +5405,8 @@ expand_vec_perm_1 (enum insn_code icode, machine_mode smode = GET_MODE (sel); struct expand_operand ops[4]; + gcc_assert (GET_MODE_CLASS (smode) == MODE_VECTOR_INT + || mode_for_int_vector (tmode).require () == smode); create_output_operand (&ops[0], target, tmode); create_input_operand (&ops[3], sel, smode); @@ -5431,8 +5433,13 @@ expand_vec_perm_1 (enum insn_code icode, return NULL_RTX; } -/* Generate instructions for vec_perm optab given its mode - and three operands. */ +static rtx expand_vec_perm_var (machine_mode, rtx, rtx, rtx, rtx); + +/* Implement a permutation of vectors v0 and v1 using the permutation + vector in SEL and return the result. Use TARGET to hold the result + if nonnull and convenient. + + MODE is the mode of the vectors being permuted (V0 and V1). */ rtx expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) @@ -5443,6 +5450,9 @@ expand_vec_perm (machine_mode mode, rtx rtx tmp, sel_qi = NULL; rtvec vec; + if (GET_CODE (sel) != CONST_VECTOR) + return expand_vec_perm_var (mode, v0, v1, sel, target); + if (!target || GET_MODE (target) != mode) target = gen_reg_rtx (mode); @@ -5455,22 +5465,18 @@ expand_vec_perm (machine_mode mode, rtx if (!qimode_for_vec_perm (mode).exists (&qimode)) qimode = VOIDmode; - /* If the input is a constant, expand it specially. */ - gcc_assert (GET_MODE_CLASS (GET_MODE (sel)) == MODE_VECTOR_INT); - if (GET_CODE (sel) == CONST_VECTOR) - { /* See if this can be handled with a vec_shr. We only do this if the second vector is all zeroes. */ - enum insn_code shift_code = optab_handler (vec_shr_optab, mode); - enum insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode) + insn_code shift_code = optab_handler (vec_shr_optab, mode); + insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode) ? optab_handler (vec_shr_optab, qimode) : CODE_FOR_nothing); - rtx shift_amt = NULL_RTX; + if (v1 == CONST0_RTX (GET_MODE (v1)) && (shift_code != CODE_FOR_nothing || shift_code_qi != CODE_FOR_nothing)) { - shift_amt = shift_amt_for_vec_perm_mask (sel); + rtx shift_amt = shift_amt_for_vec_perm_mask (sel); if (shift_amt) { struct expand_operand ops[3]; @@ -5478,19 +5484,16 @@ expand_vec_perm (machine_mode mode, rtx { create_output_operand (&ops[0], target, mode); create_input_operand (&ops[1], v0, mode); - create_convert_operand_from_type (&ops[2], shift_amt, - sizetype); + create_convert_operand_from_type (&ops[2], shift_amt, sizetype); if (maybe_expand_insn (shift_code, 3, ops)) return ops[0].value; } if (shift_code_qi != CODE_FOR_nothing) { - tmp = gen_reg_rtx (qimode); + rtx tmp = gen_reg_rtx (qimode); create_output_operand (&ops[0], tmp, qimode); - create_input_operand (&ops[1], gen_lowpart (qimode, v0), - qimode); - create_convert_operand_from_type (&ops[2], shift_amt, - sizetype); + create_input_operand (&ops[1], gen_lowpart (qimode, v0), qimode); + create_convert_operand_from_type (&ops[2], shift_amt, sizetype); if (maybe_expand_insn (shift_code_qi, 3, ops)) return gen_lowpart (mode, ops[0].value); } @@ -5525,16 +5528,62 @@ expand_vec_perm (machine_mode mode, rtx icode = direct_optab_handler (vec_perm_const_optab, qimode); if (icode != CODE_FOR_nothing) { - tmp = mode != qimode ? gen_reg_rtx (qimode) : target; + tmp = gen_reg_rtx (qimode); tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0), gen_lowpart (qimode, v1), sel_qi); if (tmp) return gen_lowpart (mode, tmp); } } - } /* Otherwise expand as a fully variable permuation. */ + + icode = direct_optab_handler (vec_perm_optab, mode); + if (icode != CODE_FOR_nothing) + { + rtx tmp = expand_vec_perm_1 (icode, target, v0, v1, sel); + if (tmp) + return tmp; + } + + if (qimode != VOIDmode) + { + icode = direct_optab_handler (vec_perm_optab, qimode); + if (icode != CODE_FOR_nothing) + { + rtx tmp = gen_reg_rtx (qimode); + tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0), + gen_lowpart (qimode, v1), sel_qi); + if (tmp) + return gen_lowpart (mode, tmp); + } + } + + return NULL_RTX; +} + +/* Implement a permutation of vectors v0 and v1 using the permutation + vector in SEL and return the result. Use TARGET to hold the result + if nonnull and convenient. + + MODE is the mode of the vectors being permuted (V0 and V1). + SEL must have the integer equivalent of MODE and is known to be + unsuitable for permutes with a constant permutation vector. */ + +static rtx +expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) +{ + enum insn_code icode; + unsigned int i, w, u; + rtx tmp, sel_qi; + rtvec vec; + + w = GET_MODE_SIZE (mode); + u = GET_MODE_UNIT_SIZE (mode); + + if (!target || GET_MODE (target) != mode) + target = gen_reg_rtx (mode); + icode = direct_optab_handler (vec_perm_optab, mode); if (icode != CODE_FOR_nothing) { @@ -5545,22 +5594,20 @@ expand_vec_perm (machine_mode mode, rtx /* As a special case to aid several targets, lower the element-based permutation to a byte-based permutation and try again. */ - if (qimode == VOIDmode) + machine_mode qimode; + if (!qimode_for_vec_perm (mode).exists (&qimode)) return NULL_RTX; icode = direct_optab_handler (vec_perm_optab, qimode); if (icode == CODE_FOR_nothing) return NULL_RTX; - if (sel_qi == NULL) - { /* Multiply each element by its byte size. */ machine_mode selmode = GET_MODE (sel); if (u == 2) sel = expand_simple_binop (selmode, PLUS, sel, sel, NULL, 0, OPTAB_DIRECT); else - sel = expand_simple_binop (selmode, ASHIFT, sel, - GEN_INT (exact_log2 (u)), + sel = expand_simple_binop (selmode, ASHIFT, sel, GEN_INT (exact_log2 (u)), NULL, 0, OPTAB_DIRECT); gcc_assert (sel != NULL); @@ -5588,7 +5635,6 @@ expand_vec_perm (machine_mode mode, rtx sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp, sel, 0, OPTAB_DIRECT); gcc_assert (sel_qi != NULL); - } tmp = mode != qimode ? gen_reg_rtx (qimode) : target; tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),