Message ID | 20241230023913.10260-1-guojie@loongson.cn |
---|---|
State | Accepted |
Commit | d55d40afd42a280c80729b538e3cce994f20961d |
Headers | show |
Series | LoongArch: Optimize for conditional move operations | expand |
On Mon, 2024-12-30 at 10:39 +0800, Guo Jie wrote: > + /* Make sure that imm is a positive integer power of 2. */ Maybe we should also consider the case $imm = 2^k + 1$ as they can be implemented with sl[te] and bstrins.[wd]. But it can be done in another patch anyway. > + if (val > 0 && !(val & (val - 1))) > + can_be_optimized = true;
Thanks for your suggestion! Indeed, there are still some scenarios that can be optimized and improved next. 在 2024/12/30 下午12:06, Xi Ruoyao 写道: > On Mon, 2024-12-30 at 10:39 +0800, Guo Jie wrote: >> + /* Make sure that imm is a positive integer power of 2. */ > Maybe we should also consider the case $imm = 2^k + 1$ as they can be > implemented with sl[te] and bstrins.[wd]. But it can be done in another > patch anyway. > >> + if (val > 0 && !(val & (val - 1))) >> + can_be_optimized = true;
Pushed to r15-6493. 在 2024/12/30 上午10:39, Guo Jie 写道: > The optimization example is as follows. > > From: > if (condition) > dest += 1 << 16; > To: > dest += (condition ? 1 : 0) << 16; > > It does not use maskeqz and masknez, thus reducing the number of > instructions. > > gcc/ChangeLog: > > * config/loongarch/loongarch.cc > (loongarch_expand_conditional_move): Add some optimization > implementations based on noce_try_cmove_arith. > > gcc/testsuite/ChangeLog: > > * gcc.target/loongarch/conditional-move-opt-1.c: New test. > * gcc.target/loongarch/conditional-move-opt-2.c: New test. > > --- > gcc/config/loongarch/loongarch.cc | 103 +++++++++++++++++- > .../loongarch/conditional-move-opt-1.c | 58 ++++++++++ > .../loongarch/conditional-move-opt-2.c | 42 +++++++ > 3 files changed, 202 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c > create mode 100644 gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c > > diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc > index 2d4290bc2d1..32fd1697813 100644 > --- a/gcc/config/loongarch/loongarch.cc > +++ b/gcc/config/loongarch/loongarch.cc > @@ -5294,6 +5294,81 @@ loongarch_expand_conditional_move (rtx *operands) > loongarch_emit_float_compare (&code, &op0, &op1); > else > { > + /* Optimize to reduce the number of instructions for ternary operations. > + Mainly implemented based on noce_try_cmove_arith. > + For dest = (condition) ? value_if_true : value_if_false; > + the optimization requires: > + a. value_if_false = var; > + b. value_if_true = var OP C (a positive integer power of 2). > + > + Situations similar to the following: > + if (condition) > + dest += 1 << imm; > + to: > + dest += (condition ? 1 : 0) << imm; */ > + > + rtx_insn *insn; > + HOST_WIDE_INT val = 0; /* The value of rtx C. */ > + /* INSN with operands[2] as the output. */ > + rtx_insn *value_if_true_insn = NULL; > + /* INSN with operands[3] as the output. 
*/ > + rtx_insn *value_if_false_insn = NULL; > + rtx value_if_true_insn_src = NULL_RTX; > + /* Common operand var in value_if_true and value_if_false. */ > + rtx comm_var = NULL_RTX; > + bool can_be_optimized = false; > + > + /* Search value_if_true_insn and value_if_false_insn. */ > + struct sequence_stack *seq = get_current_sequence ()->next; > + for (insn = seq->last; insn; insn = PREV_INSN (insn)) > + { > + if (single_set (insn)) > + { > + rtx set_dest = SET_DEST (single_set (insn)); > + if (rtx_equal_p (set_dest, operands[2])) > + value_if_true_insn = insn; > + else if (rtx_equal_p (set_dest, operands[3])) > + value_if_false_insn = insn; > + if (value_if_true_insn && value_if_false_insn) > + break; > + } > + } > + > + /* Check if the optimization conditions are met. */ > + if (value_if_true_insn > + && value_if_false_insn > + /* Make sure that value_if_false and var are the same. */ > + && BINARY_P (value_if_true_insn_src > + = SET_SRC (single_set (value_if_true_insn))) > + /* Make sure that both value_if_true and value_if_false > + has the same var. */ > + && rtx_equal_p (XEXP (value_if_true_insn_src, 0), > + SET_SRC (single_set (value_if_false_insn)))) > + { > + comm_var = SET_SRC (single_set (value_if_false_insn)); > + rtx src = XEXP (value_if_true_insn_src, 1); > + rtx imm = NULL_RTX; > + if (CONST_INT_P (src)) > + imm = src; > + else > + for (insn = seq->last; insn; insn = PREV_INSN (insn)) > + { > + rtx set = single_set (insn); > + if (set && rtx_equal_p (SET_DEST (set), src)) > + { > + imm = SET_SRC (set); > + break; > + } > + } > + if (imm && CONST_INT_P (imm)) > + { > + val = INTVAL (imm); > + /* Make sure that imm is a positive integer power of 2. 
*/ > + if (val > 0 && !(val & (val - 1))) > + can_be_optimized = true; > + } > + } > + > if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD) > { > promote_op[0] = (REG_P (op0) && REG_P (operands[2]) && > @@ -5314,22 +5389,48 @@ loongarch_expand_conditional_move (rtx *operands) > op0_extend = op0; > op1_extend = force_reg (word_mode, op1); > > + rtx target = gen_reg_rtx (GET_MODE (op0)); > + > if (code == EQ || code == NE) > { > op0 = loongarch_zero_if_equal (op0, op1); > op1 = const0_rtx; > + /* For EQ, set target to 1 if op0 and op1 are the same, > + otherwise set to 0. > + For NE, set target to 0 if op0 and op1 are the same, > + otherwise set to 1. */ > + if (can_be_optimized) > + loongarch_emit_binary (code, target, op0, const0_rtx); > } > else > { > /* The comparison needs a separate scc instruction. Store the > result of the scc in *OP0 and compare it against zero. */ > bool invert = false; > - rtx target = gen_reg_rtx (GET_MODE (op0)); > loongarch_emit_int_order_test (code, &invert, target, op0, op1); > + if (can_be_optimized && invert) > + loongarch_emit_binary (EQ, target, target, const0_rtx); > code = invert ? EQ : NE; > op0 = target; > op1 = const0_rtx; > } > + > + if (can_be_optimized) > + { > + /* Perform (condition ? 1 : 0) << log2 (C). */ > + loongarch_emit_binary (ASHIFT, target, target, > + GEN_INT (exact_log2 (val))); > + /* Shift-related insn patterns only support SImode operands[2]. */ > + enum rtx_code opcode = GET_CODE (value_if_true_insn_src); > + if (opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT > + || opcode == ROTATE || opcode == ROTATERT) > + target = gen_lowpart (SImode, target); > + /* Perform target = target OP ((condition ? 1 : 0) << log2 (C)). 
*/ > + loongarch_emit_binary (opcode, operands[0], > + force_reg (GET_MODE (operands[3]), comm_var), > + target); > + return; > + } > } > > rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); > diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c > new file mode 100644 > index 00000000000..ed13471aa90 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c > @@ -0,0 +1,58 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-not "maskeqz" } } */ > +/* { dg-final { scan-assembler-not "masknez" } } */ > + > +extern long lm, ln, lr; > + > +void > +test_ne () > +{ > + if (lm != ln) > + lr += (1 << 16); > + lr += lm; > +} > + > +void > +test_eq () > +{ > + if (lm == ln) > + lr = lm + (1 << 16); > + else > + lr = lm; > + lr += lm; > +} > + > +void > +test_lt () > +{ > + if (lm < ln) > + lr *= (1 << 16); > + lr += lm; > +} > + > +void > +test_le () > +{ > + if (lm <= ln) > + lr = lm * ((long)1 << 32); > + else > + lr = lm; > + lr += lm; > +} > + > +void > +test_nez () > +{ > + if (lm != 0) > + lr <<= (1 << 4); > + lr += lm; > +} > + > +void > +test_eqz () > +{ > + if (lm == 0) > + lr >>= (1 << 2); > + lr += lm; > +} > diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c > new file mode 100644 > index 00000000000..ac72d4d933a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 --param max-rtl-if-conversion-insns=1" } */ > +/* { dg-final { scan-assembler-not "maskeqz" } } */ > +/* { dg-final { scan-assembler-not "masknez" } } */ > + > +/* The relevant optimization is currently only based on noce_try_cmove_arith, > + so it bypasses noce_convert_multiple_sets by > + --param max-rtl-if-conversion-insns=1 to execute noce_try_cmove_arith. 
*/ > + > +extern long lm, ln, lr; > + > +void > +test_ge () > +{ > + if (lm >= ln) > + lr += ((long)1 << 32); > + lr += lm; > +} > + > +void > +test_ltz () > +{ > + if (lm < 0) > + lr |= (1 << 16); > + lr += lm; > +} > + > +void > +test_lez () > +{ > + if (lm <= 0) > + lr &= (1 << 16); > + lr += lm; > +} > + > +void > +test_gez () > +{ > + if (lm >= 0) > + lr ^= (1 << 16); > + lr += lm; > +}
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 2d4290bc2d1..32fd1697813 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -5294,6 +5294,81 @@ loongarch_expand_conditional_move (rtx *operands) loongarch_emit_float_compare (&code, &op0, &op1); else { + /* Optimize to reduce the number of instructions for ternary operations. + Mainly implemented based on noce_try_cmove_arith. + For dest = (condition) ? value_if_true : value_if_false; + the optimization requires: + a. value_if_false = var; + b. value_if_true = var OP C (a positive integer power of 2). + + Situations similar to the following: + if (condition) + dest += 1 << imm; + to: + dest += (condition ? 1 : 0) << imm; */ + + rtx_insn *insn; + HOST_WIDE_INT val = 0; /* The value of rtx C. */ + /* INSN with operands[2] as the output. */ + rtx_insn *value_if_true_insn = NULL; + /* INSN with operands[3] as the output. */ + rtx_insn *value_if_false_insn = NULL; + rtx value_if_true_insn_src = NULL_RTX; + /* Common operand var in value_if_true and value_if_false. */ + rtx comm_var = NULL_RTX; + bool can_be_optimized = false; + + /* Search value_if_true_insn and value_if_false_insn. */ + struct sequence_stack *seq = get_current_sequence ()->next; + for (insn = seq->last; insn; insn = PREV_INSN (insn)) + { + if (single_set (insn)) + { + rtx set_dest = SET_DEST (single_set (insn)); + if (rtx_equal_p (set_dest, operands[2])) + value_if_true_insn = insn; + else if (rtx_equal_p (set_dest, operands[3])) + value_if_false_insn = insn; + if (value_if_true_insn && value_if_false_insn) + break; + } + } + + /* Check if the optimization conditions are met. */ + if (value_if_true_insn + && value_if_false_insn + /* Make sure that value_if_false and var are the same. */ + && BINARY_P (value_if_true_insn_src + = SET_SRC (single_set (value_if_true_insn))) + /* Make sure that both value_if_true and value_if_false + has the same var. 
*/ + && rtx_equal_p (XEXP (value_if_true_insn_src, 0), + SET_SRC (single_set (value_if_false_insn)))) + { + comm_var = SET_SRC (single_set (value_if_false_insn)); + rtx src = XEXP (value_if_true_insn_src, 1); + rtx imm = NULL_RTX; + if (CONST_INT_P (src)) + imm = src; + else + for (insn = seq->last; insn; insn = PREV_INSN (insn)) + { + rtx set = single_set (insn); + if (set && rtx_equal_p (SET_DEST (set), src)) + { + imm = SET_SRC (set); + break; + } + } + if (imm && CONST_INT_P (imm)) + { + val = INTVAL (imm); + /* Make sure that imm is a positive integer power of 2. */ + if (val > 0 && !(val & (val - 1))) + can_be_optimized = true; + } + } + if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD) { promote_op[0] = (REG_P (op0) && REG_P (operands[2]) && @@ -5314,22 +5389,48 @@ loongarch_expand_conditional_move (rtx *operands) op0_extend = op0; op1_extend = force_reg (word_mode, op1); + rtx target = gen_reg_rtx (GET_MODE (op0)); + if (code == EQ || code == NE) { op0 = loongarch_zero_if_equal (op0, op1); op1 = const0_rtx; + /* For EQ, set target to 1 if op0 and op1 are the same, + otherwise set to 0. + For NE, set target to 0 if op0 and op1 are the same, + otherwise set to 1. */ + if (can_be_optimized) + loongarch_emit_binary (code, target, op0, const0_rtx); } else { /* The comparison needs a separate scc instruction. Store the result of the scc in *OP0 and compare it against zero. */ bool invert = false; - rtx target = gen_reg_rtx (GET_MODE (op0)); loongarch_emit_int_order_test (code, &invert, target, op0, op1); + if (can_be_optimized && invert) + loongarch_emit_binary (EQ, target, target, const0_rtx); code = invert ? EQ : NE; op0 = target; op1 = const0_rtx; } + + if (can_be_optimized) + { + /* Perform (condition ? 1 : 0) << log2 (C). */ + loongarch_emit_binary (ASHIFT, target, target, + GEN_INT (exact_log2 (val))); + /* Shift-related insn patterns only support SImode operands[2]. 
*/ + enum rtx_code opcode = GET_CODE (value_if_true_insn_src); + if (opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT + || opcode == ROTATE || opcode == ROTATERT) + target = gen_lowpart (SImode, target); + /* Perform target = target OP ((condition ? 1 : 0) << log2 (C)). */ + loongarch_emit_binary (opcode, operands[0], + force_reg (GET_MODE (operands[3]), comm_var), + target); + return; + } } rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c new file mode 100644 index 00000000000..ed13471aa90 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not "maskeqz" } } */ +/* { dg-final { scan-assembler-not "masknez" } } */ + +extern long lm, ln, lr; + +void +test_ne () +{ + if (lm != ln) + lr += (1 << 16); + lr += lm; +} + +void +test_eq () +{ + if (lm == ln) + lr = lm + (1 << 16); + else + lr = lm; + lr += lm; +} + +void +test_lt () +{ + if (lm < ln) + lr *= (1 << 16); + lr += lm; +} + +void +test_le () +{ + if (lm <= ln) + lr = lm * ((long)1 << 32); + else + lr = lm; + lr += lm; +} + +void +test_nez () +{ + if (lm != 0) + lr <<= (1 << 4); + lr += lm; +} + +void +test_eqz () +{ + if (lm == 0) + lr >>= (1 << 2); + lr += lm; +} diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c new file mode 100644 index 00000000000..ac72d4d933a --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 --param max-rtl-if-conversion-insns=1" } */ +/* { dg-final { scan-assembler-not "maskeqz" } } */ +/* { dg-final { scan-assembler-not "masknez" } } */ + +/* The relevant optimization is currently only based on noce_try_cmove_arith, + so it 
bypasses noce_convert_multiple_sets by + --param max-rtl-if-conversion-insns=1 to execute noce_try_cmove_arith. */ + +extern long lm, ln, lr; + +void +test_ge () +{ + if (lm >= ln) + lr += ((long)1 << 32); + lr += lm; +} + +void +test_ltz () +{ + if (lm < 0) + lr |= (1 << 16); + lr += lm; +} + +void +test_lez () +{ + if (lm <= 0) + lr &= (1 << 16); + lr += lm; +} + +void +test_gez () +{ + if (lm >= 0) + lr ^= (1 << 16); + lr += lm; +}