[ARM] Improve use of conditional execution in thumb mode.

Message ID	4F58D4D2.2030701@codesourcery.com
State	New
Headers	show Return-Path: <patch+caf_=linaro-patchwork=canonical.com@linaro.org> Received-SPF: neutral (google.com: 192.94.38.131 is neither permitted nor denied by best guess record for domain of Andrew_Stubbs@mentor.com) client-ip=192.94.38.131; Message-ID: <4F58D4D2.2030701@codesourcery.com> Date: Thu, 8 Mar 2012 15:48:34 +0000 From: Andrew Stubbs <ams@codesourcery.com> User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.2) Gecko/20120216 Thunderbird/10.0.2 MIME-Version: 1.0 CC: Richard Earnshaw <rearnsha@arm.com>, "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>, "patches@linaro.org" <patches@linaro.org> Subject: Re: [PATCH][ARM] Improve use of conditional execution in thumb mode. References: <4F3A91A0.3040209@codesourcery.com> <4F3A9A44.2080105@arm.com> <4F3A9B68.10806@arm.com> <4F3AA159.7040707@codesourcery.com> <4F3E72A1.9080600@codesourcery.com> In-Reply-To: <4F3E72A1.9080600@codesourcery.com> Content-Type: multipart/mixed; boundary="------------010308060409060600050401"

2012-03-08 Andrew Stubbs <ams@codesourcery.com> gcc/ * config/arm/arm.c (thumb2_reorg): Add complete support for 16-bit instructions. * config/arm/thumb2.md: Delete obsolete flag-clobbering peepholes. gcc/testsuite/ * gcc.target/arm/thumb-16bit-ops.c: New file. * gcc.target/arm/thumb-ifcvt.c: New file. --- gcc/config/arm/arm.c | 157 ++++++++++++++++--- gcc/config/arm/thumb2.md | 107 ------------- gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c | 196 ++++++++++++++++++++++++ gcc/testsuite/gcc.target/arm/thumb-ifcvt.c | 19 ++ 4 files changed, 344 insertions(+), 135 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c create mode 100644 gcc/testsuite/gcc.target/arm/thumb-ifcvt.c diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 0bded8d..44f99c1 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -13246,47 +13246,148 @@ thumb2_reorg (void) FOR_BB_INSNS_REVERSE (bb, insn) { if (NONJUMP_INSN_P (insn) - && !REGNO_REG_SET_P (&live, CC_REGNUM)) + && !REGNO_REG_SET_P (&live, CC_REGNUM) + && GET_CODE (PATTERN (insn)) == SET) { + enum {SKIP, CONV, SWAP_CONV} action = SKIP; rtx pat = PATTERN (insn); - if (GET_CODE (pat) == SET - && low_register_operand (XEXP (pat, 0), SImode) - && thumb_16bit_operator (XEXP (pat, 1), SImode) - && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode) - && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode)) + rtx dst = XEXP (pat, 0); + rtx src = XEXP (pat, 1); + rtx op0 = NULL_RTX, op1 = NULL_RTX; + + if (!OBJECT_P (src)) + op0 = XEXP (src, 0); + + if (BINARY_P (src)) + op1 = XEXP (src, 1); + + if (low_register_operand (dst, SImode)) { - rtx dst = XEXP (pat, 0); - rtx src = XEXP (pat, 1); - rtx op0 = XEXP (src, 0); - rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH - ? XEXP (src, 1) : NULL); - - if (rtx_equal_p (dst, op0) - || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS) + switch (GET_CODE (src)) { - rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM); - rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg); - rtvec vec = gen_rtvec (2, pat, clobber); + case PLUS: + if (low_register_operand (op0, SImode)) + { + /* ADDS <Rd>,<Rn>,<Rm> */ + if (low_register_operand (op1, SImode)) + action = CONV; + /* ADDS <Rdn>,#<imm8> */ + /* SUBS <Rdn>,#<imm8> */ + else if (rtx_equal_p (dst, op0) + && CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), -255, 255)) + action = CONV; + /* ADDS <Rd>,<Rn>,#<imm3> */ + /* SUBS <Rd>,<Rn>,#<imm3> */ + else if (CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), -7, 7)) + action = CONV; + } + break; + + case MINUS: + /* RSBS <Rd>,<Rn>,#0 + Not handled here: see NEG below. */ + /* SUBS <Rd>,<Rn>,#<imm3> + SUBS <Rdn>,#<imm8> + Not handled here: see PLUS above. */ + /* SUBS <Rd>,<Rn>,<Rm> */ + if (low_register_operand (op0, SImode) + && low_register_operand (op1, SImode)) + action = CONV; + break; + + case MULT: + /* MULS <Rdm>,<Rn>,<Rdm> + As an exception to the rule, this is only used + when optimizing for size since MULS is slow on all + known implementations. We do not even want to use + MULS in cold code, if optimizing for speed, so we + test the global flag here. */ + if (!optimize_size) + break; + /* else fall through. */ + case AND: + case IOR: + case XOR: + /* ANDS <Rdn>,<Rm> */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) + action = CONV; + else if (rtx_equal_p (dst, op1) + && low_register_operand (op0, SImode)) + action = SWAP_CONV; + break; + + case ASHIFTRT: + case ASHIFT: + case LSHIFTRT: + /* ASRS <Rdn>,<Rm> */ + /* LSRS <Rdn>,<Rm> */ + /* LSLS <Rdn>,<Rm> */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) + action = CONV; + /* ASRS <Rd>,<Rm>,#<imm5> */ + /* LSRS <Rd>,<Rm>,#<imm5> */ + /* LSLS <Rd>,<Rm>,#<imm5> */ + else if (low_register_operand (op0, SImode) + && CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), 0, 31)) + action = CONV; + break; + + case ROTATERT: + /* RORS <Rdn>,<Rm> */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) + action = CONV; + break; - PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec); - INSN_CODE (insn) = -1; + case NOT: + case NEG: + /* MVNS <Rd>,<Rm> */ + /* NEGS <Rd>,<Rm> (a.k.a RSBS) */ + if (low_register_operand (op0, SImode)) + action = CONV; + break; + + case CONST_INT: + /* MOVS <Rd>,#<imm8> */ + if (CONST_INT_P (src) + && IN_RANGE (INTVAL (src), 0, 255)) + action = CONV; + break; + + case REG: + /* MOVS and MOV<c> with registers have different + encodings, so are not relevant here. */ + break; + + default: + break; } - /* We can also handle a commutative operation where the - second operand matches the destination. */ - else if (op1 && rtx_equal_p (dst, op1)) - { - rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM); - rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg); - rtvec vec; + } + if (action != SKIP) + { + rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM); + rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg); + rtvec vec; + + if (action == SWAP_CONV) + { src = copy_rtx (src); XEXP (src, 0) = op1; XEXP (src, 1) = op0; pat = gen_rtx_SET (VOIDmode, dst, src); vec = gen_rtvec (2, pat, clobber); - PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec); - INSN_CODE (insn) = -1; } + else /* action == CONV */ + vec = gen_rtvec (2, pat, clobber); + + PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec); + INSN_CODE (insn) = -1; } } diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 05585da..799a3df 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -677,26 +677,6 @@ (set_attr "length" "2")] ) -;; Similarly for 16-bit shift instructions -;; There is no 16-bit rotate by immediate instruction. -(define_peephole2 - [(set (match_operand:SI 0 "low_register_operand" "") - (match_operator:SI 3 "shift_operator" - [(match_operand:SI 1 "low_register_operand" "") - (match_operand:SI 2 "low_reg_or_int_operand" "")]))] - "TARGET_THUMB2 - && peep2_regno_dead_p(0, CC_REGNUM) - && ((GET_CODE(operands[3]) != ROTATE && GET_CODE(operands[3]) != ROTATERT) - || REG_P(operands[2]))" - [(parallel - [(set (match_dup 0) - (match_op_dup 3 - [(match_dup 1) - (match_dup 2)])) - (clobber (reg:CC CC_REGNUM))])] - "" -) - (define_insn "*thumb2_shiftsi3_short" [(set (match_operand:SI 0 "low_register_operand" "=l") (match_operator:SI 3 "shift_operator" @@ -715,20 +695,6 @@ (const_string "alu_shift_reg")))] ) -;; 16-bit load immediate -(define_peephole2 - [(set (match_operand:QHSI 0 "low_register_operand" "") - (match_operand:QHSI 1 "const_int_operand" ""))] - "TARGET_THUMB2 - && peep2_regno_dead_p(0, CC_REGNUM) - && (unsigned HOST_WIDE_INT) INTVAL(operands[1]) < 256" - [(parallel - [(set (match_dup 0) - (match_dup 1)) - (clobber (reg:CC CC_REGNUM))])] - "" -) - (define_insn "*thumb2_mov<mode>_shortim" [(set (match_operand:QHSI 0 "low_register_operand" "=l") (match_operand:QHSI 1 "const_int_operand" "I")) @@ -739,24 +705,6 @@ (set_attr "length" "2")] ) -;; 16-bit add/sub immediate -(define_peephole2 - [(set (match_operand:SI 0 "low_register_operand" "") - (plus:SI (match_operand:SI 1 "low_register_operand" "") - (match_operand:SI 2 "const_int_operand" "")))] - "TARGET_THUMB2 - && peep2_regno_dead_p(0, CC_REGNUM) - && ((rtx_equal_p(operands[0], operands[1]) - && INTVAL(operands[2]) > -256 && INTVAL(operands[2]) < 256) - || (INTVAL(operands[2]) > -8 && INTVAL(operands[2]) < 8))" - [(parallel - [(set (match_dup 0) - (plus:SI (match_dup 1) - (match_dup 2))) - (clobber (reg:CC CC_REGNUM))])] - "" -) - (define_insn "*thumb2_addsi_short" [(set (match_operand:SI 0 "low_register_operand" "=l,l") (plus:SI (match_operand:SI 1 "low_register_operand" "l,0") @@ -868,35 +816,6 @@ (set_attr "length" "2,4")] ) -;; 16-bit encodings of "muls" and "mul<c>". We only use these when -;; optimizing for size since "muls" is slow on all known -;; implementations and since "mul<c>" will be generated by -;; "*arm_mulsi3_v6" anyhow. The assembler will use a 16-bit encoding -;; for "mul<c>" whenever possible anyhow. -(define_peephole2 - [(set (match_operand:SI 0 "low_register_operand" "") - (mult:SI (match_operand:SI 1 "low_register_operand" "") - (match_dup 0)))] - "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)" - [(parallel - [(set (match_dup 0) - (mult:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC CC_REGNUM))])] - "" -) - -(define_peephole2 - [(set (match_operand:SI 0 "low_register_operand" "") - (mult:SI (match_dup 0) - (match_operand:SI 1 "low_register_operand" "")))] - "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)" - [(parallel - [(set (match_dup 0) - (mult:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC CC_REGNUM))])] - "" -) - (define_insn "*thumb2_mulsi_short" [(set (match_operand:SI 0 "low_register_operand" "=l") (mult:SI (match_operand:SI 1 "low_register_operand" "%0") @@ -979,19 +898,6 @@ (const_int 8)))] ) -;; 16-bit complement -(define_peephole2 - [(set (match_operand:SI 0 "low_register_operand" "") - (not:SI (match_operand:SI 1 "low_register_operand" "")))] - "TARGET_THUMB2 - && peep2_regno_dead_p(0, CC_REGNUM)" - [(parallel - [(set (match_dup 0) - (not:SI (match_dup 1))) - (clobber (reg:CC CC_REGNUM))])] - "" -) - (define_insn "*thumb2_one_cmplsi2_short" [(set (match_operand:SI 0 "low_register_operand" "=l") (not:SI (match_operand:SI 1 "low_register_operand" "l"))) @@ -1002,19 +908,6 @@ (set_attr "length" "2")] ) -;; 16-bit negate -(define_peephole2 - [(set (match_operand:SI 0 "low_register_operand" "") - (neg:SI (match_operand:SI 1 "low_register_operand" "")))] - "TARGET_THUMB2 - && peep2_regno_dead_p(0, CC_REGNUM)" - [(parallel - [(set (match_dup 0) - (neg:SI (match_dup 1))) - (clobber (reg:CC CC_REGNUM))])] - "" -) - (define_insn "*thumb2_negsi2_short" [(set (match_operand:SI 0 "low_register_operand" "=l") (neg:SI (match_operand:SI 1 "low_register_operand" "l"))) diff --git a/gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c b/gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c new file mode 100644 index 0000000..2b71238 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/thumb-16bit-ops.c @@ -0,0 +1,196 @@ +/* Check that the compiler properly uses 16-bit encodings where available. */ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-options "-Os -fno-builtin" } */ + +int +f (int a, int b ) +{ + return a + b; +} + +/* { dg-final { scan-assembler "adds r0, r0, r1" } } */ + +int +g1 (int a) +{ + return a + 255; +} + +/* { dg-final { scan-assembler "adds r0, r0, #255" } } */ + +int +g2 (int a) +{ + return a + 256; +} + +/* { dg-final { scan-assembler "add r0, r0, #256" } } */ + +int +g3 (int a) +{ + return a - 255; +} + +/* { dg-final { scan-assembler "subs r0, r0, #255" } } */ + +int +g4 (int a) +{ + return a - 256; +} + +/* { dg-final { scan-assembler "sub r0, r0, #256" } } */ + +int +h1 (int a, int b) +{ + return b + 7; +} + +/* { dg-final { scan-assembler "adds r0, r1, #7" } } */ + +int +h2 (int a, int b) +{ + return b + 8; +} + +/* { dg-final { scan-assembler "add r0, r1, #8" } } */ + +int +h3 (int a, int b) +{ + return b - 7; +} + +/* { dg-final { scan-assembler "subs r0, r1, #7" } } */ + +int +h4 (int a, int b) +{ + return b - 8; +} + +/* { dg-final { scan-assembler "sub r0, r1, #8" } } */ + +int +i (int a, int b) +{ + return b; +} + +/* { dg-final { scan-assembler "mov r0, r1" } } */ + +int +j1 () +{ + return 255; +} + +/* { dg-final { scan-assembler "movs r0, #255" } } */ + +int +j2 () +{ + return 256; +} + +/* { dg-final { scan-assembler "mov r0, #256" } } */ + +int +k (int a, int b) +{ + return b << 15; +} + +/* { dg-final { scan-assembler "lsls r0, r1, #15" } } */ + +int +l1 (int a, int b) +{ + return a << b; +} + +/* { dg-final { scan-assembler "lsls r0, r0, r1" } } */ + +int +l2 (int a, int b, int c) +{ + return b << c; +} + +/* { dg-final { scan-assembler "lsl r0, r1, r2" } } */ + +int +m (int a, int b) +{ + return b >> 15; +} + +/* { dg-final { scan-assembler "asrs r0, r1, #15" } } */ + +int +n1 (int a, int b) +{ + return a >> b; +} + +/* { dg-final { scan-assembler "asrs r0, r0, r1" } } */ + +int +n2 (int a, int b, int c) +{ + return b >> c; +} + +/* { dg-final { scan-assembler "asr r0, r1, r2" } } */ + +unsigned int +o (unsigned int a, unsigned int b) +{ + return b >> 15; +} + +/* { dg-final { scan-assembler "lsrs r0, r1, #15" } } */ + +unsigned int +p1 (unsigned int a, unsigned int b) +{ + return a >> b; +} + +/* { dg-final { scan-assembler "lsrs r0, r0, r1" } } */ + +unsigned int +p2 (unsigned int a, unsigned int b, unsigned int c) +{ + return b >> c; +} + +/* { dg-final { scan-assembler "lsr r0, r1, r2" } } */ + +int +q (int a, int b) +{ + return b * a; +} + +/* { dg-final { scan-assembler "muls r0, r1, r0" } } */ + +int +r (int a, int b) +{ + return ~b; +} + +/* { dg-final { scan-assembler "mvns r0, r1" } } */ + +int +s (int a, int b) +{ + return -b; +} + +/* { dg-final { scan-assembler "negs r0, r1" } } */ diff --git a/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c b/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c new file mode 100644 index 0000000..b03bbce --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/thumb-ifcvt.c @@ -0,0 +1,19 @@ +/* Check that Thumb 16-bit shifts can be if-converted. */ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-options "-O2" } */ + +int +foo (int a, int b) +{ + if (a != b) + { + a = a << b; + a = a >> 1; + } + + return a + b; +} + +/* { dg-final { scan-assembler "lslne" } } */ +/* { dg-final { scan-assembler "asrne" } } */

[ARM] Improve use of conditional execution in thumb mode.

Commit Message

Comments

Patch